hivecorp committed on
Commit
f5e4024
·
verified ·
1 Parent(s): f826da3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -71
app.py CHANGED
@@ -6,37 +6,30 @@ import asyncio
6
  import uuid
7
  import re
8
 
9
- # Function to get the length of an audio file in seconds
10
  def get_audio_length(audio_file):
11
  audio = AudioSegment.from_file(audio_file)
12
- return audio.duration_seconds
13
-
14
- # Function to format time for SRT
15
- def format_time(seconds):
16
- millis = int((seconds % 1) * 1000)
17
- seconds = int(seconds)
18
- hrs = seconds // 3600
19
- mins = (seconds % 3600) // 60
20
- secs = seconds % 60
21
- return f"{hrs:02}:{mins:02}:{secs:02},{millis:03}"
22
-
23
- # Function to split text based on punctuation, handling segments over 8 words
24
  def split_text_into_segments(text):
25
- # Split based on punctuation marks (.!? and ,)
26
  segments = []
27
  raw_segments = re.split(r'([.!?,])', text)
28
- temp_sentence = ""
29
-
30
  for i in range(0, len(raw_segments) - 1, 2):
31
- # Combine sentence with punctuation
32
  sentence = raw_segments[i].strip() + raw_segments[i + 1]
33
  words = sentence.split()
34
-
35
- # If the sentence has 8 words or fewer, add as is
36
  if len(words) <= 8:
37
  segments.append(sentence.strip())
38
  else:
39
- # Split longer sentences into chunks of max 8 words without splitting words
40
  chunk = ""
41
  for word in words:
42
  if len(chunk.split()) < 8:
@@ -47,7 +40,6 @@ def split_text_into_segments(text):
47
  if chunk:
48
  segments.append(chunk.strip())
49
 
50
- # Handle any leftover sentence fragment not followed by punctuation
51
  if len(raw_segments) % 2 == 1:
52
  remaining_text = raw_segments[-1].strip()
53
  if remaining_text:
@@ -55,23 +47,19 @@ def split_text_into_segments(text):
55
 
56
  return segments
57
 
58
- # Function to generate SRT with accurate timing per batch
59
  async def generate_accurate_srt(batch_text, batch_num, start_offset, pitch, rate, voice):
60
  audio_file = f"batch_{batch_num}_audio.wav"
61
 
62
- # Generate the audio using edge-tts
63
  tts = edge_tts.Communicate(batch_text, voice, rate=rate, pitch=pitch)
64
  await tts.save(audio_file)
65
 
66
- # Get the actual length of the audio file
67
- actual_length = get_audio_length(audio_file)
68
 
69
- # Split the text into segments based on punctuation and word count
70
  segments = split_text_into_segments(batch_text)
71
- segment_duration = actual_length / len(segments) # Duration per segment
72
  start_time = start_offset
73
 
74
- # Initialize SRT content
75
  srt_content = ""
76
  for index, segment in enumerate(segments):
77
  end_time = start_time + segment_duration
@@ -80,14 +68,14 @@ async def generate_accurate_srt(batch_text, batch_num, start_offset, pitch, rate
80
  end_time = start_offset + actual_length
81
 
82
  srt_content += f"{index + 1 + (batch_num * 100)}\n"
83
- srt_content += f"{format_time(start_time)} --> {format_time(end_time)}\n"
84
  srt_content += segment + "\n\n"
85
 
86
  start_time = end_time
87
 
88
  return srt_content, audio_file, start_time
89
 
90
- # Batch processing function
91
  async def batch_process_srt_and_audio(script_text, pitch, rate, voice, progress=gr.Progress()):
92
  batches = [script_text[i:i + 500] for i in range(0, len(script_text), 500)]
93
  all_srt_content = ""
@@ -114,7 +102,7 @@ async def batch_process_srt_and_audio(script_text, pitch, rate, voice, progress=
114
  end_time = sum(x * float(t) for x, t in zip([3600, 60, 1, 0.001], end_str.replace(',', ':').split(':')))
115
  if end_time > total_audio_length:
116
  end_time = total_audio_length
117
- line = f"{format_time(start_time)} --> {format_time(end_time)}"
118
  validated_srt_content += line + "\n"
119
 
120
  unique_id = uuid.uuid4()
@@ -130,7 +118,6 @@ async def batch_process_srt_and_audio(script_text, pitch, rate, voice, progress=
130
 
131
  # Gradio interface function
132
  async def process_script(script_text, pitch, rate, voice):
133
- # Format pitch correctly for edge-tts
134
  pitch_str = f"{pitch}Hz" if pitch != 0 else "-1Hz"
135
  formatted_rate = f"{'+' if rate > 1 else ''}{int(rate)}%"
136
  srt_path, audio_path = await batch_process_srt_and_audio(script_text, pitch_str, formatted_rate, voice_options[voice])
@@ -138,44 +125,8 @@ async def process_script(script_text, pitch, rate, voice):
138
 
139
  # Gradio interface setup
140
  voice_options = {
141
- "Andrew Male": "en-US-AndrewNeural",
142
  "Jenny Female": "en-US-JennyNeural",
143
- "Guy Male": "en-US-GuyNeural",
144
- "Ana Female": "en-US-AnaNeural",
145
- "Aria Female": "en-US-AriaNeural",
146
- "Brian Male": "en-US-BrianNeural",
147
- "Christopher Male": "en-US-ChristopherNeural",
148
- "Eric Male": "en-US-EricNeural",
149
- "Michelle Male": "en-US-MichelleNeural",
150
- "Roger Male": "en-US-RogerNeural",
151
- "Natasha Female": "en-AU-NatashaNeural",
152
- "William Male": "en-AU-WilliamNeural",
153
- "Clara Female": "en-CA-ClaraNeural",
154
- "Liam Female ": "en-CA-LiamNeural",
155
- "Libby Female": "en-GB-LibbyNeural",
156
- "Maisie": "en-GB-MaisieNeural",
157
- "Ryan": "en-GB-RyanNeural",
158
- "Sonia": "en-GB-SoniaNeural",
159
- "Thomas": "en-GB-ThomasNeural",
160
- "Sam": "en-HK-SamNeural",
161
- "Yan": "en-HK-YanNeural",
162
- "Connor": "en-IE-ConnorNeural",
163
- "Emily": "en-IE-EmilyNeural",
164
- "Neerja": "en-IN-NeerjaNeural",
165
- "Prabhat": "en-IN-PrabhatNeural",
166
- "Asilia": "en-KE-AsiliaNeural",
167
- "Chilemba": "en-KE-ChilembaNeural",
168
- "Abeo": "en-NG-AbeoNeural",
169
- "Ezinne": "en-NG-EzinneNeural",
170
- "Mitchell": "en-NZ-MitchellNeural",
171
- "James": "en-PH-JamesNeural",
172
- "Rosa": "en-PH-RosaNeural",
173
- "Luna": "en-SG-LunaNeural",
174
- "Wayne": "en-SG-WayneNeural",
175
- "Elimu": "en-TZ-ElimuNeural",
176
- "Imani": "en-TZ-ImaniNeural",
177
- "Leah": "en-ZA-LeahNeural",
178
- "Luke": "en-ZA-LukeNeural"
179
  # Add other voices here...
180
  }
181
 
@@ -192,8 +143,8 @@ app = gr.Interface(
192
  gr.File(label="Download Audio File"),
193
  gr.Audio(label="Audio Playback")
194
  ],
195
- title="HIVEcorp Text-to-Speech with SRT Generation",
196
- description="Convert your script into audio and generate subtitles.",
197
  theme="compact",
198
  )
199
 
 
6
  import uuid
7
  import re
8
 
9
+ # Function to get the length of an audio file in seconds
10
  def get_audio_length(audio_file):
11
  audio = AudioSegment.from_file(audio_file)
12
+ return len(audio) / 1000 # Return in seconds for compatibility
13
+
14
+ # Function to format a time given in milliseconds as an SRT timestamp (HH:MM:SS,mmm)
15
+ def format_time_ms(milliseconds):
16
+ seconds, ms = divmod(int(milliseconds), 1000)
17
+ mins, secs = divmod(seconds, 60)
18
+ hrs, mins = divmod(mins, 60)
19
+ return f"{hrs:02}:{mins:02}:{secs:02},{ms:03}"
20
+
21
+ # Function to split text into segments based on punctuation, ensuring no word is split
 
 
22
  def split_text_into_segments(text):
 
23
  segments = []
24
  raw_segments = re.split(r'([.!?,])', text)
25
+
 
26
  for i in range(0, len(raw_segments) - 1, 2):
 
27
  sentence = raw_segments[i].strip() + raw_segments[i + 1]
28
  words = sentence.split()
29
+
 
30
  if len(words) <= 8:
31
  segments.append(sentence.strip())
32
  else:
 
33
  chunk = ""
34
  for word in words:
35
  if len(chunk.split()) < 8:
 
40
  if chunk:
41
  segments.append(chunk.strip())
42
 
 
43
  if len(raw_segments) % 2 == 1:
44
  remaining_text = raw_segments[-1].strip()
45
  if remaining_text:
 
47
 
48
  return segments
49
 
50
+ # Function to generate SRT with millisecond accuracy per batch
51
  async def generate_accurate_srt(batch_text, batch_num, start_offset, pitch, rate, voice):
52
  audio_file = f"batch_{batch_num}_audio.wav"
53
 
 
54
  tts = edge_tts.Communicate(batch_text, voice, rate=rate, pitch=pitch)
55
  await tts.save(audio_file)
56
 
57
+ actual_length = get_audio_length(audio_file) * 1000 # Convert to milliseconds
 
58
 
 
59
  segments = split_text_into_segments(batch_text)
60
+ segment_duration = actual_length / len(segments)
61
  start_time = start_offset
62
 
 
63
  srt_content = ""
64
  for index, segment in enumerate(segments):
65
  end_time = start_time + segment_duration
 
68
  end_time = start_offset + actual_length
69
 
70
  srt_content += f"{index + 1 + (batch_num * 100)}\n"
71
+ srt_content += f"{format_time_ms(start_time)} --> {format_time_ms(end_time)}\n"
72
  srt_content += segment + "\n\n"
73
 
74
  start_time = end_time
75
 
76
  return srt_content, audio_file, start_time
77
 
78
+ # Batch processing function with millisecond accuracy
79
  async def batch_process_srt_and_audio(script_text, pitch, rate, voice, progress=gr.Progress()):
80
  batches = [script_text[i:i + 500] for i in range(0, len(script_text), 500)]
81
  all_srt_content = ""
 
102
  end_time = sum(x * float(t) for x, t in zip([3600, 60, 1, 0.001], end_str.replace(',', ':').split(':')))
103
  if end_time > total_audio_length:
104
  end_time = total_audio_length
105
+ line = f"{format_time_ms(start_time * 1000)} --> {format_time_ms(end_time * 1000)}"
106
  validated_srt_content += line + "\n"
107
 
108
  unique_id = uuid.uuid4()
 
118
 
119
  # Gradio interface function
120
  async def process_script(script_text, pitch, rate, voice):
 
121
  pitch_str = f"{pitch}Hz" if pitch != 0 else "-1Hz"
122
  formatted_rate = f"{'+' if rate > 1 else ''}{int(rate)}%"
123
  srt_path, audio_path = await batch_process_srt_and_audio(script_text, pitch_str, formatted_rate, voice_options[voice])
 
125
 
126
  # Gradio interface setup
127
  voice_options = {
128
+ "Andrew Male": "en-US-AndrewNeural",
129
  "Jenny Female": "en-US-JennyNeural",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  # Add other voices here...
131
  }
132
 
 
143
  gr.File(label="Download Audio File"),
144
  gr.Audio(label="Audio Playback")
145
  ],
146
+ title="HIVEcorp Text-to-Speech with Millisecond SRT Generation",
147
+ description="Convert your script into audio and generate millisecond-accurate subtitles.",
148
  theme="compact",
149
  )
150