Spaces:

hivecorp
/

master-tts-pro

Sleeping

App Files Community

hivecorp committed on Nov 3, 2024

Commit

85eaa57

verified ·

1 Parent(s): ced46ea

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -10

app.py CHANGED Viewed

@@ -19,11 +19,11 @@ def format_time(seconds):
     return f"{hrs:02}:{mins:02}:{secs:02},{millis:03}"
 # Function to generate SRT with accurate timing per batch
-async def generate_accurate_srt(batch_text, batch_num):
     audio_file = f"batch_{batch_num}_audio.wav"
     # Generate the audio using edge-tts
-    tts = edge_tts.Communicate(batch_text, "en-US-JennyNeural")
     await tts.save(audio_file)
     # Get the actual length of the audio file
@@ -32,33 +32,35 @@ async def generate_accurate_srt(batch_text, batch_num):
     # Initialize SRT content
     srt_content = ""
     words = batch_text.split()
-    start_time = 0.0
-    segment_duration = actual_length / len(words) * 10  # Assuming ~10 words per SRT segment
     # Build SRT content with accurate timing
     for i in range(0, len(words), 10):
         segment_words = words[i:i+10]
         end_time = start_time + segment_duration
-        srt_content += f"{i // 10 + 1}\n"
         srt_content += f"{format_time(start_time)} --> {format_time(end_time)}\n"
         srt_content += " ".join(segment_words) + "\n\n"
         start_time = end_time
-    return srt_content, audio_file
 # Batch processing function for SRT and audio generation
 async def batch_process_srt_and_audio(script_text):
     batches = [script_text[i:i+500] for i in range(0, len(script_text), 500)]
     all_srt_content = ""
     combined_audio = AudioSegment.empty()
     for batch_num, batch_text in enumerate(batches):
-        srt_content, audio_file = await generate_accurate_srt(batch_text, batch_num)
         all_srt_content += srt_content
         # Append the audio of each batch to the combined audio
         batch_audio = AudioSegment.from_file(audio_file)
         combined_audio += batch_audio
         # Clean up the individual batch audio file
         os.remove(audio_file)
@@ -73,7 +75,7 @@ async def batch_process_srt_and_audio(script_text):
 # Gradio interface function
 async def process_script(script_text):
     srt_path, audio_path = await batch_process_srt_and_audio(script_text)
-    return srt_path, audio_path
 # Gradio interface setup
 app = gr.Interface(
@@ -81,9 +83,10 @@ app = gr.Interface(
     inputs=gr.Textbox(label="Enter Script Text", lines=10),
     outputs=[
         gr.File(label="Download SRT File"),
-        gr.File(label="Download Audio File")
     ],
-    description="Upload your script text, and the app will generate audio and an accurate SRT file for download."
 )
 app.launch()

     return f"{hrs:02}:{mins:02}:{secs:02},{millis:03}"
 # Function to generate SRT with accurate timing per batch
+async def generate_accurate_srt(batch_text, batch_num, start_offset):
     audio_file = f"batch_{batch_num}_audio.wav"
     # Generate the audio using edge-tts
+    tts = edge_tts.Communicate(batch_text, "en-US-AndrewNeural", rate="-25%")
     await tts.save(audio_file)
     # Get the actual length of the audio file
     # Initialize SRT content
     srt_content = ""
     words = batch_text.split()
+    segment_duration = actual_length / len(words) * 10  # Adjusted for ~10 words per SRT segment
+    start_time = start_offset
     # Build SRT content with accurate timing
     for i in range(0, len(words), 10):
         segment_words = words[i:i+10]
         end_time = start_time + segment_duration
+        srt_content += f"{i // 10 + 1 + (batch_num * 100)}\n"
         srt_content += f"{format_time(start_time)} --> {format_time(end_time)}\n"
         srt_content += " ".join(segment_words) + "\n\n"
         start_time = end_time
+    return srt_content, audio_file, start_time
 # Batch processing function for SRT and audio generation
 async def batch_process_srt_and_audio(script_text):
     batches = [script_text[i:i+500] for i in range(0, len(script_text), 500)]
     all_srt_content = ""
     combined_audio = AudioSegment.empty()
+    start_offset = 0.0  # Track cumulative time offset for SRT timing
     for batch_num, batch_text in enumerate(batches):
+        srt_content, audio_file, end_offset = await generate_accurate_srt(batch_text, batch_num, start_offset)
         all_srt_content += srt_content
         # Append the audio of each batch to the combined audio
         batch_audio = AudioSegment.from_file(audio_file)
         combined_audio += batch_audio
+        start_offset = end_offset  # Update the start offset for the next batch
         # Clean up the individual batch audio file
         os.remove(audio_file)
 # Gradio interface function
 async def process_script(script_text):
     srt_path, audio_path = await batch_process_srt_and_audio(script_text)
+    return srt_path, audio_path, audio_path
 # Gradio interface setup
 app = gr.Interface(
     inputs=gr.Textbox(label="Enter Script Text", lines=10),
     outputs=[
         gr.File(label="Download SRT File"),
+        gr.File(label="Download Audio File"),
+        gr.Audio(label="Play Audio")
     ],
+    description="Upload your script text, and the app will generate audio with en-US-AndrewNeural voice (Rate: -25%) and an accurate SRT file for download."
 )
 app.launch()