hivecorp committed on
Commit
85eaa57
·
verified ·
1 Parent(s): ced46ea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -10
app.py CHANGED
@@ -19,11 +19,11 @@ def format_time(seconds):
19
  return f"{hrs:02}:{mins:02}:{secs:02},{millis:03}"
20
 
21
  # Function to generate SRT with accurate timing per batch
22
- async def generate_accurate_srt(batch_text, batch_num):
23
  audio_file = f"batch_{batch_num}_audio.wav"
24
 
25
  # Generate the audio using edge-tts
26
- tts = edge_tts.Communicate(batch_text, "en-US-JennyNeural")
27
  await tts.save(audio_file)
28
 
29
  # Get the actual length of the audio file
@@ -32,33 +32,35 @@ async def generate_accurate_srt(batch_text, batch_num):
32
  # Initialize SRT content
33
  srt_content = ""
34
  words = batch_text.split()
35
- start_time = 0.0
36
- segment_duration = actual_length / len(words) * 10 # Assuming ~10 words per SRT segment
37
 
38
  # Build SRT content with accurate timing
39
  for i in range(0, len(words), 10):
40
  segment_words = words[i:i+10]
41
  end_time = start_time + segment_duration
42
- srt_content += f"{i // 10 + 1}\n"
43
  srt_content += f"{format_time(start_time)} --> {format_time(end_time)}\n"
44
  srt_content += " ".join(segment_words) + "\n\n"
45
  start_time = end_time
46
 
47
- return srt_content, audio_file
48
 
49
  # Batch processing function for SRT and audio generation
50
  async def batch_process_srt_and_audio(script_text):
51
  batches = [script_text[i:i+500] for i in range(0, len(script_text), 500)]
52
  all_srt_content = ""
53
  combined_audio = AudioSegment.empty()
 
54
 
55
  for batch_num, batch_text in enumerate(batches):
56
- srt_content, audio_file = await generate_accurate_srt(batch_text, batch_num)
57
  all_srt_content += srt_content
58
 
59
  # Append the audio of each batch to the combined audio
60
  batch_audio = AudioSegment.from_file(audio_file)
61
  combined_audio += batch_audio
 
62
 
63
  # Clean up the individual batch audio file
64
  os.remove(audio_file)
@@ -73,7 +75,7 @@ async def batch_process_srt_and_audio(script_text):
73
  # Gradio interface function
74
  async def process_script(script_text):
75
  srt_path, audio_path = await batch_process_srt_and_audio(script_text)
76
- return srt_path, audio_path
77
 
78
  # Gradio interface setup
79
  app = gr.Interface(
@@ -81,9 +83,10 @@ app = gr.Interface(
81
  inputs=gr.Textbox(label="Enter Script Text", lines=10),
82
  outputs=[
83
  gr.File(label="Download SRT File"),
84
- gr.File(label="Download Audio File")
 
85
  ],
86
- description="Upload your script text, and the app will generate audio and an accurate SRT file for download."
87
  )
88
 
89
  app.launch()
 
19
  return f"{hrs:02}:{mins:02}:{secs:02},{millis:03}"
20
 
21
  # Function to generate SRT with accurate timing per batch
22
+ async def generate_accurate_srt(batch_text, batch_num, start_offset):
23
  audio_file = f"batch_{batch_num}_audio.wav"
24
 
25
  # Generate the audio using edge-tts
26
+ tts = edge_tts.Communicate(batch_text, "en-US-AndrewNeural", rate="-25%")
27
  await tts.save(audio_file)
28
 
29
  # Get the actual length of the audio file
 
32
  # Initialize SRT content
33
  srt_content = ""
34
  words = batch_text.split()
35
+ segment_duration = actual_length / len(words) * 10 # Adjusted for ~10 words per SRT segment
36
+ start_time = start_offset
37
 
38
  # Build SRT content with accurate timing
39
  for i in range(0, len(words), 10):
40
  segment_words = words[i:i+10]
41
  end_time = start_time + segment_duration
42
+ srt_content += f"{i // 10 + 1 + (batch_num * 100)}\n"
43
  srt_content += f"{format_time(start_time)} --> {format_time(end_time)}\n"
44
  srt_content += " ".join(segment_words) + "\n\n"
45
  start_time = end_time
46
 
47
+ return srt_content, audio_file, start_time
48
 
49
  # Batch processing function for SRT and audio generation
50
  async def batch_process_srt_and_audio(script_text):
51
  batches = [script_text[i:i+500] for i in range(0, len(script_text), 500)]
52
  all_srt_content = ""
53
  combined_audio = AudioSegment.empty()
54
+ start_offset = 0.0 # Track cumulative time offset for SRT timing
55
 
56
  for batch_num, batch_text in enumerate(batches):
57
+ srt_content, audio_file, end_offset = await generate_accurate_srt(batch_text, batch_num, start_offset)
58
  all_srt_content += srt_content
59
 
60
  # Append the audio of each batch to the combined audio
61
  batch_audio = AudioSegment.from_file(audio_file)
62
  combined_audio += batch_audio
63
+ start_offset = end_offset # Update the start offset for the next batch
64
 
65
  # Clean up the individual batch audio file
66
  os.remove(audio_file)
 
75
  # Gradio interface function
76
  async def process_script(script_text):
77
  srt_path, audio_path = await batch_process_srt_and_audio(script_text)
78
+ return srt_path, audio_path, audio_path
79
 
80
  # Gradio interface setup
81
  app = gr.Interface(
 
83
  inputs=gr.Textbox(label="Enter Script Text", lines=10),
84
  outputs=[
85
  gr.File(label="Download SRT File"),
86
+ gr.File(label="Download Audio File"),
87
+ gr.Audio(label="Play Audio")
88
  ],
89
+ description="Upload your script text, and the app will generate audio with en-US-AndrewNeural voice (Rate: -25%) and an accurate SRT file for download."
90
  )
91
 
92
  app.launch()