hivecorp committed on
Commit
8437f06
·
verified ·
1 Parent(s): 7b61720

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -16
app.py CHANGED
@@ -26,25 +26,24 @@ async def text_to_speech(text, voice, rate, pitch):
26
  await communicate.save(tmp_path)
27
  return tmp_path, None
28
 
29
- # Generate SRT file with 2 lines of subtitles containing 14-16 words total
30
- def generate_srt(words, audio_duration, srt_path):
31
  with open(srt_path, 'w', encoding='utf-8') as srt_file:
32
- segment_duration = audio_duration / (len(words) // 16) # Average duration for each 16 words
33
  current_time = 0
34
 
35
- for i in range(0, len(words), 22): # Every 16 words for two lines
36
- line1 = ' '.join(words[i:i + 10]) # First line with up to 8 words
37
- line2 = ' '.join(words[i + 12:i + 22]) # Second line with the next 8 words
 
38
 
39
- if line2.strip() == '':
40
- line2 = '' # Handle case where there aren't enough words for the second line
41
-
42
  start_time = current_time
43
- end_time = start_time + segment_duration # Adjust duration for one line
44
 
45
  start_time_str = format_srt_time(start_time)
46
  end_time_str = format_srt_time(end_time)
47
- srt_file.write(f"{i // 22 + 1}\n{start_time_str} --> {end_time_str}\n{line1}\n{line2}\n\n")
 
48
 
49
  current_time += segment_duration # Update current time for the next segment
50
 
@@ -60,7 +59,7 @@ def format_srt_time(seconds):
60
  return f"{hours:02}:{minutes:02}:{seconds:02},{millis:03}"
61
 
62
  # Text to audio and SRT functionality
63
- async def text_to_audio_and_srt(text, voice, rate, pitch):
64
  audio_path, warning = await text_to_speech(text, voice, rate, pitch)
65
  if warning:
66
  return None, None, warning
@@ -72,13 +71,13 @@ async def text_to_audio_and_srt(text, voice, rate, pitch):
72
  base_name = os.path.splitext(audio_path)[0]
73
  srt_path = f"{base_name}_subtitle.srt"
74
  words = text.split()
75
- generate_srt(words, audio_duration, srt_path)
76
 
77
  return audio_path, srt_path, None
78
 
79
  # Gradio interface function
80
- def tts_interface(text, voice, rate, pitch):
81
- audio_path, srt_path, warning = asyncio.run(text_to_audio_and_srt(text, voice, rate, pitch))
82
  return audio_path, srt_path, warning
83
 
84
  # Create Gradio app
@@ -100,6 +99,9 @@ async def create_demo():
100
  voice_dropdown = gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value="")
101
  rate_slider = gr.Slider(minimum=-50, maximum=50, value=0, label="Rate Adjustment (%)", step=1)
102
  pitch_slider = gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1)
 
 
 
103
 
104
  generate_button = gr.Button("Generate Audio and Subtitles", variant="primary")
105
 
@@ -110,7 +112,7 @@ async def create_demo():
110
 
111
  generate_button.click(
112
  fn=tts_interface,
113
- inputs=[text_input, voice_dropdown, rate_slider, pitch_slider],
114
  outputs=[output_audio, output_srt, warning_msg]
115
  )
116
 
 
26
  await communicate.save(tmp_path)
27
  return tmp_path, None
28
 
29
# Generate SRT file based on user preferences
def generate_srt(words, audio_duration, srt_path, words_per_line, lines_per_paragraph):
    """Write an SRT subtitle file that spreads ``words`` evenly over the audio.

    The words are cut into segments of ``words_per_line * lines_per_paragraph``
    words. Each segment becomes one numbered SRT cue whose text is broken into
    lines of at most ``words_per_line`` words. Every cue gets the same share of
    ``audio_duration``, and the final (possibly shorter) cue ends exactly at
    the end of the audio.

    Args:
        words: Sequence of word strings, in spoken order.
        audio_duration: Total audio length in seconds.
        srt_path: Destination path of the ``.srt`` file (overwritten).
        words_per_line: Maximum number of words on one subtitle line.
        lines_per_paragraph: Maximum number of lines in one cue.

    Raises:
        ValueError: If ``words_per_line`` or ``lines_per_paragraph`` is < 1.
    """
    # Coerce to int so slicing and range() keep working if the caller hands
    # over whole-number floats (the values come from UI sliders).
    words_per_line = int(words_per_line)
    lines_per_paragraph = int(lines_per_paragraph)
    if words_per_line < 1 or lines_per_paragraph < 1:
        raise ValueError("words_per_line and lines_per_paragraph must be >= 1")

    words_per_segment = words_per_line * lines_per_paragraph

    # Ceiling division: a trailing partial segment is still a cue. Using floor
    # division here made the divisor 0 for short texts (ZeroDivisionError) and
    # pushed the last cues past the end of the audio for uneven word counts.
    segment_count = -(-len(words) // words_per_segment)

    with open(srt_path, 'w', encoding='utf-8') as srt_file:
        if segment_count == 0:
            # No words: leave an empty subtitle file instead of failing.
            return

        segment_duration = audio_duration / segment_count

        for index in range(segment_count):
            first_word = index * words_per_segment
            segment_words = words[first_word:first_word + words_per_segment]
            lines = [
                ' '.join(segment_words[j:j + words_per_line])
                for j in range(0, len(segment_words), words_per_line)
            ]

            # Derive times from the index rather than accumulating, so float
            # rounding does not drift over many cues.
            start_time = index * segment_duration
            end_time = start_time + segment_duration

            start_time_str = format_srt_time(start_time)
            end_time_str = format_srt_time(end_time)
            srt_file.write(f"{index + 1}\n{start_time_str} --> {end_time_str}\n")
            srt_file.write('\n'.join(lines) + '\n\n')
49
 
 
59
  return f"{hours:02}:{minutes:02}:{seconds:02},{millis:03}"
60
 
61
  # Text to audio and SRT functionality
62
+ async def text_to_audio_and_srt(text, voice, rate, pitch, words_per_line, lines_per_paragraph):
63
  audio_path, warning = await text_to_speech(text, voice, rate, pitch)
64
  if warning:
65
  return None, None, warning
 
71
  base_name = os.path.splitext(audio_path)[0]
72
  srt_path = f"{base_name}_subtitle.srt"
73
  words = text.split()
74
+ generate_srt(words, audio_duration, srt_path, words_per_line, lines_per_paragraph)
75
 
76
  return audio_path, srt_path, None
77
 
78
  # Gradio interface function
79
def tts_interface(text, voice, rate, pitch, words_per_line, lines_per_paragraph):
    """Synchronous entry point that runs the async audio + SRT pipeline.

    Drives ``text_to_audio_and_srt`` to completion with ``asyncio.run`` and
    hands back its three results unchanged.

    Returns:
        A ``(audio_path, srt_path, warning)`` tuple.
    """
    pipeline = text_to_audio_and_srt(
        text, voice, rate, pitch, words_per_line, lines_per_paragraph
    )
    audio_path, srt_path, warning = asyncio.run(pipeline)
    return audio_path, srt_path, warning
82
 
83
  # Create Gradio app
 
99
  voice_dropdown = gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value="")
100
  rate_slider = gr.Slider(minimum=-50, maximum=50, value=0, label="Rate Adjustment (%)", step=1)
101
  pitch_slider = gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1)
102
+
103
+ words_per_line = gr.Slider(minimum=8, maximum=10, value=8, label="Words per Line", step=1)
104
+ lines_per_paragraph = gr.Slider(minimum=1, maximum=4, value=2, label="Lines per Paragraph", step=1)
105
 
106
  generate_button = gr.Button("Generate Audio and Subtitles", variant="primary")
107
 
 
112
 
113
  generate_button.click(
114
  fn=tts_interface,
115
+ inputs=[text_input, voice_dropdown, rate_slider, pitch_slider, words_per_line, lines_per_paragraph],
116
  outputs=[output_audio, output_srt, warning_msg]
117
  )
118