MarcNg committed on
Commit
18e9a58
1 Parent(s): 4ed2048

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +35 -0
  2. output.wav +0 -0
app.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from tensorflow_tts.inference import AutoProcessor, TFAutoModel
3
+ import tensorflow as tf
4
+
5
+ import numpy as np
6
+ import soundfile as sf
7
+ import yaml
8
+
9
# Hub repositories for the acoustic model (plus its text processor) and the vocoder.
FASTSPEECH2_REPO = "MarcNg/fastspeech2-vi-infore"
VOCODER_REPO = "tensorspeech/tts-mb_melgan-ljspeech-en"


def _cache_resource(loader):
    """Wrap *loader* so its result is built once and reused on later reruns.

    Streamlit re-executes this script on every widget interaction; without
    caching, every button click would re-instantiate all pretrained models.
    Prefers ``st.cache_resource`` when the installed Streamlit provides it and
    falls back to the legacy ``st.cache(allow_output_mutation=True)`` otherwise.
    """
    cache_resource = getattr(st, "cache_resource", None)
    if cache_resource is not None:
        return cache_resource(loader)
    return st.cache(allow_output_mutation=True)(loader)


@_cache_resource
def _load_models():
    """Return ``(processor, fastspeech2, mb_melgan)`` loaded via ``from_pretrained``."""
    return (
        AutoProcessor.from_pretrained(FASTSPEECH2_REPO),
        TFAutoModel.from_pretrained(FASTSPEECH2_REPO),
        TFAutoModel.from_pretrained(VOCODER_REPO),
    )


# Module-level names are kept so the rest of the script can use them unchanged.
processor, fastspeech2, mb_melgan = _load_models()

# Path of the WAV file that synthesized audio is written to and played from.
output = "output.wav"

st.header("MarcNg/fastspeech2-vi-infore Demo")
16
+
17
def tts(text, speaker_id=0, speed_ratio=1.0, f0_ratio=1.0, energy_ratio=1.0):
    """Run FastSpeech2 inference on *text* and return the ``mel_after`` output.

    Args:
        text: String handed to ``processor.text_to_sequence``.
        speaker_id: Value sent as ``speaker_ids`` (default ``0``, the value
            that was previously hard-coded).
        speed_ratio: Value sent as ``speed_ratios`` (default ``1.0``).
        f0_ratio: Value sent as ``f0_ratios`` (default ``1.0``).
        energy_ratio: Value sent as ``energy_ratios`` (default ``1.0``).

    Returns:
        The second element of the 5-tuple returned by
        ``fastspeech2.inference`` (presumably the post-net refined
        mel-spectrogram -- confirm against the model documentation).
    """
    input_ids = processor.text_to_sequence(text)

    # The id sequence gets a leading axis of size 1 and every control value is
    # wrapped in a one-element list, i.e. a single utterance per call.
    _, mel_after, _, _, _ = fastspeech2.inference(
        input_ids=tf.expand_dims(tf.convert_to_tensor(input_ids, dtype=tf.int32), 0),
        speaker_ids=tf.convert_to_tensor([speaker_id], dtype=tf.int32),
        speed_ratios=tf.convert_to_tensor([speed_ratio], dtype=tf.float32),
        f0_ratios=tf.convert_to_tensor([f0_ratio], dtype=tf.float32),
        energy_ratios=tf.convert_to_tensor([energy_ratio], dtype=tf.float32),
    )
    return mel_after
28
+
29
text = st.text_input("Text to process")

if st.button("Speak"):
    if not text.strip():
        # The text box starts out empty; there is nothing to synthesize, so
        # tell the user instead of running the models on blank input.
        st.warning("Please enter some text to synthesize.")
    else:
        mel_after = tts(text)
        # Index [0, :, 0] keeps the first item along the first and last axes
        # of the vocoder output, leaving a 1-D sequence of samples.
        audio_after = mb_melgan.inference(mel_after)[0, :, 0]
        # NOTE(review): 22050 is assumed to be the vocoder's sample rate -- confirm.
        sf.write(output, audio_after, 22050, 'PCM_16')
        st.audio(output, format='audio/wav')
output.wav ADDED
Binary file (48.7 kB). View file