File size: 1,697 Bytes
a232255
70eff1f
a232255
 
 
 
7cac441
 
a232255
 
 
 
 
 
c12131e
a232255
 
 
 
 
 
 
 
 
 
 
 
 
 
fcfd302
a232255
7cac441
27d65f0
5a2fae2
60e8ae0
27d65f0
a232255
fcfd302
a232255
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import streamlit as st
import torchaudio
import speechbrain as sb
from speechbrain.dataio.dataio import read_audio
from IPython.display import Audio
from speechbrain.pretrained import SepformerSeparation as separator
import scipy.io.wavfile as wavfile
#import io

# Instantiate the pretrained SepFormer enhancement model once at module level;
# `savedir` is the local directory handed to SpeechBrain for the checkpoint.
model = separator.from_hparams(
    source="speechbrain/sepformer-whamr-enhancement",
    savedir="pretrained_models/sepformer-whamr-enhancement",
)

# Define the Streamlit app
def app():
    """Render the page: upload a WAV file, enhance it, and play both versions.

    The uploaded bytes are written to a per-call temporary directory, passed
    through the module-level ``model`` via ``separate_file``, and the result
    is saved as a WAV file and read back as bytes. Two audio players are
    rendered: the original upload first, then the enhanced output. Nothing
    beyond the title and uploader is rendered until a file is provided.
    """
    # Function-scope imports keep this block self-contained.
    import tempfile
    from pathlib import Path

    # Sample rate used when writing the enhanced audio.
    # NOTE(review): 8 kHz matches the usage example on the model card for
    # speechbrain/sepformer-whamr-enhancement -- confirm if the checkpoint
    # is ever swapped for a different-rate variant.
    sample_rate = 8000

    st.title("Noiz: Audio Enhancer")

    # Add a file uploader to allow the user to select an audio file
    uploaded_file = st.file_uploader("Choose an audio file", type=["wav"])
    if uploaded_file is None:
        return

    # getvalue() returns the whole buffer regardless of the current stream
    # position, whereas read() only returns what is left after the cursor.
    audio_bytes = uploaded_file.getvalue()

    # A private scratch directory per call: fixed file names in the working
    # directory would be shared (and overwritten) by concurrent sessions.
    with tempfile.TemporaryDirectory() as workdir:
        input_path = Path(workdir) / "uploaded_audio.wav"
        output_path = Path(workdir) / "enhanced_sound.wav"
        input_path.write_bytes(audio_bytes)

        # Perform speech enhancement using the Sepformer model.
        # The path is passed with forward slashes, and `savedir` points at
        # the same scratch directory so that whatever separate_file fetches
        # or links for the input is cleaned up together with it.
        est_sources = model.separate_file(
            path=input_path.as_posix(), savedir=workdir
        )

        # Select the first (only) estimated source, as in the model card
        # example; presumably this leaves a (channels, time) tensor, which
        # is the layout torchaudio.save expects -- TODO confirm.
        enhanced_signal = est_sources[:, :, 0].detach().cpu()
        torchaudio.save(str(output_path), enhanced_signal, sample_rate)
        enhanced_bytes = output_path.read_bytes()

    # Play the original and enhanced audio
    st.audio(audio_bytes, format='audio/wav')
    st.audio(enhanced_bytes, format='audio/wav')
# Entry point: build the page when this file is executed as a script.
if __name__ == "__main__":
    app()