|
import os
import tempfile

import streamlit as st
import torchaudio
import speechbrain as sb
from speechbrain.dataio.dataio import read_audio
from IPython.display import Audio
from speechbrain.pretrained import SepformerSeparation as separator
import scipy.io.wavfile as wavfile
|
|
|
|
|
|
|
@st.cache_resource
def _load_model():
    """Load the pretrained SepFormer enhancement model.

    Streamlit re-executes the whole script on every user interaction, so an
    uncached module-level load would rebuild the model on each rerun.
    ``st.cache_resource`` keeps the loaded object around and hands the same
    instance back on later reruns.

    Returns:
        The object returned by ``SepformerSeparation.from_hparams``.
    """
    return separator.from_hparams(
        source="speechbrain/sepformer-whamr-enhancement",
        savedir='pretrained_models/sepformer-whamr-enhancement',
    )


# Module-level handle used by the rest of the script.
model = _load_model()
|
|
|
|
|
def app():
    """Render the "Noiz" page: upload a WAV file, enhance it, play both versions.

    Shows a title and a WAV-only file uploader. Once a file has been
    uploaded, its bytes are written to a temporary WAV file, passed to the
    module-level ``model`` through ``separate_file``, and the result is saved
    as a WAV file. Two audio players are then rendered: the original upload
    first, the enhanced audio second.

    Returns:
        None. All output goes through Streamlit widgets.
    """
    import os
    import tempfile

    st.title("Noiz: Audio Enhancer")

    uploaded_file = st.file_uploader("Choose an audio file", type=["wav"])
    if uploaded_file is None:
        # Nothing to process until the user picks a file.
        return

    # getvalue() returns the whole buffer regardless of the current stream
    # position, so it still works if the upload has already been read once.
    audio_bytes = uploaded_file.getvalue()

    # Work inside a private temporary directory instead of fixed file names
    # in the working directory: simultaneous sessions cannot overwrite each
    # other's files, and the files are removed when the block exits.
    with tempfile.TemporaryDirectory() as work_dir:
        input_path = os.path.join(work_dir, "uploaded_audio.wav")
        output_path = os.path.join(work_dir, "enhanced_sound.wav")

        with open(input_path, "wb") as f:
            f.write(audio_bytes)

        # savedir points at the same directory so that whatever the model
        # stores while fetching the input also stays in the temp directory.
        enhanced_speech = model.separate_file(path=input_path, savedir=work_dir)

        # Drop singleton dimensions, then add a leading channel axis so the
        # tensor is (1, samples), the same shape the saved file had before.
        # NOTE(review): assumes one file yields one estimated source, so
        # squeeze() leaves a 1-D signal -- confirm for other models.
        enhanced_signal = enhanced_speech.detach().cpu().squeeze().unsqueeze(0)

        # Prefer the sample rate declared in the model's hyperparameters;
        # fall back to the previously hard-coded 8000 Hz if it is missing.
        sample_rate = int(
            getattr(getattr(model, "hparams", None), "sample_rate", 8000)
        )
        torchaudio.save(output_path, enhanced_signal, sample_rate)

        with open(output_path, "rb") as f:
            enhanced_byte = f.read()

    # Original first, enhanced second.
    st.audio(audio_bytes, format='audio/wav')
    st.audio(enhanced_byte, format='audio/wav')
|
|
|
|
|
# Script entry point: build the page only when this file is run as the main
# module, not when it is imported.
if __name__ == "__main__":
    app()
|
|