Spaces:

sp-uhh
/

urusha

Runtime error

File size: 2,278 Bytes

c65e0e9
dfb36ea
 
 
 
dd32858
 
 
 
 
 
 
 
 
 
 
 
dfb36ea
f61386b
dd32858
dfb36ea
 
 
dd32858
 
 
dfb36ea
 
 
 
 
dd32858
dfb36ea
 
 
 
 
 
 
 
 
dd32858
 
dfb36ea
 
 
 
 
 
c65e0e9
dd32858
dfb36ea
 
 
 
 
 
f61386b
dd32858

import torch
import torchaudio
from sgmse.model import ScoreModel
import gradio as gr
from sgmse.util.other import pad_spec

# Inference settings shared by the model loader and the enhancement function.
class Args:
    """Static configuration for running the SGMSE sampler in this demo."""

    # Torch device used for inference; switch to 'cuda' when a GPU is
    # available and enabled in the environment.
    device = 'cpu'

    # Name of the corrector method handed to the predictor-corrector sampler.
    corrector = 'langevin'

    # Number of steps used by the sampler (example value).
    N = 50

    # Number of corrector steps.
    corrector_steps = 1

    # Signal-to-noise ratio value passed to the corrector.
    snr = 0.1

    # Padding mode used when padding the spectrogram.
    pad_mode = 'reflect'


args = Args()

# Load the pre-trained model once at start-up so that every request handled by
# the app reuses the same instance. `map_location` makes sure a checkpoint that
# was saved from a GPU run can be deserialized on the configured device.
model = ScoreModel.load_from_checkpoint(
    "https://huggingface.co/sp-uhh/speech-enhancement-sgmse/resolve/main/train_vb_29nqe0uh_epoch%3D115.ckpt",
    map_location=args.device,
)
# Keep the weights on the same device the inputs are moved to in
# enhance_speech, and switch the module to evaluation mode for inference.
model.to(args.device)
model.eval()

def enhance_speech(audio_file):
    """Enhance a recording with the loaded score model and return the output path.

    Args:
        audio_file: Path to the audio file to enhance (the Gradio input is
            configured with ``type="filepath"``).

    Returns:
        Path of the WAV file (``'enhanced_output.wav'``) that holds the
        enhanced audio, written with the sample rate of the input file.

    Raises:
        ValueError: If no audio file was provided.
    """
    # Gradio passes None when the user submits without uploading anything;
    # fail with a clear message instead of an opaque loader error.
    if not audio_file:
        raise ValueError("No audio file provided. Please upload an audio file.")

    # Load and process the audio file
    # NOTE(review): no resampling or mono down-mix is done here — confirm the
    # sample rate and channel count the checkpoint expects.
    y, sr = torchaudio.load(audio_file)
    T_orig = y.size(1)

    # Peak-normalize. A completely silent input has a peak of 0, which would
    # turn the division below into NaNs, so fall back to a factor of 1.
    norm_factor = y.abs().max()
    if norm_factor == 0:
        norm_factor = torch.ones_like(norm_factor)
    y = y / norm_factor

    # Prepare DNN input: STFT -> model-specific forward transform, plus a
    # leading batch dimension, then pad the spectrogram.
    Y = torch.unsqueeze(model._forward_transform(model._stft(y.to(args.device))), 0)
    Y = pad_spec(Y, mode=args.pad_mode)

    # Reverse sampling with the predictor-corrector sampler
    sampler = model.get_pc_sampler(
        'reverse_diffusion', args.corrector, Y.to(args.device), N=args.N,
        corrector_steps=args.corrector_steps, snr=args.snr)
    sample, _ = sampler()

    # Backward transform in time domain, trimmed to the original length
    x_hat = model.to_audio(sample.squeeze(), T_orig)

    # Renormalize to the level of the input
    x_hat = x_hat * norm_factor

    # torchaudio.save needs a 2D torch.Tensor (channels, time), not a NumPy
    # array; squeeze() above may have dropped the channel dimension.
    x_hat = x_hat.detach().cpu()
    if x_hat.dim() == 1:
        x_hat = x_hat.unsqueeze(0)

    # Save the enhanced audio
    output_file = 'enhanced_output.wav'
    torchaudio.save(output_file, x_hat, sr)

    return output_file

# Gradio UI: a single audio upload as input and a single audio file as output,
# both exchanged with enhance_speech as file paths.
inputs = gr.Audio(label="Input Audio", type="filepath")
outputs = gr.Audio(label="Output Audio", type="filepath")
title = "Speech Enhancement using SGMSE"
description = (
    "This Gradio demo uses the SGMSE model for speech enhancement. "
    "Upload your audio file to enhance it."
)
article = (
    "<p style='text-align: center'>"
    "<a href='https://huggingface.co./SP-UHH/speech-enhancement-sgmse' target='_blank'>Model Card</a>"
    "</p>"
)

# share=True is intentionally omitted: it is not supported on Hugging Face Spaces.
gr.Interface(
    fn=enhance_speech,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    article=article,
).launch()