Chrysoula's picture
Update app.py
cdb5b64
import tempfile

import gradio as gr
import pytube as pt
from transformers import pipeline
# Speech-recognition pipeline used by every transcription function below.
# Change the model id to "your-username/the-name-you-picked" to use your own.
pipe = pipeline(model="Hoft/whisper-small-swedish-asr")

# Sentiment classifier that is run on the transcribed text.
sa = pipeline(
    task="sentiment-analysis",
    model="marma/bert-base-swedish-cased-sentiment",
)
def get_emoji(feeling):
    """Map a sentiment label to a single emoji character.

    Args:
        feeling: Sentiment label string. Callers in this file pass the
            ``label`` field of the sentiment pipeline's first result.

    Returns:
        The smiling-face emoji when ``feeling`` is exactly ``'POSITIVE'``;
        the pensive-face emoji for every other value.
    """
    # The emoji are spelled as Unicode escapes on purpose: the sad face had
    # been stored as mojibake (its UTF-8 bytes decoded with a single-byte
    # codepage), and escapes cannot be damaged by a re-encoding of this file.
    if feeling == 'POSITIVE':
        return '\U0001F60A'  # smiling face with smiling eyes
    return '\U0001F614'  # pensive face
def microphone_or_file_transcribe(audio):
    """Transcribe an audio file and tag the transcript with a sentiment emoji.

    Args:
        audio: Path to the audio file to transcribe (the audio inputs wired
            to this function use ``type="filepath"``). May be ``None`` or
            empty when the user submits without recording or uploading.

    Returns:
        A ``(text, emoji)`` tuple: the transcript produced by ``pipe`` and
        the emoji chosen by ``get_emoji`` from the sentiment label of that
        transcript. ``("", "")`` when no audio was supplied.
    """
    # Nothing to transcribe: return empty outputs instead of letting the
    # speech-recognition pipeline fail on a missing input.
    if not audio:
        return "", ""

    text = pipe(audio)["text"]
    sa_result = sa(text)[0]
    return text, get_emoji(sa_result['label'])
def youtube_transcribe(url):
    """Transcribe the audio track of a YouTube video and tag its sentiment.

    Downloads the first audio-only stream of the video into a temporary
    directory, runs it through ``pipe`` and then classifies the transcript
    with ``sa``.

    Args:
        url: URL of the YouTube video. May be ``None``, empty or blank when
            the user submits an empty text box.

    Returns:
        A ``(text, emoji)`` tuple: the transcript and the emoji chosen by
        ``get_emoji`` from the transcript's sentiment label. ``("", "")``
        when no URL was supplied.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    url = (url or "").strip()
    if not url:
        return "", ""

    yt = pt.YouTube(url)
    stream = yt.streams.filter(only_audio=True).first()
    if stream is None:
        raise ValueError(f"No audio-only stream available for {url!r}")

    # Download into a per-call temporary directory: a fixed file in the
    # working directory would be overwritten by concurrent requests and
    # would never be cleaned up.
    with tempfile.TemporaryDirectory() as workdir:
        audio_path = stream.download(output_path=workdir, filename="audio.mp3")
        # Transcribe while the file still exists; the directory is removed
        # as soon as the ``with`` block exits.
        text = pipe(audio_path)["text"]

    sa_result = sa(text)[0]
    return text, get_emoji(sa_result['label'])
# Root Blocks container; the tabbed layout is attached to it further down.
app = gr.Blocks()

# Tab that transcribes speech recorded from the microphone.
microphone_tab = gr.Interface(
    title="Whisper Small Swedish: Microphone ",
    description="Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model and Sentiment Analysis.",
    fn=microphone_or_file_transcribe,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs=[
        gr.Textbox(label="Text"),
        gr.Textbox(label="Feeling"),
    ],
)
# Tab that transcribes the audio track of a YouTube video given its URL.
youtube_tab = gr.Interface(
    fn=youtube_transcribe,
    # Top-level gr.Textbox instead of the legacy gr.inputs.Textbox, matching
    # the components used everywhere else in this file.
    inputs=[
        gr.Textbox(
            lines=1,
            placeholder="Paste the URL to a YouTube video",
            label="URL",
        )
    ],
    outputs=[gr.Textbox(label="Text"), gr.Textbox(label="Feeling")],
    title="Whisper Small Swedish: Youtube",
    description="Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model and Sentiment Analysis.",
)
# Tab that transcribes an uploaded audio file.
file_tab = gr.Interface(
    fn=microphone_or_file_transcribe,
    # Top-level gr.Audio instead of the legacy gr.inputs.Audio, matching the
    # microphone tab's input component.
    inputs=gr.Audio(source="upload", type="filepath"),
    outputs=[gr.Textbox(label="Text"), gr.Textbox(label="Feeling")],
    title="Whisper Small Swedish: File",
    description="Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model and Sentiment Analysis.",
)
# Attach the three interfaces to the root container, one tab each.
with app:
    gr.TabbedInterface(
        interface_list=[microphone_tab, youtube_tab, file_tab],
        tab_names=["Microphone", "YouTube", "File"],
    )

# Start the web app with request queuing turned on.
app.launch(enable_queue=True)