# Hugging Face Spaces status: Running
"""Gradio front end for the MMS zero-shot ASR demo.

Builds a two-column ``gr.Blocks`` page: the left column collects the audio,
a text-data file, an optional language-model file and two beam-search
scores; the right column shows the transcript. Clicking "Submit" calls
``zeroshot.process`` with those inputs and writes its result to the
transcript box.
"""

import gradio as gr

from zeroshot import process, ZS_EXAMPLES

# NOTE(review): the nesting below was reconstructed from the statement order;
# the exact placement of the Submit button and the examples panel should be
# confirmed against the deployed layout.
with gr.Blocks(css="style.css") as demo:
    # Page header and usage instructions.
    gr.Markdown(
        "<p align='center' style='font-size: 20px;'>MMS Zero-shot ASR Demo. See our arXiv <a href='https://arxiv.org/'>paper</a> for model details.</p>"
    )
    gr.HTML(
        """<center>The demo works on input audio in any language, as long as you provide a list of words or sentences for that language and an optional n-gram language model (even a simple 1-gram model will work!) to help with accuracy.<br>We recommend having a minimum of 5000 distinct words in the textfile to achieve a good performance.</center>"""
    )

    with gr.Row():
        # Left column: every input that is passed to `process`.
        with gr.Column():
            audio = gr.Audio(label="Audio Input\n(use microphone or upload a file)")
            with gr.Row():
                words_file = gr.File(label="Text Data")
                lm_file = gr.File(label="Language Model\n(optional)")

            # Decoding knobs stay collapsed so the defaults are the easy path.
            with gr.Accordion("Advanced Settings", open=False):
                gr.Markdown(
                    "The following parameters are used for beam-search decoding. Use the default values if you are not sure."
                )
                with gr.Row():
                    wscore = gr.Slider(
                        minimum=-10.0,
                        maximum=10.0,
                        value=0,
                        step=0.1,
                        interactive=True,
                        label="Word Insertion Score",
                    )
                    lmscore = gr.Slider(
                        minimum=-10.0,
                        maximum=10.0,
                        value=0,
                        step=0.1,
                        interactive=True,
                        label="Language Model Score",
                    )
                # Presumably these tell `process` to ignore the slider values
                # above and use its own defaults -- confirm in zeroshot.py.
                with gr.Row():
                    wscore_usedefault = gr.Checkbox(
                        label="Use Default Word Insertion Score", value=True
                    )
                    lmscore_usedefault = gr.Checkbox(
                        label="Use Default Language Model Score", value=True
                    )

            btn = gr.Button("Submit", elem_id="submit")

        # Right column: the output of `process`.
        with gr.Column():
            text = gr.Textbox(label="Transcript")

    # The order of `inputs` is the positional argument order seen by `process`.
    btn.click(
        process,
        inputs=[
            audio,
            words_file,
            lm_file,
            wscore,
            lmscore,
            wscore_usedefault,
            lmscore_usedefault,
        ],
        outputs=text,
    )

    # Each example fills only the audio and text-data inputs.
    examples = gr.Examples(examples=ZS_EXAMPLES, inputs=[audio, words_file])

# Start the server only when run as a script, so importing this module
# (e.g. by Gradio's reload mode or by tooling) does not launch it.
if __name__ == "__main__":
    demo.launch()