"""Gradio demo for Whisper large-v3-turbo speech recognition.

Running this file installs ``flash-attn``, loads the ASR pipeline on
``cuda:0`` with FlashAttention-2, builds the UI and launches the queued
Gradio server.
"""

import os
import subprocess
import sys

# Markdown shown at the top of the page.
ABOUT = """
# Fast Whisper Turbo ⚡

Ultra-fast Whisper V3 Turbo inference, with enhancements sourced from [insanely-fast-whisper](https://github.com/Vaibhavs10/insanely-fast-whisper).
"""

# Markdown shown at the bottom of the page.
CREDITS = """
## Credits

This project was made possible through the work of several other projects:

- [insanely-fast-whisper](https://github.com/Vaibhavs10/insanely-fast-whisper)
"""

# Install flash-attn at startup, before the pipeline below requests
# "flash_attention_2". Recipe taken from:
# https://huggingface.co/spaces/zero-gpu-explorers/README/discussions/75#666e4681303f0a5d67175a90
#
# The variable is merged into the current environment instead of replacing it:
# passing a bare dict as ``env`` would drop PATH, HOME and any virtualenv
# settings for the child process. ``sys.executable -m pip`` makes sure the
# package lands in the interpreter that is running this script.
_install = subprocess.run(
    [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    check=False,  # best-effort: flash-attn may already be installed
)
if _install.returncode != 0:
    print(
        f"warning: 'pip install flash-attn' exited with code {_install.returncode}",
        file=sys.stderr,
    )

# Third-party imports intentionally follow the install step above.
import gradio as gr  # noqa: E402
from transformers import pipeline  # noqa: E402
import torch  # noqa: E402
import spaces  # noqa: E402

# Speech-recognition pipeline, created once at import time and shared by
# every request.
pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-large-v3-turbo",
    torch_dtype=torch.float16,
    device="cuda:0",
    model_kwargs={"attn_implementation": "flash_attention_2"},
)


@spaces.GPU
def transcribe(audio, task):
    """Run the Whisper pipeline on one audio clip and return its text.

    Args:
        audio: Value of the ``gr.Audio`` component. The component is declared
            with ``type="filepath"``; it is ``None`` when nothing was provided.
        task: Either ``"transcribe"`` or ``"translate"``; forwarded to the
            model through ``generate_kwargs``.

    Returns:
        The recognised text with leading and trailing whitespace removed.

    Raises:
        gr.Error: If no audio was provided.
    """
    if audio is None:
        # Fail with a readable message instead of an error from deep inside
        # the pipeline.
        raise gr.Error("Please upload or record an audio clip first.")

    gr.Info("Starting transcription task")
    outputs = pipe(
        audio,
        chunk_length_s=30,
        batch_size=128,
        generate_kwargs={"task": task},
        return_timestamps=False,
    )
    gr.Info("Finished transcription task")
    return outputs["text"].strip()


with gr.Blocks() as demo:
    gr.Markdown(ABOUT)
    audio = gr.Audio(label="Audio", type="filepath", interactive=True)
    task = gr.Radio(
        ["transcribe", "translate"],
        label="Task",
        value="transcribe",
        interactive=True,
    )
    btn = gr.Button("Transcribe", variant="primary")
    output = gr.Textbox(label="Transcription", interactive=False)
    btn.click(transcribe, inputs=[audio, task], outputs=output)
    gr.Markdown(CREDITS)

demo.queue().launch()