import whisper
import gradio as gr
import os
from datetime import datetime

from modules.subtitle_manager import get_srt, get_vtt, write_srt, write_vtt, safe_filename
from modules.youtube_manager import get_ytdata, get_ytaudio

DEFAULT_MODEL_SIZE = "large-v2"

class WhisperInference:
    def __init__(self):
        print("\nInitializing Model..\n")
        self.current_model_size = DEFAULT_MODEL_SIZE
        # Load the default model once at startup; it is reloaded later only when
        # the user requests a different model size.
        self.model = whisper.load_model(name=DEFAULT_MODEL_SIZE, download_root="models")
        self.available_models = whisper.available_models()
        self.available_langs = sorted(list(whisper.tokenizer.LANGUAGES.values()))

    def transcribe_file(self, fileobj, model_size, lang, subformat, istranslate,
                        progress=gr.Progress()):
        def progress_callback(progress_value):
            progress(progress_value, desc="Transcribing..")

        # Reload the model only when a different size is requested.
        if model_size != self.current_model_size:
            progress(0, desc="Initializing Model..")
            self.current_model_size = model_size
            self.model = whisper.load_model(name=model_size, download_root="models")

        if lang == "Automatic Detection":
            lang = None

        progress(0, desc="Loading Audio..")
        audio = whisper.load_audio(fileobj.name)

        if istranslate:
            result = self.model.transcribe(audio=audio, language=lang, verbose=False, task="translate",
                                           progress_callback=progress_callback)
        else:
            result = self.model.transcribe(audio=audio, language=lang, verbose=False,
                                           progress_callback=progress_callback)
        progress(1, desc="Completed!")

        file_name, file_ext = os.path.splitext(os.path.basename(fileobj.orig_name))
        file_name = file_name[:-9]  # presumably strips the suffix Gradio appends to uploaded file names
        file_name = safe_filename(file_name)
        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        output_path = f"outputs/{file_name}-{timestamp}"

        if subformat == "SRT":
            subtitle = get_srt(result["segments"])
            write_srt(subtitle, f"{output_path}.srt")
        elif subformat == "WebVTT":
            subtitle = get_vtt(result["segments"])
            write_vtt(subtitle, f"{output_path}.vtt")

        return f"Done! Subtitle is in the outputs folder.\n\n{subtitle}"

    def transcribe_youtube(self, youtubelink, model_size, lang, subformat, istranslate,
                           progress=gr.Progress()):
        def progress_callback(progress_value):
            progress(progress_value, desc="Transcribing..")

        # Reload the model only when a different size is requested.
        if model_size != self.current_model_size:
            progress(0, desc="Initializing Model..")
            self.current_model_size = model_size
            self.model = whisper.load_model(name=model_size, download_root="models")

        if lang == "Automatic Detection":
            lang = None

        progress(0, desc="Loading Audio from Youtube..")
        yt = get_ytdata(youtubelink)
        audio = whisper.load_audio(get_ytaudio(yt))

        if istranslate:
            result = self.model.transcribe(audio=audio, language=lang, verbose=False, task="translate",
                                           progress_callback=progress_callback)
        else:
            result = self.model.transcribe(audio=audio, language=lang, verbose=False,
                                           progress_callback=progress_callback)
        progress(1, desc="Completed!")

        file_name = safe_filename(yt.title)
        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        output_path = f"outputs/{file_name}-{timestamp}"

        if subformat == "SRT":
            subtitle = get_srt(result["segments"])
            write_srt(subtitle, f"{output_path}.srt")
        elif subformat == "WebVTT":
            subtitle = get_vtt(result["segments"])
            write_vtt(subtitle, f"{output_path}.vtt")

        return f"Done! Subtitle file is in the outputs folder.\n\n{subtitle}"

    def transcribe_mic(self, micaudio, model_size, lang, subformat, istranslate,
                       progress=gr.Progress()):
        def progress_callback(progress_value):
            progress(progress_value, desc="Transcribing..")

        # Reload the model only when a different size is requested.
        if model_size != self.current_model_size:
            progress(0, desc="Initializing Model..")
            self.current_model_size = model_size
            self.model = whisper.load_model(name=model_size, download_root="models")

        if lang == "Automatic Detection":
            lang = None

        progress(0, desc="Loading Audio..")

        if istranslate:
            result = self.model.transcribe(audio=micaudio, language=lang, verbose=False, task="translate",
                                           progress_callback=progress_callback)
        else:
            result = self.model.transcribe(audio=micaudio, language=lang, verbose=False,
                                           progress_callback=progress_callback)
        progress(1, desc="Completed!")

        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        output_path = f"outputs/Mic-{timestamp}"

        if subformat == "SRT":
            subtitle = get_srt(result["segments"])
            write_srt(subtitle, f"{output_path}.srt")
        elif subformat == "WebVTT":
            subtitle = get_vtt(result["segments"])
            write_vtt(subtitle, f"{output_path}.vtt")

        return f"Done! Subtitle file is in the outputs folder.\n\n{subtitle}"