import os

import gradio as gr
from huggingsound import SpeechRecognitionModel
from pydub import AudioSegment
from transformers import BertModel, BertTokenizer, logging, pipeline

unmasker = pipeline('fill-mask', model='bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained("bert-base-uncased")
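# Note: unmasker() (the fill-mask pipeline above) returns a list of candidate dicts with
# 'score', 'token' (vocabulary id), 'token_str', and 'sequence' keys; the commented-out
# correction pass further down relies on 'token' and 'token_str'.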
def levenshtein_distance(s, t):
    """Minimum number of single-character edits needed to turn s into t."""
    m, n = len(s), len(t)
    d = [[0] * (n + 1) for _ in range(m + 1)]
    for i in range(m + 1):
        d[i][0] = i
    for j in range(n + 1):
        d[0][j] = j
    for j in range(1, n + 1):
        for i in range(1, m + 1):
            if s[i - 1] == t[j - 1]:
                d[i][j] = d[i - 1][j - 1]
            else:
                d[i][j] = 1 + min(d[i - 1][j], d[i][j - 1], d[i - 1][j - 1])
    return d[m][n]
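# Example (sanity check): levenshtein_distance("bee", "be") == 1 and
# levenshtein_distance("flies", "flees") == 1, so the distance threshold of <= 2 used in the
# commented-out correction pass below accepts near-miss transcriptions.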
def collate(tokens):
    """Join a list of word and punctuation tokens back into readable, capitalized text."""
    pun_marks = [",", ".", "?", "!", ";", ":", "-", "β", "(", ")", "[", "]", "{", "}", "'", "\"", "`"]
    output = ""
    capital = True   # capitalize the next word
    dash = False     # join the next word without a leading space
    for token in tokens:
        if token in pun_marks:
            output += token
            if token in [".", "("]:
                capital = True
            dash = token in ["-", "'"]
        else:
            piece = "" if dash else " "
            if capital:
                piece += token.capitalize()
                capital = False
            else:
                piece += token
            output += piece
            dash = False  # only the word immediately after a dash/apostrophe is joined
    return output
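# Example: collate(["hello", ",", "world", "."]) returns " Hello, world."
# (the very first word is still preceded by a space, which callers may want to strip).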
def everything(audio_path):
    """Transcribe a single audio file with wav2vec2 and return the text."""
    # Loading the model on every call is slow; it could be moved to module level.
    w2vmodel = SpeechRecognitionModel("jonatasgrosman/wav2vec2-large-xlsr-53-english")
    logging.set_verbosity_error()  # change 'error' to 'warning' or remove this line to see the warnings
    # https://huggingface.co./jonatasgrosman/wav2vec2-large-xlsr-53-english
    # https://huggingface.co./bert-base-uncased
    transcriptions = w2vmodel.transcribe([audio_path])  # transcribe expects a list of paths
    return transcriptions[0]["transcription"]
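# The commented-out block below is an experimental BERT-based cleanup pass: each transcribed
# word is masked in turn, the fill-mask model proposes candidates, and the closest candidate
# replaces the word only if its Levenshtein distance from the original word is at most 2.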
# input = transcriptions[0]["transcription"]
# input = input.split()
# #(1) is a strategy where tokens are used to determine lexicographic distance
# #(2) is a strategy where replaced words are collected into a separate output list
# for t in range(1):
#     # output = []  #(2)
#     for i in range(len(input)):
#         temp = input[i]
#         token = tokenizer(temp)['input_ids'][1]
#         input[i] = "[MASK]"
#         apiint = unmasker(' '.join(input))
#         dist = []
#         for r in range(5):
#             # if (np.abs((apiint[r]['token'] - token)) < 2):  #(1)
#             dist.append(levenshtein_distance(temp, apiint[r]['token_str']))
#         lindex = 0
#         l = dist[0]
#         for r in range(5):
#             if dist[r] < l:
#                 lindex = r
#                 l = dist[r]
#         if l <= 2:
#             input[i] = apiint[lindex]['token_str']
#             # output.append(apiint[lindex]['token_str'])  #(2)
#         else:
#             input[i] = temp
#             # output.append(temp)  #(2)
#             # input[i] = temp  #(2)
# for t in range(1):
#     inndex = 1
#     for i in range(len(input)):
#         input.insert(inndex, "[MASK]")
#         # print(' '.join(input))
#         apiint = unmasker(' '.join(input))
#         if (apiint[0]['token'] < 1500):
#             input[inndex] = apiint[0]["token_str"]
#             inndex += 2
#         else:
#             del input[inndex]
#             inndex += 1
# st.write(collate(input))
# # In comparison, a plain autocorrect gives this output:
# # "The b-movie by Jerry Sinclair, the sound of buzzing
# # bees, can be heard according to all known laws of
# # aviation that is no way for b to be able to fly its
# # wings are too small to get its start little body off
# # the ground, the be, of course, flies anyway because ``
# # bees don't care what humans think is possible.
# # Barbuda is guaranteed one member of the House of
# # Representatives and two members of the Senate."
# # - https://huggingface.co./oliverguhr/spelling-correction-english-base?text=lets+do+a+comparsion
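# A second disabled pass tries to insert missing words: it places a [MASK] between every pair of
# words and keeps the top prediction only when its vocabulary id is below 1500 (presumably a
# proxy for very common words). With both passes disabled, the app returns the raw transcription.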
demo = gr.Interface(fn=everything,
                    inputs=gr.Audio(type="filepath"),  # hands the function the path of the uploaded/recorded file
                    outputs="text")
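# Launch the app (required when this file is run as a script, e.g. as a Gradio Space).
demo.launch()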