import os

import gradio as gr
from huggingsound import SpeechRecognitionModel
from pydub import AudioSegment
from transformers import BertModel, BertTokenizer, logging, pipeline

# BERT fill-mask pipeline, tokenizer, and model used for masked-word suggestions
unmasker = pipeline('fill-mask', model='bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained("bert-base-uncased")
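# Note on the pipeline output (standard transformers fill-mask behaviour): calling
# unmasker(text) on a sentence containing a single [MASK] returns a ranked list of
# dicts, each with 'score', 'token', 'token_str', and 'sequence' keys; the
# commented-out correction strategies below rely on the 'token' and 'token_str' fields.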
def levenshtein_distance(s, t):
    # Classic dynamic-programming edit distance between strings s and t.
    m, n = len(s), len(t)
    d = [[0] * (n+1) for _ in range(m+1)]
    for i in range(m+1):
        d[i][0] = i
    for j in range(n+1):
        d[0][j] = j
    for j in range(1, n+1):
        for i in range(1, m+1):
            if s[i-1] == t[j-1]:
                d[i][j] = d[i-1][j-1]
            else:
                d[i][j] = 1 + min(d[i-1][j], d[i][j-1], d[i-1][j-1])
    return d[m][n]
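# Example: levenshtein_distance("kitten", "sitting") == 3
# (substitute k->s, substitute e->i, insert g).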
def collate(input):
    # Rejoin a list of tokens into readable text: attach punctuation directly,
    # capitalize after sentence starts, and skip the space after a dash/apostrophe.
    pun_marks = [",", ".", "?", "!", ";", ":", "-", "β", "(", ")", "[", "]", "{", "}", "'", "\"", "`"]
    output = ""
    Capital = True
    Dash = False
    for i in range(len(input)):
        if input[i] in pun_marks:
            output += input[i]
            if input[i] in [".", "("]:
                Capital = True
            if input[i] in ["-", "'"]:
                Dash = True
            else:
                Dash = False
        else:
            word = ""
            if (Dash == False):
                word += " "
            if Capital:
                word += input[i].capitalize()
                Capital = False
            else:
                word += input[i]
            output += word
    return output
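# Example: collate(["hello", ",", "world", "!"]) -> " Hello, world!"
# (note the single leading space before the first word).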
def everything(audio_path):
    # Models used:
    # https://huggingface.co./jonatasgrosman/wav2vec2-large-xlsr-53-english
    # https://huggingface.co./bert-base-uncased
    w2vmodel = SpeechRecognitionModel("jonatasgrosman/wav2vec2-large-xlsr-53-english")
    logging.set_verbosity_error()  # change 'error' to 'warning' or remove this line to see the warnings
    # huggingsound expects a list of file paths and returns one dict per file
    transcriptions = w2vmodel.transcribe([audio_path])
    return transcriptions[0]["transcription"]
# input = transcriptions[0]["transcription"]
# input = input.split()
# # (1) is a strategy where tokens are used to determine lexicographic distance
# # (2) is a strategy where the replaced words are appended to a separate output list instead of edited in place
# for t in range(1):
#     # output = []  # (2)
#     for i in range(len(input)):
#         temp = input[i]
#         token = tokenizer(temp)['input_ids'][1]
#         input[i] = "[MASK]"
#         apiint = unmasker(' '.join(input))
#         dist = []
#         for r in range(5):
#             # if (np.abs((apiint[r]['token'] - token)) < 2):  # (1)
#             dist.append(levenshtein_distance(temp, apiint[r]['token_str']))
#         lindex = 0
#         l = dist[0]
#         for r in range(5):
#             if dist[r] < l:
#                 lindex = r
#                 l = dist[r]
#         if l <= 2:
#             input[i] = apiint[lindex]['token_str']
#             # output.append(apiint[lindex]['token_str'])  # (2)
#         else:
#             input[i] = temp
#             # output.append(temp)  # (2)
#             # input[i] = temp  # (2)
# for t in range(1):
#     inndex = 1
#     for i in range(len(input)):
#         input.insert(inndex, "[MASK]")
#         # print(' '.join(input))
#         apiint = unmasker(' '.join(input))
#         if (apiint[0]['token'] < 1500):
#             input[inndex] = apiint[0]["token_str"]
#             inndex += 2
#         else:
#             del input[inndex]
#             inndex += 1
# st.write(collate(input))
# # In comparison, a plain autocorrect gives this output:
# # "The b-movie by Jerry Sinclair, the sound of buzzing
# # bees, can be heard according to all known laws of
# # aviation that is no way for b to be able to fly its
# # wings are too small to get its start little body off
# # the ground, the be, of course, flies anyway because ``
# # bees don't care what humans think is possible.
# # Barbuda is guaranteed one member of the House of
# # Representatives and two members of the Senate."
# # - https://huggingface.co./oliverguhr/spelling-correction-english-base?text=lets+do+a+comparsion
# gr.Audio(type="filepath") would also be a reasonable input component for audio uploads
demo = gr.Interface(fn=everything,
                    inputs=[gr.UploadButton(label="Upload audio")],
                    outputs=["text"])
demo.launch()
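# Local sanity check without the UI (assumption: "sample.wav" is a placeholder path
# for any short English audio clip on disk):
# print(everything("sample.wav"))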