Update README.md
README.md
CHANGED
@@ -14,4 +14,30 @@ Swedish version of Wav2Vec2.0 XLSR finetuned on NST Swedish Dictation and evalua
For some reason the model does not work in the browser, but it can be used as follows (code adapted from Hugging Face):

```python
#!/usr/bin/env python3

from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
import soundfile as sf
from sys import argv, exit
import torch

if __name__ == '__main__':
    if len(argv) < 3:
        print(f'usage: {argv[0]} <model> <audio file>')
        exit(1)

    # Load the processor (feature extractor + tokenizer) and the fine-tuned model.
    processor = Wav2Vec2Processor.from_pretrained(argv[1])
    model = Wav2Vec2ForCTC.from_pretrained(argv[1])

    # Read the audio file; the model expects 16 kHz mono input.
    f = argv[2]
    s, sample_rate = sf.read(f)

    # Passing the file's sample rate lets the processor check it against the rate the model expects.
    input_values = processor(s, sampling_rate=sample_rate, return_tensors="pt").input_values

    # Forward pass and greedy CTC decoding.
    with torch.no_grad():
        logits = model(input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)

    transcription = processor.decode(predicted_ids[0])
    print(transcription)
```
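The script assumes the input file is already sampled at 16 kHz, the rate XLSR-style Wav2Vec2 checkpoints are trained on. If your audio uses a different rate, here is a minimal sketch of resampling it first, assuming `librosa` is installed; the file name is a placeholder:

```python
# Minimal sketch (assumption: librosa is installed; "example.wav" is a placeholder).
# librosa.load resamples to the requested rate and downmixes to mono.
import librosa

speech, sample_rate = librosa.load("example.wav", sr=16000)
# `speech` and `sample_rate` can then be passed to processor(...) exactly as in the script above.
```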