File size: 1,468 Bytes
faec829
bcd3c3e
601f74f
e746d10
 
 
 
 
 
 
 
 
 
ac76be2
d4e4acc
 
9cb5903
d4e4acc
98314f0
 
e746d10
28dfd94
e746d10
98314f0
d4e4acc
faec829
ac76be2
e906ac8
 
 
 
 
 
 
e746d10
 
 
 
e906ac8
ee14c57
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import gradio
from transformers import pipeline

# Merge split tokens starting with '##'
def merge_split_tokens(tokens):
  """Merge '##' continuation tokens into the token that precedes them.

  A token whose "word" starts with '##' is treated as a continuation: its
  text, without the '##' marker, is appended to the "word" of the most
  recently kept token and the continuation itself is dropped. All other
  fields of the kept token stay as they were on that token.

  A continuation token with no kept token before it (for example when it is
  the very first item) is kept unchanged instead of raising IndexError.

  Args:
    tokens: Iterable of dicts that each have a string "word" entry.

  Returns:
    A new list of dicts. The dicts passed in are not modified.
  """
  merged_tokens = []
  for token in tokens:
    word = token["word"]
    if word.startswith('##') and merged_tokens:
      merged_tokens[-1]["word"] += word[2:]
    else:
      # Store a copy so that appending continuation text later never
      # rewrites the caller's own dicts.
      merged_tokens.append(dict(token))
  return merged_tokens

# Lazily created NER pipeline, shared by every call to process_swedish_text.
_ner_pipeline = None


def _get_ner_pipeline():
  """Return the shared NER pipeline, building it on first use only."""
  global _ner_pipeline
  if _ner_pipeline is None:
    # Models from https://huggingface.co/models
    # https://huggingface.co/KBLab/bert-base-swedish-cased-ner
    _ner_pipeline = pipeline(
      'ner',
      model='KBLab/bert-base-swedish-cased-ner',
      tokenizer='KBLab/bert-base-swedish-cased-ner',
    )
  return _ner_pipeline


def process_swedish_text(text):
  """Run named-entity recognition on `text` and return the merged entities.

  The raw pipeline output is passed through merge_split_tokens, each
  entity's 'score' is converted to a built-in float, and both the raw and
  the adjusted results are printed for debugging.

  Args:
    text: The text to analyse.

  Returns:
    A dict with a single key 'entities' holding a list of entity dicts.
  """
  # Reuse one pipeline instead of constructing a new one for each request.
  nlp = _get_ner_pipeline()
  # Run NER
  nlp_results = nlp(text)
  print('nlp_results:', nlp_results)
  nlp_results_merged = merge_split_tokens(nlp_results)
  # Fix TypeError("'numpy.float32' object is not iterable"): replace each
  # score with a plain float. Built as a list (not a lazy map) so the debug
  # print below shows the actual entities.
  nlp_results_adjusted = [
    dict(entity, score=float(entity['score']))
    for entity in nlp_results_merged
  ]
  print('nlp_results_adjusted:', nlp_results_adjusted)
  # Return values
  return {'entities': nlp_results_adjusted}

# Sample inputs offered in the UI; each inner list holds the value for the
# single text input.
_example_inputs = [
  ["Jag heter Tom och bor i Stockholm."],
  ["Groens malmgård är en av Stockholms malmgårdar, belägen vid Malmgårdsvägen 53 på Södermalm i Stockholm."],
]

# Settings for the web UI: a text input is passed to process_swedish_text
# and its result is shown as JSON.
_interface_options = dict(
  fn=process_swedish_text,
  inputs="text",
  outputs="json",
  examples=_example_inputs,
  title="Swedish Entity Recognition",
  description="Recognizing Swedish tokens e.g. locations and person names.",
  article="© Tom Söderlund 2022",
)

gradio_interface = gradio.Interface(**_interface_options)

# Start serving the interface as soon as this module is executed.
gradio_interface.launch()