Spaces:
Runtime error
Runtime error
File size: 1,468 Bytes
faec829 bcd3c3e 601f74f e746d10 ac76be2 d4e4acc 9cb5903 d4e4acc 98314f0 e746d10 28dfd94 e746d10 98314f0 d4e4acc faec829 ac76be2 e906ac8 e746d10 e906ac8 ee14c57 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
import gradio
from transformers import pipeline
# Merge split tokens starting with '##'
def merge_split_tokens(tokens):
    """Join WordPiece continuation fragments onto the token that precedes them.

    A token whose ``"word"`` starts with ``'##'`` is treated as the
    continuation of the previous token: its text (without the ``'##'``
    marker) is appended to the previous token's ``"word"``.  All other keys
    of the merged entry are taken from the first piece of the word.

    Args:
        tokens: Iterable of dicts, each with at least a ``"word"`` string.

    Returns:
        A new list of new dicts; the input dicts are left unmodified.
    """
    merged_tokens = []
    for token in tokens:
        word = token["word"]
        if word.startswith('##'):
            fragment = word[2:]
            if merged_tokens:
                merged_tokens[-1]["word"] += fragment
            else:
                # A continuation fragment with nothing before it (e.g. the
                # first piece of the word was not reported): keep it as its
                # own entry rather than failing on an empty list.
                merged_tokens.append(dict(token, word=fragment))
        else:
            # Copy so that later "+=" merges never alter the caller's dicts.
            merged_tokens.append(dict(token))
    return merged_tokens
# Models from https://huggingface.co/models
# https://huggingface.co/KBLab/bert-base-swedish-cased-ner
_NER_MODEL_NAME = 'KBLab/bert-base-swedish-cased-ner'

# Lazily created pipeline, shared by all requests.
_ner_pipeline = None


def _get_ner_pipeline():
    """Return the shared NER pipeline, building it on first use."""
    global _ner_pipeline
    if _ner_pipeline is None:
        _ner_pipeline = pipeline('ner', model=_NER_MODEL_NAME, tokenizer=_NER_MODEL_NAME)
    return _ner_pipeline


def process_swedish_text(text):
    """Run Swedish named-entity recognition on ``text``.

    Args:
        text: The input text to analyse.

    Returns:
        A dict ``{'entities': [...]}`` where each entity is a dict produced
        by the NER pipeline, with ``'##'`` word pieces merged and ``'score'``
        converted to a plain Python ``float``.
    """
    # Reuse one pipeline instead of rebuilding it for every call.
    nlp = _get_ner_pipeline()
    # Run NER
    nlp_results = nlp(text)
    print('nlp_results:', nlp_results)
    nlp_results_merged = merge_split_tokens(nlp_results)
    # Fix TypeError("'numpy.float32' object is not iterable"):
    # convert each score to a built-in float before returning it.
    nlp_results_adjusted = [
        dict(entity, score=float(entity['score']))
        for entity in nlp_results_merged
    ]
    # Built as a list (not a lazy map) so the log line shows the actual data.
    print('nlp_results_adjusted:', nlp_results_adjusted)
    # Return values
    return {'entities': nlp_results_adjusted}
# Sample inputs passed to the interface's `examples` argument.
_EXAMPLE_SENTENCES = [
    ["Jag heter Tom och bor i Stockholm."],
    ["Groens malmgård är en av Stockholms malmgårdar, belägen vid Malmgårdsvägen 53 på Södermalm i Stockholm."],
]

# Text-in / JSON-out interface around process_swedish_text.
gradio_interface = gradio.Interface(
    fn=process_swedish_text,
    inputs="text",
    outputs="json",
    title="Swedish Entity Recognition",
    description="Recognizing Swedish tokens e.g. locations and person names.",
    article="© Tom Söderlund 2022",
    examples=_EXAMPLE_SENTENCES,
)

# Launch the interface.
gradio_interface.launch()
|