sotirios-slv committed on
Commit
d8d40d6
1 Parent(s): c444b8a

Fixed entity collection

Browse files
Files changed (1) hide show
  1. app.py +8 -2
app.py CHANGED
@@ -19,7 +19,13 @@ def get_named_entities(ocr_text: str):
19
  sentence = [Sentence(sent, use_tokenizer=True) for sent in split_single(ocr_text)]
20
  tagger.predict(sentence)
21
 
22
- entities = [entity for entity in sent.get_spans("ner") for sent in sentence]
 
 
 
 
 
 
23
  print("Entities ", entities)
24
  return entities
25
 
@@ -44,7 +50,7 @@ def get_named_entities(ocr_text: str):
44
  # print(pytesseract.image_to_osd(Image.open('test.png'))
45
 
46
 
47
- def run(image, lang=None):
48
  print("Lang ", lang)
49
  result = pytesseract.image_to_string(image, lang=None if lang == [] else lang)
50
 
 
19
  sentence = [Sentence(sent, use_tokenizer=True) for sent in split_single(ocr_text)]
20
  tagger.predict(sentence)
21
 
22
+ # entities = [entity for entity in sent.get_spans("ner") for sent in sentence]
23
+ entities = []
24
+
25
+ for sent in sentence:
26
+ for entity in sent.get_spans("ner"):
27
+ entities.append(entity)
28
+
29
  print("Entities ", entities)
30
  return entities
31
 
 
50
  # print(pytesseract.image_to_osd(Image.open('test.png'))
51
 
52
 
53
+ def run(image, lang="eng"):
54
  print("Lang ", lang)
55
  result = pytesseract.image_to_string(image, lang=None if lang == [] else lang)
56