import spacy from spacy.matcher import Matcher def get_entities(sent): ## chunk 1 ent1 = "" ent2 = "" prv_tok_dep = "" # dependency tag of previous token in the sentence prv_tok_text = "" # previous token in the sentence prefix = "" modifier = "" ############################################################# for tok in nlp(sent): ## chunk 2 # if token is a punctuation mark then move on to the next token if tok.dep_ != "punct": # check: token is a compound word or not if tok.dep_ == "compound": prefix = tok.text # if the previous word was also a 'compound' then add the current word to it if prv_tok_dep == "compound": prefix = prv_tok_text + " " + tok.text # check: token is a modifier or not if tok.dep_.endswith("mod") == True: modifier = tok.text # if the previous word was also a 'compound' then add the current word to it if prv_tok_dep == "compound": modifier = prv_tok_text + " " + tok.text ## chunk 3 if tok.dep_.find("subj") == True: ent1 = modifier + " " + prefix + " " + tok.text prefix = "" modifier = "" prv_tok_dep = "" prv_tok_text = "" ## chunk 4 if tok.dep_.find("obj") == True: ent2 = modifier + " " + prefix + " " + tok.text ## chunk 5 # update variables prv_tok_dep = tok.dep_ prv_tok_text = tok.text ############################################################# return [ent1.strip(), ent2.strip()] def get_relation(sent): nlp = spacy.load('en_core_web_sm') doc = nlp(sent) # Matcher class object matcher = Matcher(nlp.vocab) #define the pattern pattern = [{'DEP':'ROOT'}, {'DEP':'prep','OP':"?"}, {'DEP':'agent','OP':"?"}, {'POS':'ADJ','OP':"?"}] matcher.add('matching_pattern', patterns=[pattern]) matches = matcher(doc) k = len(matches) - 1 span = doc[matches[k][1]:matches[k][2]] return(span.text)