import pathlib

from spacy.tokenizer import Tokenizer
from spacy.util import registry
def make_customize_tokenizer():
    """Create a callback that loads a serialized tokenizer from disk.

    Returns:
        A one-argument callable taking an ``nlp`` object. See the nested
        function for what it does when invoked.
    """

    def customize_tokenizer(nlp):
        """Load the tokenizer stored in the ``tokenizer`` path beside this file.

        A fresh ``Tokenizer`` is built on ``nlp.vocab`` and populated via
        ``from_disk``. The path is resolved relative to this module's own
        location rather than the current working directory.

        Note that this function does not assign anything to ``nlp`` itself;
        it hands back whatever ``Tokenizer.from_disk`` returns.
        """
        module_dir = pathlib.Path(__file__).parent.resolve()
        saved_tokenizer_path = module_dir / "tokenizer"
        fresh_tokenizer = Tokenizer(nlp.vocab)
        return fresh_tokenizer.from_disk(saved_tokenizer_path)

    return customize_tokenizer