from spacy.util import registry
from spacy.tokenizer import Tokenizer
import pathlib


@registry.tokenizers("customize_tokenizer")
def make_customize_tokenizer():
    """Return a callback that builds a tokenizer from files saved beside this module.

    The factory is registered in spaCy's ``tokenizers`` registry under the
    name ``"customize_tokenizer"``.

    Returns:
        A function taking an ``nlp`` object and returning the tokenizer
        loaded from the ``tokenizer`` directory next to this file.
    """

    def customize_tokenizer(nlp):
        """Create a tokenizer on ``nlp.vocab`` and load its saved state."""
        # Resolve relative to this file, not the current working directory,
        # so loading works regardless of where the process was started.
        saved_state_dir = pathlib.Path(__file__).parent.resolve() / "tokenizer"
        blank_tokenizer = Tokenizer(nlp.vocab)
        # The value returned by from_disk() is what gets handed back.
        return blank_tokenizer.from_disk(saved_state_dir)

    return customize_tokenizer