# it_trf_nrp / use_custom_tokenizer.py
# Lazy-Val's picture
# Update spaCy pipeline
# 30d0611 verified
# raw
# history blame contribute delete
# 391 Bytes
from spacy.util import registry
from spacy.tokenizer import Tokenizer
import pathlib
@registry.tokenizers("customize_tokenizer")
def make_customize_tokenizer():
    """Registered tokenizer factory for the ``customize_tokenizer`` entry.

    Returns a callable that, given an ``nlp`` pipeline object, builds a
    blank ``Tokenizer`` over ``nlp.vocab`` and populates it from the
    serialized ``tokenizer`` directory that sits next to this script.
    """

    def customize_tokenizer(nlp):
        # Resolve the data directory relative to this file, not the CWD,
        # so the pipeline loads correctly regardless of where it is run from.
        here = pathlib.Path(__file__).parent.resolve()
        blank_tokenizer = Tokenizer(nlp.vocab)
        # from_disk restores the tokenizer's serialized settings and returns it.
        return blank_tokenizer.from_disk(here / "tokenizer")

    return customize_tokenizer