From 35a87205b7977e7d9a7d83ad194659137cc298fa Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis
Date: Mon, 15 Feb 2021 06:01:42 -0800
Subject: [PATCH] Replace model with full name when spacy load is used (#1140)

Reviewed By: zhangguanheng66

Differential Revision: D26369005

fbshipit-source-id: b1e6b5d77810bb8f67d14b8a1c7ec0a9f4831cab
---
 torchtext/data/utils.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/torchtext/data/utils.py b/torchtext/data/utils.py
index 045c2646fb..12f653290b 100644
--- a/torchtext/data/utils.py
+++ b/torchtext/data/utils.py
@@ -112,7 +112,17 @@ def get_tokenizer(tokenizer, language='en'):
     if tokenizer == "spacy":
         try:
             import spacy
-            spacy = spacy.load(language)
+            try:
+                spacy = spacy.load(language)
+            except IOError:
+                # Model shortcuts no longer work in spaCy 3.0+, try using fullnames
+                # List is from https://github.com/explosion/spaCy/blob/b903de3fcb56df2f7247e5b6cfa6b66f4ff02b62/spacy/errors.py#L789
+                OLD_MODEL_SHORTCUTS = spacy.errors.OLD_MODEL_SHORTCUTS if hasattr(spacy.errors, 'OLD_MODEL_SHORTCUTS') else {}
+                if language not in OLD_MODEL_SHORTCUTS:
+                    raise
+                import warnings
+                warnings.warn(f'Spacy model "{language}" could not be loaded, trying "{OLD_MODEL_SHORTCUTS[language]}" instead')
+                spacy = spacy.load(OLD_MODEL_SHORTCUTS[language])
             return partial(_spacy_tokenize, spacy=spacy)
         except ImportError:
             print("Please install SpaCy. "