HazyResearch · lukehsiao · Mar 28, 2019 · Mar 21, 2019 · Mar 26, 2019 · Mar 26, 2019
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -62,6 +62,8 @@ Fixed
     spaCy only support version 0.7.
 * `@HiromuHota`_: Use black 18.9b0 or higher to be consistent with isort.
   (`#225 <https://github.com/HazyResearch/fonduer/issues/225>`_)
+* `@HiromuHota`_: Remove ja/zh first-use workaround, unneeded for Japanese as of spaCy v2.1.0.
+  (`#224 <https://github.com/HazyResearch/fonduer/pull/224>`_)
 * `@senwu`_: Update the metal version.
 * `@senwu`_: Expose the ``b`` and ``pos_label`` in training.
 * `@senwu`_: Fix the issue that pdfinfo causes parsing error when it contains

diff --git a/src/fonduer/parser/spacy_parser.py b/src/fonduer/parser/spacy_parser.py
@@ -125,20 +125,6 @@ def load_lang_model(self):
             language_module = importlib.import_module(f"spacy.lang.{self.lang}")
             language_method = getattr(language_module, self.alpha_languages[self.lang])
             model = language_method()
-            """ TODO: Depending on OS (Linux/macOS) and on the sentence to be parsed,
-            UnicodeDecodeError or ValueError happens at the first use when lang='ja'.
-            As a workaround, the model parses some sentence before actually being used.
-            """
-            if self.lang == "ja":
-                try:
-                    model("初期化")
-                except (UnicodeDecodeError, ValueError):
-                    pass
-            if self.lang == "zh":
-                try:
-                    model("初始化")
-                except (UnicodeDecodeError, ValueError):
-                    pass
         self.model = model
 
     def sentence_list_separator_function(self, all_sentence_objs):