diff --git a/TTS/tts/layers/xtts/tokenizer.py b/TTS/tts/layers/xtts/tokenizer.py
index f34a7ac036..4b9fb9edef 100644
--- a/TTS/tts/layers/xtts/tokenizer.py
+++ b/TTS/tts/layers/xtts/tokenizer.py
@@ -6,7 +6,6 @@
 from tokenizers import Tokenizer
 
 import pypinyin
-import cutlet
 from num2words import num2words
 
 from TTS.tts.layers.xtts.zh_num2words import TextNorm as zh_num2words
@@ -484,10 +483,13 @@ def preprocess_text(self, txt, lang):
             if lang == "zh-cn":
                 txt = chinese_transliterate(txt)
         elif lang == "ja":
+            assert txt[:4] == "[ja]", "Japanese speech should start with the [ja] token."
+            txt = txt[4:]
             if self.katsu is None:
                 import cutlet
                 self.katsu = cutlet.Cutlet()
             txt = japanese_cleaners(txt, self.katsu)
+            txt = "[ja]" + txt
         else:
             raise NotImplementedError()
         return txt
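
Note: the patch does two things at once. It drops the module-level cutlet import (leaving only the lazy import inside the "ja" branch, so the MeCab-backed dependency is loaded only when Japanese is actually requested), and it round-trips the [ja] language token: the token is asserted and stripped before cleaning so it is not romanized along with the text, then re-attached for the downstream tokenizer. Below is a minimal standalone sketch of that round-trip, under stated assumptions: preprocess_japanese is a hypothetical helper written for illustration (the real logic lives in the class method preprocess_text), cutlet.Cutlet and its romaji method are the actual cutlet API, and japanese_cleaners is approximated here by romanizing plus lowercasing.

# Illustrative sketch only: preprocess_japanese is a made-up helper mirroring
# the patched "ja" branch of preprocess_text, not the project's real function.
def preprocess_japanese(txt):
    # Enforce and strip the [ja] language token, as the patch does,
    # so the cleaner never sees (and never romanizes) the token itself.
    assert txt[:4] == "[ja]", "Japanese speech should start with the [ja] token."
    txt = txt[4:]

    # Lazy import: cutlet pulls in MeCab, so it is only imported when
    # Japanese input actually arrives (the real code caches self.katsu).
    import cutlet

    katsu = cutlet.Cutlet()
    txt = katsu.romaji(txt).lower()  # approximation of japanese_cleaners(txt, katsu)

    # Re-attach the token for the downstream BPE tokenizer.
    return "[ja]" + txt


print(preprocess_japanese("[ja]こんにちは"))  # e.g. "[ja]konnichiwa"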