2nd version of the tokenizer fix

coqui-ai · Oct 6, 2023 · 2fdf51e
1 parent 1ec3418
commit 2fdf51e
Showing 1 changed file with 5 additions and 3 deletions.
diff --git a/TTS/tts/layers/xtts/tokenizer.py b/TTS/tts/layers/xtts/tokenizer.py
@@ -223,9 +223,11 @@ def preprocess_text(self, txt, lang):
  results = kks.convert(txt)
  txt = " ".join([result["kana"] for result in results])
  txt = basic_cleaners(txt)
- # elif lang == "en":
- # txt = english_cleaners(txt)
- # English cleaner remove the language tag [en]
+ elif lang == "en":
+ if txt[:4] == "[en]":
+ txt = txt[4:]
+ txt = english_cleaners(txt)
+ txt = "[en]" + txt
  elif lang == "ar":
  txt = arabic_cleaners(txt)
  elif lang == "zh-cn":