Skip to content

Commit

Permalink
Fix tokenizer for punc only (#1717)
Browse files Browse the repository at this point in the history
  • Loading branch information
WeberJulian authored Jul 6, 2022
1 parent 9e00e31 commit 5cef6fa
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 1 deletion.
2 changes: 1 addition & 1 deletion TTS/tts/utils/text/punctuation.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def _restore(cls, text, puncs, num): # pylint: disable=too-many-return-statements

# nothing has been phonemized, return the puncs alone
if not text:
return ["".join(m.mark for m in puncs)]
return ["".join(m.punc for m in puncs)]

current = puncs[0]

Expand Down
7 changes: 7 additions & 0 deletions tests/text_tests/test_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@ def test_text_to_ids_phonemes(self):
test_hat = self.tokenizer_ph.ids_to_text(ids)
self.assertEqual(text_ph, test_hat)

# Test added by this commit ("Fix tokenizer for punc only"): checks that a
# punctuation-only string survives the text -> ids -> text round trip.
def test_text_to_ids_phonemes_punctuation(self):
# Input consisting solely of punctuation characters.
text = "..."
# Expected text: the phonemizer's output for the same input, with an empty separator.
text_ph = self.ph.phonemize(text, separator="")
# Encode the raw text to ids, then decode the ids back to text.
ids = self.tokenizer_ph.text_to_ids(text)
test_hat = self.tokenizer_ph.ids_to_text(ids)
# The decoded text must equal the phonemizer's output.
self.assertEqual(text_ph, test_hat)

def test_text_to_ids_phonemes_with_eos_bos(self):
text = "Bu bir Örnek."
self.tokenizer_ph.use_eos_bos = True
Expand Down

0 comments on commit 5cef6fa

Please sign in to comment.