diff --git a/espnet2/text/phoneme_tokenizer.py b/espnet2/text/phoneme_tokenizer.py index 4580cc5d726..570e165f521 100644 --- a/espnet2/text/phoneme_tokenizer.py +++ b/espnet2/text/phoneme_tokenizer.py @@ -199,10 +199,11 @@ def pypinyin_g2p_phone(text) -> List[str]: for phone in pinyin(text, style=Style.TONE3) for p in [ get_initials(phone[0], strict=True), - # When using the new pypinyin, the previous code will drop the finals get_finals(phone[0][:-1], strict=True) + phone[0][-1] if phone[0][-1].isdigit() - else get_finals(phone[0], strict=True), + else get_finals(phone[0], strict=True) + if phone[0][-1].isalnum() + else phone[0], ] # Remove the case of individual tones as a phoneme if len(p) != 0 and not p.isdigit()