From 50269e8b4dd0696d02e5da9f70c2d7952a26f392 Mon Sep 17 00:00:00 2001 From: WeiGodHorse Date: Fri, 25 Mar 2022 22:58:41 +0800 Subject: [PATCH] fix a bug in Mandarin pypinyin_g2p_phone --- espnet2/text/phoneme_tokenizer.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/espnet2/text/phoneme_tokenizer.py b/espnet2/text/phoneme_tokenizer.py index e6791d1818c..3af10ac6b94 100644 --- a/espnet2/text/phoneme_tokenizer.py +++ b/espnet2/text/phoneme_tokenizer.py @@ -199,9 +199,11 @@ def pypinyin_g2p_phone(text) -> List[str]: for phone in pinyin(text, style=Style.TONE3) for p in [ get_initials(phone[0], strict=True), - get_finals(phone[0], strict=True), + # When using the new pypinyin, the previous code will drop the finals + get_finals(phone[0][:-1], strict=True) + phone[0][-1] if phone[0][-1].isdigit() else get_finals(phone[0], strict=True), ] - if len(p) != 0 + # Remove the case of individual tones as a phoneme + if len(p) != 0 and not p.isdigit() ] return phones