From 03397d6b7d24d1ec614df9c710d2c0f614e3cc29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=92=E7=8C=AB=E5=A4=A7=E7=A6=8F?= <93469977+rokujyushi@users.noreply.github.com> Date: Fri, 15 Nov 2024 12:05:52 +0900 Subject: [PATCH] =?UTF-8?q?`mora=5Fmapping.py`=20=E3=81=ABOpenJTalk?= =?UTF-8?q?=E3=81=AB=E7=84=A1=E3=81=84Kana=E3=82=92=E8=BF=BD=E5=8A=A0=20(#?= =?UTF-8?q?1473)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Hiroshiba Co-authored-by: Hiroshiba Kazuyuki --- voicevox_engine/tts_pipeline/mora_mapping.py | 48 ++++++++++++++++++++ voicevox_engine/user_dict/model.py | 6 +-- 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/voicevox_engine/tts_pipeline/mora_mapping.py b/voicevox_engine/tts_pipeline/mora_mapping.py index bd7df524b..f0c5f1010 100644 --- a/voicevox_engine/tts_pipeline/mora_mapping.py +++ b/voicevox_engine/tts_pipeline/mora_mapping.py @@ -56,6 +56,7 @@ "ゥ", "ウ", "ウィ", + "ウゥ", "ウェ", "ウォ", "ェ", @@ -65,18 +66,30 @@ "カ", "ガ", "キ", + "キィ", "キェ", "キャ", "キュ", "キョ", "ギ", + "ギィ", "ギェ", "ギャ", "ギュ", "ギョ", "ク", + "クァ", + "クィ", + "クゥ", + "クェ", + "クォ", "クヮ", "グ", + "グァ", + "グィ", + "グゥ", + "グェ", + "グォ", "グヮ", "ケ", "ゲ", @@ -110,6 +123,10 @@ "チュ", "チョ", "ヂ", + "ヂェ", + "ヂャ", + "ヂュ", + "ヂョ", "ッ", "ツ", "ツァ", @@ -119,6 +136,7 @@ "ヅ", "テ", "ティ", + "テェ", "テャ", "テュ", "テョ", @@ -134,6 +152,7 @@ "ドゥ", "ナ", "ニ", + "ニィ", "ニェ", "ニャ", "ニュ", @@ -145,16 +164,19 @@ "バ", "パ", "ヒ", + "ヒィ", "ヒェ", "ヒャ", "ヒュ", "ヒョ", "ビ", + "ビィ", "ビェ", "ビャ", "ビュ", "ビョ", "ピ", + "ピィ", "ピェ", "ピャ", "ピュ", @@ -174,6 +196,7 @@ "ポ", "マ", "ミ", + "ミィ", "ミェ", "ミャ", "ミュ", @@ -189,6 +212,7 @@ "ヨ", "ラ", "リ", + "リィ", "リェ", "リャ", "リュ", @@ -228,6 +252,7 @@ ("リュ", "ry", "u"), ("リャ", "ry", "a"), ("リェ", "ry", "e"), + ("リィ", "ry", "i"), ("リ", "r", "i"), ("ラ", "r", "a"), ("ヨ", "y", "o"), @@ -240,6 +265,7 @@ ("ミュ", "my", "u"), ("ミャ", "my", "a"), ("ミェ", "my", "e"), + ("ミィ", "my", "i"), ("ミ", "m", "i"), ("マ", "m", "a"), ("ポ", "p", "o"), @@ -259,16 +285,19 @@ ("ピュ", "py", "u"), ("ピャ", "py", "a"), ("ピェ", "py", "e"), + ("ピィ", "py", "i"), ("ピ", "p", "i"), ("ビョ", "by", "o"), ("ビュ", "by", "u"), ("ビャ", "by", "a"), ("ビェ", "by", "e"), + ("ビィ", "by", "i"), ("ビ", "b", "i"), ("ヒョ", "hy", "o"), ("ヒュ", "hy", "u"), ("ヒャ", "hy", "a"), ("ヒェ", "hy", "e"), + ("ヒィ", "hy", "i"), ("ヒ", "h", "i"), ("パ", "p", "a"), ("バ", "b", "a"), @@ -280,6 +309,7 @@ ("ニュ", "ny", "u"), ("ニャ", "ny", "a"), ("ニェ", "ny", "e"), + ("ニィ", "ny", "i"), ("ニ", "n", "i"), ("ナ", "n", "a"), ("ドゥ", "d", "u"), @@ -295,6 +325,7 @@ ("テョ", "ty", "o"), ("テュ", "ty", "u"), ("テャ", "ty", "a"), + ("テェ", "ty", "e"), ("ティ", "t", "i"), ("テ", "t", "e"), ("ツォ", "ts", "o"), @@ -335,18 +366,28 @@ ("ゲ", "g", "e"), ("ケ", "k", "e"), ("グヮ", "gw", "a"), + ("グォ", "gw", "o"), + ("グェ", "gw", "e"), + ("グゥ", "gw", "u"), + ("グィ", "gw", "i"), ("グ", "g", "u"), ("クヮ", "kw", "a"), + ("クォ", "kw", "o"), + ("クェ", "kw", "e"), + ("クゥ", "kw", "u"), + ("クィ", "kw", "i"), ("ク", "k", "u"), ("ギョ", "gy", "o"), ("ギュ", "gy", "u"), ("ギャ", "gy", "a"), ("ギェ", "gy", "e"), + ("ギィ", "gy", "i"), ("ギ", "g", "i"), ("キョ", "ky", "o"), ("キュ", "ky", "u"), ("キャ", "ky", "a"), ("キェ", "ky", "e"), + ("キィ", "ky", "i"), ("キ", "k", "i"), ("ガ", "g", "a"), ("カ", "k", "a"), @@ -354,6 +395,7 @@ ("エ", None, "e"), ("ウォ", "w", "o"), ("ウェ", "w", "e"), + ("ウゥ", "w", "u"), ("ウィ", "w", "i"), ("ウ", None, "u"), ("イェ", "y", "e"), @@ -371,7 +413,13 @@ ("ョ", "y", "o"), ("ュ", "y", "u"), ("ヅ", "z", "u"), + ("ヂョ", "j", "o"), + ("ヂュ", "j", "u"), + ("ヂャ", "j", "a"), + ("ヂェ", "j", "e"), ("ヂ", "j", "i"), + ("グァ", "gw", "a"), + ("クァ", "kw", "a"), ("ヶ", "k", "e"), ("ャ", "y", "a"), ("ォ", None, "o"), diff --git a/voicevox_engine/user_dict/model.py b/voicevox_engine/user_dict/model.py index 355564f3e..0cc0dd88b 100644 --- a/voicevox_engine/user_dict/model.py +++ b/voicevox_engine/user_dict/model.py @@ -90,10 +90,10 @@ def check_is_katakana(cls, pronunciation: str) -> str: def check_mora_count_and_accent_type(self) -> Self: if self.mora_count is None: rule_others = ( - "[イ][ェ]|[ヴ][ャュョ]|[トド][ゥ]|[テデ][ィャュョ]|[デ][ェ]|[クグ][ヮ]" + "[イ][ェ]|[ヴ][ャュョ]|[ウクグトド][ゥ]|[テデ][ィェャュョ]|[クグ][ヮ]" ) - rule_line_i = "[キシチニヒミリギジビピ][ェャュョ]" - rule_line_u = "[ツフヴ][ァ]|[ウスツフヴズ][ィ]|[ウツフヴ][ェォ]" + rule_line_i = "[キシチニヒミリギジヂビピ][ェャュョ]|[キニヒミリギビピ][ィ]" + rule_line_u = "[クツフヴグ][ァ]|[ウクスツフヴグズ][ィ]|[ウクツフヴグ][ェォ]" rule_one_mora = "[ァ-ヴー]" self.mora_count = len( findall(