diff --git a/pythainlp/util/syllable.py b/pythainlp/util/syllable.py index 9d480d815..bf3d4be4c 100644 --- a/pythainlp/util/syllable.py +++ b/pythainlp/util/syllable.py @@ -184,7 +184,7 @@ def syllable_length(syllable: str) -> str: # output: short """ consonants = [i for i in syllable if i in list(thai_consonants)] - if len(consonants) < 3 and any((c in set(short)) for c in syllable): + if len(consonants) <= 3 and any((c in set(short)) for c in syllable): return "short" elif bool(re_short.search(syllable)): return "short" @@ -259,8 +259,6 @@ def tone_detector(syllable: str) -> str: and tone_mark == "่" ): r = "l" - elif initial_consonant == "อ" and consonant_ending and s == "dead": - r = "l" elif ( initial_consonant == "ห" and consonant_ending @@ -268,6 +266,8 @@ def tone_detector(syllable: str) -> str: and tone_mark == "่" ): r = "l" + elif initial_consonant == "อ" and consonant_ending and s == "dead": + r = "l" elif ( initial_consonant == "ห" and consonant_ending @@ -279,6 +279,22 @@ def tone_detector(syllable: str) -> str: r = "l" elif initial_consonant == "ห" and consonant_ending and s == "live": r = "r" + elif initial_consonant_type == "high" and s == "live" and tone_mark == "่": + r = "l" + elif initial_consonant_type == "mid" and s == "live" and tone_mark == "่": + r = "l" + elif initial_consonant_type == "low" and tone_mark == "้": + r = "h" + elif initial_consonant_type == "mid" and tone_mark == "๋": + r = "r" + elif initial_consonant_type == "mid" and tone_mark == "๊": + r = "h" + elif initial_consonant_type == "low" and tone_mark == "่": + r = "f" + elif initial_consonant_type == "mid" and tone_mark == "้": + r = "f" + elif initial_consonant_type == "high" and tone_mark == "้": + r = "f" elif ( initial_consonant_type == "low" and syllable_check_length == "short" @@ -299,22 +315,6 @@ def tone_detector(syllable: str) -> str: and syllable_check == "open" ): r = "h" - elif initial_consonant_type == "high" and s == "live" and tone_mark == "่": - r = "l" - elif initial_consonant_type == "mid" and s == "live" and tone_mark == "่": - r = "l" - elif initial_consonant_type == "low" and tone_mark == "้": - r = "h" - elif initial_consonant_type == "mid" and tone_mark == "๋": - r = "r" - elif initial_consonant_type == "mid" and tone_mark == "๊": - r = "h" - elif initial_consonant_type == "low" and tone_mark == "่": - r = "f" - elif initial_consonant_type == "mid" and tone_mark == "้": - r = "f" - elif initial_consonant_type == "high" and tone_mark == "้": - r = "f" elif initial_consonant_type == "mid" and s == "dead": r = "l" elif initial_consonant_type == "high" and s == "dead": diff --git a/tests/core/test_util.py b/tests/core/test_util.py index de7a7a9d1..aef11e094 100644 --- a/tests/core/test_util.py +++ b/tests/core/test_util.py @@ -708,6 +708,8 @@ def test_tone_detector(self): ("f", "ไหม้"), ("f", "ต้น"), ("f", "ผู้"), + ("h", "ครับ"), + ("f", "ค่ะ"), ] for i, j in data: self.assertEqual(tone_detector(j), i)