From eabd992d1c9c8e7fbcd1d1bb4dd2896d78623873 Mon Sep 17 00:00:00 2001 From: Ilkyu Ju Date: Thu, 19 Aug 2021 14:18:33 +0900 Subject: [PATCH] fix: romanization for syllable with a double consonant final --- .gitignore | 2 ++ korean_romanizer/pronouncer.py | 41 ++++++++++++++++++---------------- tests/test_romanizer.py | 13 +++++++++++ 3 files changed, 37 insertions(+), 19 deletions(-) diff --git a/.gitignore b/.gitignore index 2bbdb69..8e5d2c5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ *.pyc .cache/v/cache/lastfailed +node_modules/* +public/build/* diff --git a/korean_romanizer/pronouncer.py b/korean_romanizer/pronouncer.py index eeb808e..30f7126 100644 --- a/korean_romanizer/pronouncer.py +++ b/korean_romanizer/pronouncer.py @@ -1,17 +1,17 @@ from korean_romanizer.syllable import Syllable double_consonant_final = { - 'ㄳ' : ('ㄱ', 'ㅅ'), - 'ㄵ' : ('ᆫ', 'ㅈ'), + 'ᆪ' : ('ᆨ', 'ᆺ'), + 'ᆬ' : ('ᆫ', 'ᆽ'), 'ᆭ' : ('ᆫ', 'ᇂ'), - 'ㄺ' : ('ㄹ', 'ㄱ'), - 'ㄻ' : ('ㄹ', 'ㅁ'), - 'ㄼ' : ('ㄹ', 'ㅂ'), - 'ㄽ' : ('ㄹ', 'ㅅ'), - 'ㄾ' : ('ㄹ', 'ㅌ'), - 'ㄿ' : ('ㄹ', 'ㅍ'), + 'ᆰ' : ('ᆯ', 'ᆨ'), + 'ᆱ' : ('ᆯ', 'ᆷ'), + 'ᆲ' : ('ᆯ', 'ᆸ'), + 'ᆳ' : ('ᆯ', 'ᆻ'), + 'ᆴ' : ('ᆯ', 'ᇀ'), + 'ᆵ' : ('ᆯ', 'ᇁ'), 'ㅀ' : ('ㄹ', 'ᇂ'), - 'ㅄ' : ('ㅂ', 'ㅅ'), + 'ᆹ' : ('ᆸ', 'ᆺ'), 'ㅆ' : ('ㅅ', 'ㅅ') } @@ -40,11 +40,13 @@ def final_substitute(self): except AttributeError: final_is_before_V = False + is_last_syllable = syllable.final and next_syllable is None + # 1. 받침 ‘ㄲ, ㅋ’, ‘ㅅ, ㅆ, ㅈ, ㅊ, ㅌ’, ‘ㅍ’은 어말 또는 자음 앞에서 각각 대표음 [ㄱ, ㄷ, ㅂ]으로 발음한다. # 2. 겹받침 ‘ㄳ’, ‘ㄵ’, ‘ㄼ, ㄽ, ㄾ’, ‘ㅄ’은 어말 또는 자음 앞에서 각각 [ㄱ, ㄴ, ㄹ, ㅂ]으로 발음한다. # 3. 겹받침 ‘ㄺ, ㄻ, ㄿ’은 어말 또는 자음 앞에서 각각 [ㄱ, ㅁ, ㅂ]으로 발음한다. # <-> 단, 국어의 로마자 표기법 규정에 의해 된소리되기는 표기에 반영하지 않으므로 제외. - if(syllable.final or final_is_before_C): + if is_last_syllable or final_is_before_C: if(syllable.final in ['ᆩ', 'ᆿ', 'ᆪ', 'ᆰ']): syllable.final = 'ᆨ' elif(syllable.final in ['ᆺ', 'ᆻ', 'ᆽ', 'ᆾ', 'ᇀ']): @@ -57,8 +59,7 @@ def final_substitute(self): syllable.final = 'ᆯ' elif(syllable.final in ['ᆱ']): syllable.final = 'ᆷ' - - + # 4. 받침 ‘ㅎ’의 발음은 다음과 같다. if syllable.final in ['ᇂ', 'ᆭ', 'ᆶ']: @@ -95,17 +96,19 @@ def final_substitute(self): else: if (syllable.final == 'ᇂ'): syllable.final = None - # 5. 홑받침이나 쌍받침이 모음으로 시작된 조사나 어미, 접미사와 결합되는 경우에는, - # 제 음가대로 뒤 음절 첫소리로 옮겨 발음한다. - if next_syllable and final_is_before_V: - if(next_syllable.initial == NULL_CONSONANT and syllable.final not in ["ᆼ", None]): # do nothing if final is ᆼ or null - next_syllable.initial = next_syllable.final_to_initial(syllable.final) - syllable.final = None # 6. 겹받침이 모음으로 시작된 조사나 어미, 접미사와 결합되는 경우에는, # 뒤엣것만을 뒤 음절 첫소리로 옮겨 발음한다.(이 경우, ‘ㅅ’은 된소리로 발음함.) - if syllable.final in double_consonant_final: + if syllable.final in double_consonant_final and next_syllable.initial == NULL_CONSONANT: double_consonant = double_consonant_final[syllable.final] syllable.final = double_consonant[0] next_syllable.initial = next_syllable.final_to_initial(double_consonant[1]) + + # 5. 홑받침이나 쌍받침이 모음으로 시작된 조사나 어미, 접미사와 결합되는 경우에는, + # 제 음가대로 뒤 음절 첫소리로 옮겨 발음한다. + if next_syllable and final_is_before_V: + if(next_syllable.initial == NULL_CONSONANT and syllable.final not in ["ᆼ", None]): # do nothing if final is ᆼ or null + next_syllable.initial = next_syllable.final_to_initial(syllable.final) + syllable.final = None + return self._syllables diff --git a/tests/test_romanizer.py b/tests/test_romanizer.py index 66a0630..fa4e59d 100644 --- a/tests/test_romanizer.py +++ b/tests/test_romanizer.py @@ -21,6 +21,7 @@ def test_coda_g_d_b(): assert romanize("바닷가") == "badatga" assert romanize("없다") == "eopda" assert romanize("앞만") == "apman" + assert romanize("읊다") == "eupda" def test_r_l(): assert romanize("구리") == "guri" @@ -34,6 +35,18 @@ def test_next_syllable_null_initial(): assert romanize("좋아하고") == "joahago" assert romanize("좋은") == "joeun" +def test_double_consonant_final_and_next_syllable_null_initial(): + assert romanize("했었어요") == "haesseosseoyo" + assert romanize("없었다") == "eopseotda" + assert romanize("앉아봐") == "anjabwa" + assert romanize("닭의") == "dalgui" + assert romanize("밟아") == "balba" + assert romanize("닮았네") == "dalmatne" + assert romanize("삯을") == "sakseul" + assert romanize("앓았다") == "aratda" + assert romanize("읊어 보거라") == "eulpeo bogeora" + assert romanize("곬이") == "golssi" + assert romanize("훑어보다") == "hulteoboda" def test_non_syllables(): assert romanize("ㅠㄴㅁㄱ") == "ㅠㄴㅁㄱ"