Skip to content

Commit

Permalink
fix: romanization for syllable with a double consonant final
Browse files Browse the repository at this point in the history
  • Loading branch information
osori committed Aug 19, 2021
1 parent 8951501 commit eabd992
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 19 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@

*.pyc
.cache/v/cache/lastfailed
node_modules/*
public/build/*
41 changes: 22 additions & 19 deletions korean_romanizer/pronouncer.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
from korean_romanizer.syllable import Syllable

# Lookup table for double-consonant finals, keyed by the final consonant.
# Each value is a 2-tuple: final_substitute keeps element 0 as the current
# syllable's final and passes element 1 to next_syllable.final_to_initial()
# so that it becomes the initial of the following syllable.
# NOTE(review): many keys/values below are empty strings and the key '' repeats,
# so later entries overwrite earlier ones. This looks like characters lost in
# extraction and/or old and new diff lines interleaved -- confirm every entry
# against the repository before relying on this table.
double_consonant_final = {
'' : ('', ''),
'' : ('ᆫ', ''),
'' : ('', ''),
'' : ('ᆫ', ''),
'ᆭ' : ('ᆫ', 'ᇂ'),
'' : ('', ''),
'' : ('', ''),
'' : ('', ''),
'' : ('', ''),
'' : ('', ''),
'' : ('', ''),
'' : ('', ''),
'' : ('', ''),
'' : ('', ''),
'' : ('', ''),
'' : ('', ''),
'' : ('', ''),
# NOTE(review): the two populated keys below use compatibility jamo, while the
# membership checks in final_substitute use jongseong code points -- verify.
'ㅀ' : ('ㄹ', 'ᇂ'),
'' : ('', ''),
'' : ('', ''),
'ㅆ' : ('ㅅ', 'ㅅ')
}

Expand Down Expand Up @@ -40,11 +40,13 @@ def final_substitute(self):
except AttributeError:
final_is_before_V = False

is_last_syllable = syllable.final and next_syllable is None

# 1. 받침 ‘ㄲ, ㅋ’, ‘ㅅ, ㅆ, ㅈ, ㅊ, ㅌ’, ‘ㅍ’은 어말 또는 자음 앞에서 각각 대표음 [ㄱ, ㄷ, ㅂ]으로 발음한다.
# 2. 겹받침 ‘ㄳ’, ‘ㄵ’, ‘ㄼ, ㄽ, ㄾ’, ‘ㅄ’은 어말 또는 자음 앞에서 각각 [ㄱ, ㄴ, ㄹ, ㅂ]으로 발음한다.
# 3. 겹받침 ‘ㄺ, ㄻ, ㄿ’은 어말 또는 자음 앞에서 각각 [ㄱ, ㅁ, ㅂ]으로 발음한다.
# <-> 단, 국어의 로마자 표기법 규정에 의해 된소리되기는 표기에 반영하지 않으므로 제외.
if(syllable.final or final_is_before_C):
if is_last_syllable or final_is_before_C:
if(syllable.final in ['ᆩ', 'ᆿ', 'ᆪ', 'ᆰ']):
syllable.final = 'ᆨ'
elif(syllable.final in ['ᆺ', 'ᆻ', 'ᆽ', 'ᆾ', 'ᇀ']):
Expand All @@ -57,8 +59,7 @@ def final_substitute(self):
syllable.final = 'ᆯ'
elif(syllable.final in ['ᆱ']):
syllable.final = 'ᆷ'



# 4. 받침 ‘ㅎ’의 발음은 다음과 같다.
if syllable.final in ['ᇂ', 'ᆭ', 'ᆶ']:

Expand Down Expand Up @@ -95,17 +96,19 @@ def final_substitute(self):
else:
if (syllable.final == 'ᇂ'):
syllable.final = None
# 5. 홑받침이나 쌍받침이 모음으로 시작된 조사나 어미, 접미사와 결합되는 경우에는,
# 제 음가대로 뒤 음절 첫소리로 옮겨 발음한다.
if next_syllable and final_is_before_V:
if(next_syllable.initial == NULL_CONSONANT and syllable.final not in ["ᆼ", None]): # do nothing if final is ᆼ or null
next_syllable.initial = next_syllable.final_to_initial(syllable.final)
syllable.final = None

# 6. 겹받침이 모음으로 시작된 조사나 어미, 접미사와 결합되는 경우에는,
# 뒤엣것만을 뒤 음절 첫소리로 옮겨 발음한다.(이 경우, ‘ㅅ’은 된소리로 발음함.)
if syllable.final in double_consonant_final:
if syllable.final in double_consonant_final and next_syllable.initial == NULL_CONSONANT:
double_consonant = double_consonant_final[syllable.final]
syllable.final = double_consonant[0]
next_syllable.initial = next_syllable.final_to_initial(double_consonant[1])

# 5. 홑받침이나 쌍받침이 모음으로 시작된 조사나 어미, 접미사와 결합되는 경우에는,
# 제 음가대로 뒤 음절 첫소리로 옮겨 발음한다.
if next_syllable and final_is_before_V:
if(next_syllable.initial == NULL_CONSONANT and syllable.final not in ["ᆼ", None]): # do nothing if final is ᆼ or null
next_syllable.initial = next_syllable.final_to_initial(syllable.final)
syllable.final = None

return self._syllables
13 changes: 13 additions & 0 deletions tests/test_romanizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def test_coda_g_d_b():
assert romanize("바닷가") == "badatga"
assert romanize("없다") == "eopda"
assert romanize("앞만") == "apman"
assert romanize("읊다") == "eupda"

def test_r_l():
assert romanize("구리") == "guri"
Expand All @@ -34,6 +35,18 @@ def test_next_syllable_null_initial():
assert romanize("좋아하고") == "joahago"
assert romanize("좋은") == "joeun"

def test_double_consonant_final_and_next_syllable_null_initial():
    """Check romanization when a double-consonant final precedes a vowel-initial syllable."""
    # (Hangul input, expected romanization) pairs, checked in order.
    expectations = (
        ("했었어요", "haesseosseoyo"),
        ("없었다", "eopseotda"),
        ("앉아봐", "anjabwa"),
        ("닭의", "dalgui"),
        ("밟아", "balba"),
        ("닮았네", "dalmatne"),
        ("삯을", "sakseul"),
        ("앓았다", "aratda"),
        ("읊어 보거라", "eulpeo bogeora"),
        ("곬이", "golssi"),
        ("훑어보다", "hulteoboda"),
    )
    for hangul, expected in expectations:
        assert romanize(hangul) == expected

def test_non_syllables():
assert romanize("ㅠㄴㅁㄱ") == "ㅠㄴㅁㄱ"
Expand Down

0 comments on commit eabd992

Please sign in to comment.