Skip to content

Commit

Permalink
Refactoring logic of conversion
Browse files Browse the repository at this point in the history
  • Loading branch information
spiegel-im-spiegel committed Sep 12, 2021
1 parent e3cd4a8 commit b3a43d4
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 32 deletions.
9 changes: 0 additions & 9 deletions kana/replace.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,6 @@ var (
}
)

// //ReplaceHiragana replaces hiragana from katakana (full-width kana characters only).
// func ReplaceHiragana(txt string) string {
// ss := []string{}
// for k, v := range replacekanaMap {
// ss = append(ss, k, v)
// }
// return strings.ToLowerSpecial(kanaCase, strings.NewReplacer(ss...).Replace(txt))
// }

//ReplaceKatakana converts hiragana to katakana (full-width kana characters only).
func ReplaceKatakana(txt string) string {
ss := []string{}
Expand Down
53 changes: 34 additions & 19 deletions krconv.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,46 +10,54 @@ import (
)

func Convert(s string) string {
//conversion fullwidth katakana
kanaTxt := width.ConvertStringFold(kana.ReplaceKatakana(s))

//get character list
//get characters list
cl := []string{}
gr := uniseg.NewGraphemes(kanaTxt)
for gr.Next() {
cl = append(cl, table.RomanLetters(gr.Str()))
}
cl2 := make([]string, 0, len(cl))
//convert contracted character
for i := 0; i < len(cl); i++ {
if table.ExistContractedFirstChar(cl[i]) && i < len(cl)-1 {
if cc := table.GetContractedChars(cl[i], cl[i+1]); len(cc) > 0 {
cl2 = append(cl2, cc)
i++
next, ok := nextRomanLetters(gr)
for ok {
//convert contracted characters(拗音)
if table.ExistContractedFirstChar(next) {
char := next
next, ok = nextRomanLetters(gr)
if ok {
if cc, okok := table.GetContractedChars(char, next); okok {
cl = append(cl, cc)
next, ok = nextRomanLetters(gr)
} else {
cl = append(cl, char)
}
} else {
cl2 = append(cl2, cl[i])
cl = append(cl, char)
}
} else {
cl2 = append(cl2, cl[i])
cl = append(cl, next)
next, ok = nextRomanLetters(gr)
}
}

//check special characters
cl2 := cl
cl = make([]string, 0, len(cl2))
for i := 0; i < len(cl2); i++ {
switch cl2[i] {
case "xya", "xyu", "xyo": //単独拗音(ゃゅょ) to upper case
cl = append(cl, cl2[i][1:])
case "xya", "xyu", "xyo": //single 拗音(ゃゅょ)
cl = append(cl, cl2[i][1:]) //case toupper
case "n": //撥音(ん)
if i < len(cl2)-1 && (strings.HasPrefix(cl2[i+1], "b") || strings.HasPrefix(cl2[i+1], "m") || strings.HasPrefix(cl2[i+1], "p")) {
cl = append(cl, "m")
cl = append(cl, "m") //set character 'm'
} else {
cl = append(cl, cl2[i])
}
case "xtsu": //促音(っ)
if i >= len(cl2)-1 {
cl = append(cl, cl2[i][1:]) //促音 to upper case
cl = append(cl, cl2[i][1:]) //case toupper
} else if strings.HasPrefix(cl2[i+1], "ch") {
cl = append(cl, "t")
cl = append(cl, "t") //set letter 't'
} else {
cl = append(cl, cl2[i+1][:1])
cl = append(cl, cl2[i+1][:1]) //repeat the first character in next letter
}
default:
cl = append(cl, cl2[i])
Expand All @@ -59,6 +67,13 @@ func Convert(s string) string {
return strings.Join(cl, "")
}

//nextRomanLetters advances gr with gr.Next() and returns the result of
//table.RomanLetter for the current gr.Str() together with true.
//When gr.Next() reports false, it returns an empty string and false.
func nextRomanLetters(gr *uniseg.Graphemes) (string, bool) {
	if !gr.Next() {
		//nothing left to read
		return "", false
	}
	letters := table.RomanLetter(gr.Str())
	return letters, true
}

/* Copyright 2021 Spiegel
*
* Licensed under the Apache License, Version 2.0 (the "License");
Expand Down
3 changes: 2 additions & 1 deletion table/characters.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package table

func RomanLetters(s string) string {
//RomanLetter function returns roman letter from fullwidth katakana.
func RomanLetter(s string) string {
if r, ok := mapCharacters[s]; ok {
return r
}
Expand Down
10 changes: 7 additions & 3 deletions table/contracted.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,18 @@ import (
"strings"
)

//ExistContractedFirstChar reports whether first equals the roman1 field of
//some entry in contractedList.
//
//The lookup uses sort.Search, which requires contractedList to be ordered
//by roman1 (ascending, per strings.Compare) for the result to be meaningful.
func ExistContractedFirstChar(first string) bool {
	//predicate: true from the first entry whose roman1 is not less than first
	notLess := func(i int) bool {
		return strings.Compare(contractedList[i].roman1, first) >= 0
	}
	idx := sort.Search(len(contractedList), notLess)
	//idx == len(contractedList) means every roman1 is less than first
	return idx < len(contractedList) && contractedList[idx].roman1 == first
}

func GetContractedChars(first, second string) string {
//GetContractedChars function returns contracted character from two letters.
func GetContractedChars(first, second string) (string, bool) {
//binary search in contractedList
n := sort.Search(len(contractedList), func(i int) bool {
if strings.Compare(contractedList[i].roman1, first) > 0 {
return true
Expand All @@ -23,9 +27,9 @@ func GetContractedChars(first, second string) string {
return false
})
if n < len(contractedList) && contractedList[n].roman1 == first && contractedList[n].roman2 == second {
return contractedList[n].contracted
return contractedList[n].contracted, true
}
return ""
return "", false
}

/* Copyright 2021 Spiegel
Expand Down

0 comments on commit b3a43d4

Please sign in to comment.