Skip to content

Commit

Permalink
Auto merge of #17 - mozillazg:develop, r=mozillazg
Browse files Browse the repository at this point in the history
v0.11.0
  • Loading branch information
homu committed Oct 28, 2016
2 parents 638ff21 + beb7182 commit 127001a
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 9 deletions.
18 changes: 18 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,24 @@
# Changelog


## 0.11.0 (2016-10-28)

* **Changed** 不再使用 `0` 表示轻声(因为之前并没有正确地实现这个功能,同时也觉得这个功能没必要)。
顺便修复了 Tone2 中 `ü` 被标为轻声的问题(例如 `侵略 -> qi1n lv0e4`)。
* **NEW** 新增 `Tone3` 和 `FinalsTone3` 拼音风格。

hans := "中国人"
args := pinyin.NewArgs()
args.Style = pinyin.Tone3
fmt.Println("Tone3:", pinyin.Pinyin(hans, args))
// Output: Tone3: [[zhong1] [guo2] [ren2]]

args.Style = pinyin.FinalsTone3
fmt.Println("FinalsTone3:", pinyin.Pinyin(hans, args))
// Output: FinalsTone3: [[ong1] [uo2] [en2]]



## 0.10.0 (2016-10-18)

* **Changed** use [pinyin-data](https://github.com/mozillazg/pinyin-data) v0.4.0
Expand Down
2 changes: 1 addition & 1 deletion phonetic_symbol.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ var phoneticSymbol = map[string]string{
"ú": "u2",
"ǔ": "u3",
"ù": "u4",
"ü": "v0",
"ü": "v",
"ǘ": "v2",
"ǚ": "v3",
"ǜ": "v4",
Expand Down
26 changes: 18 additions & 8 deletions pinyin.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (

// Meta
const (
Version = "0.10.0"
Version = "0.11.0"
Author = "mozillazg, 闲耘"
License = "MIT"
Copyright = "Copyright (c) 2016 mozillazg, 闲耘"
Expand All @@ -17,12 +17,14 @@ const (
const (
Normal = 0 // 普通风格,不带声调(默认风格)。如: zhong guo
Tone = 1 // 声调风格1,拼音声调在韵母第一个字母上。如: zhōng guó
Tone2 = 2 // 声调风格2,即拼音声调在各个拼音之后,用数字 [0-4] 进行表示。如: zho1ng guo2
Tone2 = 2 // 声调风格2,即拼音声调在各个韵母之后,用数字 [1-4] 进行表示。如: zho1ng guo2
Tone3 = 8 // 声调风格3,即拼音声调在各个拼音之后,用数字 [1-4] 进行表示。如: zhong1 guo2
Initials = 3 // 声母风格,只返回各个拼音的声母部分。如: zh g
FirstLetter = 4 // 首字母风格,只返回拼音的首字母部分。如: z g
Finals = 5 // 韵母风格1,只返回各个拼音的韵母部分,不带声调。如: ong uo
FinalsTone = 6 // 韵母风格2,带声调,声调在韵母第一个字母上。如: ōng uó
FinalsTone2 = 7 // 韵母风格2,带声调,声调在各个拼音之后,用数字 [0-4] 进行表示。如: o1ng uo2
Finals = 5 // 韵母风格,只返回各个拼音的韵母部分,不带声调。如: ong uo
FinalsTone = 6 // 韵母风格1,带声调,声调在韵母第一个字母上。如: ōng uó
FinalsTone2 = 7 // 韵母风格2,带声调,声调在各个韵母之后,用数字 [1-4] 进行表示。如: o1ng uo2
FinalsTone3 = 9 // 韵母风格3,带声调,声调在各个拼音之后,用数字 [1-4] 进行表示。如: ong1 uo2
)

// 拼音风格(兼容之前的版本)
Expand Down Expand Up @@ -56,7 +58,10 @@ var rePhoneticSymbolSource = func(m map[string]string) string {
var rePhoneticSymbol = regexp.MustCompile("[" + rePhoneticSymbolSource + "]")

// 匹配使用数字标识声调的字符的正则表达式
var reTone2 = regexp.MustCompile("([aeoiuvnm])([0-4])$")
var reTone2 = regexp.MustCompile("([aeoiuvnm])([1-4])$")

// reTone3 matches a whole pinyin string that carries an inline tone digit:
// one or more lowercase letters, a single digit 1-4, then zero or more
// lowercase letters, anchored at both ends. The three capture groups allow a
// replacement such as "$1$3$2" to move the tone digit to the end of the string.
var reTone3 = regexp.MustCompile("^([a-z]+)([1-4])([a-z]*)$")

// Args 配置信息
type Args struct {
Expand Down Expand Up @@ -159,7 +164,7 @@ func toFixed(p string, a Args) string {
case Normal, FirstLetter, Finals:
// 去掉声调: a1 -> a
m = reTone2.ReplaceAllString(symbol, "$1")
case Tone2, FinalsTone2:
case Tone2, FinalsTone2, Tone3, FinalsTone3:
// 返回使用数字标识声调的字符
m = symbol
default:
Expand All @@ -168,12 +173,17 @@ func toFixed(p string, a Args) string {
return m
})

switch a.Style {
// 将声调移动到最后
case Tone3, FinalsTone3:
py = reTone3.ReplaceAllString(py, "$1$3$2")
}
switch a.Style {
// 首字母
case FirstLetter:
py = py[:1]
// 韵母
case Finals, FinalsTone, FinalsTone2:
case Finals, FinalsTone, FinalsTone2, FinalsTone3:
// 转换为 []rune unicode 编码用于获取第一个拼音字符
// 因为 string 是 utf-8 编码不方便获取第一个拼音字符
rs := []rune(origP)
Expand Down
38 changes: 38 additions & 0 deletions pinyin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,15 @@ func TestPinyin(t *testing.T) {
{"re2n"},
},
},
// Tone3
{
Args{Style: Tone3},
[][]string{
{"zhong1"},
{"guo2"},
{"ren2"},
},
},
// Initials
{
Args{Style: Initials},
Expand Down Expand Up @@ -113,6 +122,15 @@ func TestPinyin(t *testing.T) {
{"e2n"},
},
},
// FinalsTone3
{
Args{Style: FinalsTone3},
[][]string{
{"ong1"},
{"uo2"},
{"en2"},
},
},
// Heteronym
{
Args{Heteronym: true},
Expand Down Expand Up @@ -275,50 +293,70 @@ func TestUpdated(t *testing.T) {
testData := []testItem{
// 误把 yu 放到声母列表了
{"鱼", Args{Style: Tone2}, [][]string{{"yu2"}}},
{"鱼", Args{Style: Tone3}, [][]string{{"yu2"}}},
{"鱼", Args{Style: Finals}, [][]string{{"v"}}},
{"雨", Args{Style: Tone2}, [][]string{{"yu3"}}},
{"雨", Args{Style: Tone3}, [][]string{{"yu3"}}},
{"雨", Args{Style: Finals}, [][]string{{"v"}}},
{"元", Args{Style: Tone2}, [][]string{{"yua2n"}}},
{"元", Args{Style: Tone3}, [][]string{{"yuan2"}}},
{"元", Args{Style: Finals}, [][]string{{"van"}}},
// y, w 也不是拼音, yu的韵母是v, yi的韵母是i, wu的韵母是u
{"呀", Args{Style: Initials}, [][]string{{""}}},
{"呀", Args{Style: Tone2}, [][]string{{"ya"}}},
{"呀", Args{Style: Tone3}, [][]string{{"ya"}}},
{"呀", Args{Style: Finals}, [][]string{{"ia"}}},
{"无", Args{Style: Initials}, [][]string{{""}}},
{"无", Args{Style: Tone2}, [][]string{{"wu2"}}},
{"无", Args{Style: Tone3}, [][]string{{"wu2"}}},
{"无", Args{Style: Finals}, [][]string{{"u"}}},
{"衣", Args{Style: Tone2}, [][]string{{"yi1"}}},
{"衣", Args{Style: Tone3}, [][]string{{"yi1"}}},
{"衣", Args{Style: Finals}, [][]string{{"i"}}},
{"万", Args{Style: Tone2}, [][]string{{"wa4n"}}},
{"万", Args{Style: Tone3}, [][]string{{"wan4"}}},
{"万", Args{Style: Finals}, [][]string{{"uan"}}},
// ju, qu, xu 的韵母应该是 v
{"具", Args{Style: FinalsTone}, [][]string{{"ǜ"}}},
{"具", Args{Style: FinalsTone2}, [][]string{{"v4"}}},
{"具", Args{Style: FinalsTone3}, [][]string{{"v4"}}},
{"具", Args{Style: Finals}, [][]string{{"v"}}},
{"取", Args{Style: FinalsTone}, [][]string{{"ǚ"}}},
{"取", Args{Style: FinalsTone2}, [][]string{{"v3"}}},
{"取", Args{Style: FinalsTone3}, [][]string{{"v3"}}},
{"取", Args{Style: Finals}, [][]string{{"v"}}},
{"徐", Args{Style: FinalsTone}, [][]string{{"ǘ"}}},
{"徐", Args{Style: FinalsTone2}, [][]string{{"v2"}}},
{"徐", Args{Style: FinalsTone3}, [][]string{{"v2"}}},
{"徐", Args{Style: Finals}, [][]string{{"v"}}},
// # ń
{"嗯", Args{Style: Normal}, [][]string{{"n"}}},
{"嗯", Args{Style: Tone}, [][]string{{"ń"}}},
{"嗯", Args{Style: Tone2}, [][]string{{"n2"}}},
{"嗯", Args{Style: Tone3}, [][]string{{"n2"}}},
{"嗯", Args{Style: Initials}, [][]string{{""}}},
{"嗯", Args{Style: FirstLetter}, [][]string{{"n"}}},
{"嗯", Args{Style: Finals}, [][]string{{"n"}}},
{"嗯", Args{Style: FinalsTone}, [][]string{{"ń"}}},
{"嗯", Args{Style: FinalsTone2}, [][]string{{"n2"}}},
{"嗯", Args{Style: FinalsTone3}, [][]string{{"n2"}}},
// # ḿ \u1e3f U+1E3F
{"呣", Args{Style: Normal}, [][]string{{"m"}}},
{"呣", Args{Style: Tone}, [][]string{{"ḿ"}}},
{"呣", Args{Style: Tone2}, [][]string{{"m2"}}},
{"呣", Args{Style: Tone3}, [][]string{{"m2"}}},
{"呣", Args{Style: Initials}, [][]string{{""}}},
{"呣", Args{Style: FirstLetter}, [][]string{{"m"}}},
{"呣", Args{Style: Finals}, [][]string{{"m"}}},
{"呣", Args{Style: FinalsTone}, [][]string{{"ḿ"}}},
{"呣", Args{Style: FinalsTone2}, [][]string{{"m2"}}},
{"呣", Args{Style: FinalsTone3}, [][]string{{"m2"}}},
// 去除 0
{"啊", Args{Style: Tone2}, [][]string{{"a"}}},
{"啊", Args{Style: Tone3}, [][]string{{"a"}}},
{"侵略", Args{Style: Tone2}, [][]string{{"qi1n"}, {"lve4"}}},
{"侵略", Args{Style: FinalsTone2}, [][]string{{"i1n"}, {"ve4"}}},
{"侵略", Args{Style: FinalsTone3}, [][]string{{"in1"}, {"ve4"}}},
}
testPinyinUpdate(t, testData, Pinyin)
}
Expand Down

0 comments on commit 127001a

Please sign in to comment.