From d49563a3511cb582e4025b7a10f4972319d3ad14 Mon Sep 17 00:00:00 2001 From: Dan Peebles Date: Sun, 8 Jan 2017 17:01:47 -0500 Subject: [PATCH] =?UTF-8?q?When=20using=20diacritics,=20replace=20'v'=20wi?= =?UTF-8?q?th=20'=C3=BC'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This helps with words like 女, for which we previously output 'nv̌'. Now we return the more conventional 'nǚ'. --- pinyin/pinyin.py | 1 + test_pinyin.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pinyin/pinyin.py b/pinyin/pinyin.py index 631e4b2..37d2653 100644 --- a/pinyin/pinyin.py +++ b/pinyin/pinyin.py @@ -39,6 +39,7 @@ def _pinyin_generator(chars, format): vowels = itertools.chain((c for c in pinyin if c in "aeo"), (c for c in pinyin if c in "iuv")) vowel = pinyin.index(next(vowels)) + 1 + pinyin = pinyin.replace('v', u('ü')) pinyin = pinyin[:vowel] + tonemarks[tone] + pinyin[vowel:] else: error = "Format must be one of: numerical/diacritical/strip" diff --git a/test_pinyin.py b/test_pinyin.py index d5158ee..ab6fbe6 100644 --- a/test_pinyin.py +++ b/test_pinyin.py @@ -20,7 +20,7 @@ def test_get(self): self.assertEqual(pinyin.get('你好'), u('nǐhǎo')) self.assertEqual(pinyin.get('叶'), u('yè')) - self.assertEqual(pinyin.get('少女'), u('shǎonv̌')) + self.assertEqual(pinyin.get('少女'), u('shǎonǚ')) def test_get_with_delimiter(self): self.assertEqual(pinyin.get('你好', " "), u('nǐ hǎo')) @@ -47,7 +47,7 @@ def test_correct_diacritical(self): self.assertEqual(pinyin.get("小"), u("xiǎo")) self.assertEqual(pinyin.get("绝"), u("jué")) self.assertEqual(pinyin.get("被"), u("bèi")) - self.assertEqual(pinyin.get("略"), u("lvè")) + self.assertEqual(pinyin.get("略"), u("lüè")) if __name__ == '__main__':