From d9eb43bbffdb9a7119a85ff456638026a9cc9d3b Mon Sep 17 00:00:00 2001 From: SamDelaney Date: Fri, 20 Nov 2020 11:02:49 -0800 Subject: [PATCH 1/7] Update VerseRef.TryGetVerseNum() to handle non-latin numerals --- SIL.Scripture.Tests/VerseRefTests.cs | 32 ++++++++++++++++++++++++++++ SIL.Scripture/VerseRef.cs | 4 ++-- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/SIL.Scripture.Tests/VerseRefTests.cs b/SIL.Scripture.Tests/VerseRefTests.cs index 01633ae9b..6f0ecd9f0 100644 --- a/SIL.Scripture.Tests/VerseRefTests.cs +++ b/SIL.Scripture.Tests/VerseRefTests.cs @@ -2067,6 +2067,38 @@ public void UnBridge() Assert.AreEqual(new VerseRef("EXO 6:4"), new VerseRef("EXO 6:4-10").UnBridge()); Assert.AreEqual(new VerseRef("EXO 6:150monkeys"), new VerseRef("EXO 6:150monkeys").UnBridge()); } + + /// + /// Tests the Verse property's set method with numerals from various Unicode-supported scripts + /// + [Test] + public void SetVerse_InterpretUnicodeNumerals() + { + VerseRef vref = new VerseRef("EXO 6:5"); + Assert.AreEqual(5, vref.VerseNum); + + vref.Verse = "५"; // Devanagari numeral + Assert.AreEqual(5, vref.VerseNum); + + vref.Verse = "૧૬"; // Gujarati numeral + Assert.AreEqual(16, vref.VerseNum); + + vref.Verse = "൬"; // Malayalam numeral + Assert.AreEqual(6, vref.VerseNum); + + vref.Verse = "᠔"; // Mongolian numeral + Assert.AreEqual(4, vref.VerseNum); + + vref.Verse = "๙"; // Thai numeral + Assert.AreEqual(9, vref.VerseNum); + + vref.Verse = "A"; // Latin non-numeral + Assert.AreEqual(-1, vref.VerseNum); + + vref.Verse = "ะ"; // Thai non-numeral + Assert.AreEqual(-1, vref.VerseNum); + } + #endregion #region InRange diff --git a/SIL.Scripture/VerseRef.cs b/SIL.Scripture/VerseRef.cs index 379fdee1c..ca6d286d3 100644 --- a/SIL.Scripture/VerseRef.cs +++ b/SIL.Scripture/VerseRef.cs @@ -302,14 +302,14 @@ static bool TryGetVerseNum(string verseStr, out short vNum) for (int i = 0; i < verseStr.Length; i++) { char ch = verseStr[i]; - if (ch < '0' || ch > '9') + if (!char.IsDigit(ch)) { if (i == 0) vNum = -1; return false; } - vNum = (short)(vNum * 10 + ch - '0'); + vNum = (short)(vNum * 10 + char.GetNumericValue(ch)); if (vNum > bcvMaxValue) { // whoops, we got too big! From d7fe4bbfa608916bfd37775a3ab67e0505109f5b Mon Sep 17 00:00:00 2001 From: SamDelaney Date: Fri, 20 Nov 2020 13:24:34 -0800 Subject: [PATCH 2/7] Change unit test per review --- SIL.Scripture.Tests/VerseRefTests.cs | 38 +++++++++------------------- 1 file changed, 12 insertions(+), 26 deletions(-) diff --git a/SIL.Scripture.Tests/VerseRefTests.cs b/SIL.Scripture.Tests/VerseRefTests.cs index 6f0ecd9f0..b84bb03ca 100644 --- a/SIL.Scripture.Tests/VerseRefTests.cs +++ b/SIL.Scripture.Tests/VerseRefTests.cs @@ -2071,32 +2071,18 @@ public void UnBridge() /// /// Tests the Verse property's set method with numerals from various Unicode-supported scripts /// - [Test] - public void SetVerse_InterpretUnicodeNumerals() - { - VerseRef vref = new VerseRef("EXO 6:5"); - Assert.AreEqual(5, vref.VerseNum); - - vref.Verse = "५"; // Devanagari numeral - Assert.AreEqual(5, vref.VerseNum); - - vref.Verse = "૧૬"; // Gujarati numeral - Assert.AreEqual(16, vref.VerseNum); - - vref.Verse = "൬"; // Malayalam numeral - Assert.AreEqual(6, vref.VerseNum); - - vref.Verse = "᠔"; // Mongolian numeral - Assert.AreEqual(4, vref.VerseNum); - - vref.Verse = "๙"; // Thai numeral - Assert.AreEqual(9, vref.VerseNum); - - vref.Verse = "A"; // Latin non-numeral - Assert.AreEqual(-1, vref.VerseNum); - - vref.Verse = "ะ"; // Thai non-numeral - Assert.AreEqual(-1, vref.VerseNum); + [TestCase("५", 5, TestName = "Devanagari numeral")] + [TestCase("૧૬", 16, TestName = "Gujarati numeral")] + [TestCase("5",5, TestName = "Latin numeral")] + [TestCase("᠔", 4, TestName = "Mongolian numeral")] + [TestCase("A", -1, TestName = "Latin non-numeral")] + [TestCase("ะ", -1, TestName = "Thai non-numeral")] + public void SetVerse_InterpretUnicodeNumerals(string verseStr, int parsedVerse) + { + VerseRef vref = new VerseRef("EXO 6:1"); + + vref.Verse = verseStr; + Assert.AreEqual(parsedVerse, vref.VerseNum); } #endregion From c40a3410dde71927a8db25d0c6dc2de6e3dd074c Mon Sep 17 00:00:00 2001 From: SamDelaney Date: Fri, 20 Nov 2020 13:53:22 -0800 Subject: [PATCH 3/7] Clean up unit test --- SIL.Scripture.Tests/VerseRefTests.cs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/SIL.Scripture.Tests/VerseRefTests.cs b/SIL.Scripture.Tests/VerseRefTests.cs index b84bb03ca..108f26b3f 100644 --- a/SIL.Scripture.Tests/VerseRefTests.cs +++ b/SIL.Scripture.Tests/VerseRefTests.cs @@ -2071,18 +2071,18 @@ public void UnBridge() /// /// Tests the Verse property's set method with numerals from various Unicode-supported scripts /// - [TestCase("५", 5, TestName = "Devanagari numeral")] - [TestCase("૧૬", 16, TestName = "Gujarati numeral")] - [TestCase("5",5, TestName = "Latin numeral")] - [TestCase("᠔", 4, TestName = "Mongolian numeral")] - [TestCase("A", -1, TestName = "Latin non-numeral")] - [TestCase("ะ", -1, TestName = "Thai non-numeral")] - public void SetVerse_InterpretUnicodeNumerals(string verseStr, int parsedVerse) + [TestCase("५", ExpectedResult = 5, TestName = "Devanagari numeral")] + [TestCase("૧૬", ExpectedResult = 16, TestName = "Gujarati numeral")] + [TestCase("5", ExpectedResult = 5, TestName = "Latin numeral")] + [TestCase("᠔", ExpectedResult = 4, TestName = "Mongolian numeral")] + [TestCase("A", ExpectedResult = -1, TestName = "Latin non-numeral")] + [TestCase("ะ", ExpectedResult = -1, TestName = "Thai non-numeral")] + public int SetVerse_InterpretUnicodeNumerals(string verseStr) { VerseRef vref = new VerseRef("EXO 6:1"); vref.Verse = verseStr; - Assert.AreEqual(parsedVerse, vref.VerseNum); + return vref.VerseNum; } #endregion From 43b22f2dcd12b04ef1fff4e4e5b7e564b089733a Mon Sep 17 00:00:00 2001 From: SamDelaney Date: Tue, 24 Nov 2020 14:39:00 -0800 Subject: [PATCH 4/7] Separate non-Roman verse setting to restore USX compliance --- SIL.Scripture.Tests/VerseRefTests.cs | 4 ++-- SIL.Scripture/VerseRef.cs | 27 ++++++++++++++++++++++++--- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/SIL.Scripture.Tests/VerseRefTests.cs b/SIL.Scripture.Tests/VerseRefTests.cs index 108f26b3f..e04542ce7 100644 --- a/SIL.Scripture.Tests/VerseRefTests.cs +++ b/SIL.Scripture.Tests/VerseRefTests.cs @@ -2077,11 +2077,11 @@ public void UnBridge() [TestCase("᠔", ExpectedResult = 4, TestName = "Mongolian numeral")] [TestCase("A", ExpectedResult = -1, TestName = "Latin non-numeral")] [TestCase("ะ", ExpectedResult = -1, TestName = "Thai non-numeral")] - public int SetVerse_InterpretUnicodeNumerals(string verseStr) + public int SetVerseUnicode_InterpretNumerals(string verseStr) { VerseRef vref = new VerseRef("EXO 6:1"); - vref.Verse = verseStr; + vref.SetVerseUnicode(verseStr); return vref.VerseNum; } diff --git a/SIL.Scripture/VerseRef.cs b/SIL.Scripture/VerseRef.cs index ca6d286d3..25eb54e12 100644 --- a/SIL.Scripture/VerseRef.cs +++ b/SIL.Scripture/VerseRef.cs @@ -290,7 +290,7 @@ public string Text /// true if the entire string could be parsed as a single, /// simple verse number (1-999); false if the verse string represented /// a verse bridge, contained segment letters, or was invalid - static bool TryGetVerseNum(string verseStr, out short vNum) + static bool TryGetVerseNum(string verseStr, out short vNum, bool romanOnly = true) { if (string.IsNullOrEmpty(verseStr)) { @@ -302,14 +302,14 @@ static bool TryGetVerseNum(string verseStr, out short vNum) for (int i = 0; i < verseStr.Length; i++) { char ch = verseStr[i]; - if (!char.IsDigit(ch)) + if (romanOnly ? (ch < '0' || ch > '9') : !char.IsDigit(ch)) { if (i == 0) vNum = -1; return false; } - vNum = (short)(vNum * 10 + char.GetNumericValue(ch)); + vNum = (short)(vNum * 10 + (romanOnly ? ch - '0' : char.GetNumericValue(ch))); if (vNum > bcvMaxValue) { // whoops, we got too big! @@ -1338,6 +1338,27 @@ static void ParseVerseNumber(string vNum, out int number, out string segment) segment = vNum.Substring(j); } + /// + /// Parses a verse string and gets the leading numeric portion as a number. + /// Functionally identical to Verse.Set for Roman numbers, made distinct to preserve USX standard. + /// + /// true if the entire string could be parsed as a single, + /// simple verse number in any supported script; false if the verse string represented + /// a verse bridge, contained segment letters, or was invalid + public void SetVerseUnicode(string value) + { + { + short vNum; + verse = !TryGetVerseNum(value, out vNum, false) ? value.Replace(rtlMark, "") : null; + verseNum = vNum; + if (verseNum >= 0) + return; + + Trace.TraceWarning("Just failed to parse a verse number: " + value); + TryGetVerseNum(verse, out verseNum, false); + } + } + /// /// Returns whether any of the specified references overlap with this one /// From 3abd9fbb13485663ec4a941e8d65a8d0a93ead04 Mon Sep 17 00:00:00 2001 From: SamDelaney Date: Wed, 25 Nov 2020 11:44:18 -0800 Subject: [PATCH 5/7] Clerical improvements per code review +semver:minor --- CHANGELOG.md | 1 + SIL.Scripture.Tests/VerseRefTests.cs | 27 ++++++++++++++++++++++++--- SIL.Scripture/VerseRef.cs | 11 +++++------ 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0f32b37ca..d3bec4397 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/). - [SIL.DblBundle] `TextBundle.GetFonts` (to replace deprecated `CopyFontFiles`) - [SIL.DblBundle] `TextBundle.GetLdml` (to replace deprecated `CopyLdmlFile`) - [SIL.Scripture] `ScrVers.Save` overload to allow serialization to a `TextWriter`. +- [SIL.Scripture] `VerseRef.TrySetVerseUnicode` to set 'verse' and 'verseRef' variables with non-Roman numerals. - [SIL.Core] `XmlSerializationHelper.Serialize` to allow serialization to a `TextWriter`. - [SIL.Core] `XmlSerializationHelper.Deserialize` to allow deserialization from a `TextReader`. - [SIL.Core] `Platform.IsGnomeShell` to detect if executing in a Gnome Shell diff --git a/SIL.Scripture.Tests/VerseRefTests.cs b/SIL.Scripture.Tests/VerseRefTests.cs index e04542ce7..11969f495 100644 --- a/SIL.Scripture.Tests/VerseRefTests.cs +++ b/SIL.Scripture.Tests/VerseRefTests.cs @@ -2069,7 +2069,7 @@ public void UnBridge() } /// - /// Tests the Verse property's set method with numerals from various Unicode-supported scripts + /// Tests the TrySetVerseUnicode method with numerals from various Unicode-supported scripts /// [TestCase("५", ExpectedResult = 5, TestName = "Devanagari numeral")] [TestCase("૧૬", ExpectedResult = 16, TestName = "Gujarati numeral")] @@ -2077,11 +2077,32 @@ public void UnBridge() [TestCase("᠔", ExpectedResult = 4, TestName = "Mongolian numeral")] [TestCase("A", ExpectedResult = -1, TestName = "Latin non-numeral")] [TestCase("ะ", ExpectedResult = -1, TestName = "Thai non-numeral")] - public int SetVerseUnicode_InterpretNumerals(string verseStr) + [TestCase("二十", ExpectedResult = 20, TestName = "Japanese numeral", IgnoreReason = "Non-decimal numeral systems not yet implemented.")] + [TestCase("יא", ExpectedResult = 11, TestName = "Hebrew numeral", IgnoreReason = "Non-decimal numeral systems not yet implemented.")] + [TestCase("\U0001113A\U00011138", ExpectedResult = 42, TestName = "Chakma numeral", IgnoreReason = "Surrogate pair handling not yet implemented.")] + public int TrySetVerseUnicode_InterpretNumerals(string verseStr) { VerseRef vref = new VerseRef("EXO 6:1"); - vref.SetVerseUnicode(verseStr); + bool success = vref.TrySetVerseUnicode(verseStr); + Assert.AreEqual(success, vref.VerseNum != -1); + + return vref.VerseNum; + } + + /// + /// Tests the Verse property's set method with various input strings + /// + [TestCase("5", ExpectedResult = 5, TestName = "Latin numeral")] + [TestCase("524", ExpectedResult = 524, TestName = "Large Latin numeral")] + [TestCase("A", ExpectedResult = -1, TestName = "Latin non-numeral")] + [TestCase("૧૬", ExpectedResult = -1, TestName = "Non-Latin numeral")] + public int SetVerse_InterpretNumerals(string verseStr) + { + VerseRef vref = new VerseRef("EXO 6:1"); + + vref.Verse = verseStr; + return vref.VerseNum; } diff --git a/SIL.Scripture/VerseRef.cs b/SIL.Scripture/VerseRef.cs index 25eb54e12..63a83e9d0 100644 --- a/SIL.Scripture/VerseRef.cs +++ b/SIL.Scripture/VerseRef.cs @@ -245,8 +245,7 @@ public string Verse get { return verse ?? (IsDefault || verseNum < 0 ? string.Empty : verseNum.ToString()); } set { - short vNum; - verse = !TryGetVerseNum(value, out vNum) ? value.Replace(rtlMark, "") : null; + verse = !TryGetVerseNum(value, out short vNum) ? value.Replace(rtlMark, "") : null; verseNum = vNum; if (verseNum >= 0) return; @@ -1345,17 +1344,17 @@ static void ParseVerseNumber(string vNum, out int number, out string segment) /// true if the entire string could be parsed as a single, /// simple verse number in any supported script; false if the verse string represented /// a verse bridge, contained segment letters, or was invalid - public void SetVerseUnicode(string value) + public bool TrySetVerseUnicode(string value) { { - short vNum; - verse = !TryGetVerseNum(value, out vNum, false) ? value.Replace(rtlMark, "") : null; + verse = !TryGetVerseNum(value, out short vNum, false) ? value.Replace(rtlMark, "") : null; verseNum = vNum; if (verseNum >= 0) - return; + return true; Trace.TraceWarning("Just failed to parse a verse number: " + value); TryGetVerseNum(verse, out verseNum, false); + return false; } } From ce21b22f53b9cc4bb260fe75819eed4805aa53d2 Mon Sep 17 00:00:00 2001 From: SamDelaney Date: Mon, 30 Nov 2020 09:17:39 -0800 Subject: [PATCH 6/7] Refactor and add unit tests per code review --- SIL.Scripture.Tests/VerseRefTests.cs | 12 ++++++--- SIL.Scripture/VerseRef.cs | 40 +++++++++++++--------------- 2 files changed, 28 insertions(+), 24 deletions(-) diff --git a/SIL.Scripture.Tests/VerseRefTests.cs b/SIL.Scripture.Tests/VerseRefTests.cs index 11969f495..dc072c781 100644 --- a/SIL.Scripture.Tests/VerseRefTests.cs +++ b/SIL.Scripture.Tests/VerseRefTests.cs @@ -2077,9 +2077,11 @@ public void UnBridge() [TestCase("᠔", ExpectedResult = 4, TestName = "Mongolian numeral")] [TestCase("A", ExpectedResult = -1, TestName = "Latin non-numeral")] [TestCase("ะ", ExpectedResult = -1, TestName = "Thai non-numeral")] - [TestCase("二十", ExpectedResult = 20, TestName = "Japanese numeral", IgnoreReason = "Non-decimal numeral systems not yet implemented.")] - [TestCase("יא", ExpectedResult = 11, TestName = "Hebrew numeral", IgnoreReason = "Non-decimal numeral systems not yet implemented.")] - [TestCase("\U0001113A\U00011138", ExpectedResult = 42, TestName = "Chakma numeral", IgnoreReason = "Surrogate pair handling not yet implemented.")] + [TestCase("᠔-᠔", ExpectedResult = 4, TestName = "Mongolian complex verse")] + [TestCase("᠔ᠠ", ExpectedResult = 4, TestName = "Mongolian complex verse - lettered")] + [TestCase("二十", ExpectedResult = 20, TestName = "Japanese numeral", IgnoreReason = "Non-decimal numeral systems not yet implemented. (See issue #1000.)")] + [TestCase("יא", ExpectedResult = 11, TestName = "Hebrew numeral", IgnoreReason = "Non-decimal numeral systems not yet implemented. (See issue #1000.)")] + [TestCase("\U0001113A\U00011138", ExpectedResult = 42, TestName = "Chakma numeral", IgnoreReason = "Surrogate pair handling not yet implemented. (See issue #1000.)")] public int TrySetVerseUnicode_InterpretNumerals(string verseStr) { VerseRef vref = new VerseRef("EXO 6:1"); @@ -2097,6 +2099,10 @@ public int TrySetVerseUnicode_InterpretNumerals(string verseStr) [TestCase("524", ExpectedResult = 524, TestName = "Large Latin numeral")] [TestCase("A", ExpectedResult = -1, TestName = "Latin non-numeral")] [TestCase("૧૬", ExpectedResult = -1, TestName = "Non-Latin numeral")] + [TestCase("1-", ExpectedResult = 1, TestName = "Complex verse - incomplete")] + [TestCase("2.3", ExpectedResult = 2, TestName = "Complex verse - decimal")] + [TestCase("5-7", ExpectedResult = 5, TestName = "Complex verse - complete")] + [TestCase("7a", ExpectedResult = 7, TestName = "Complex verse - lettered")] public int SetVerse_InterpretNumerals(string verseStr) { VerseRef vref = new VerseRef("EXO 6:1"); diff --git a/SIL.Scripture/VerseRef.cs b/SIL.Scripture/VerseRef.cs index 63a83e9d0..61b6c84bb 100644 --- a/SIL.Scripture/VerseRef.cs +++ b/SIL.Scripture/VerseRef.cs @@ -242,17 +242,8 @@ public string Chapter [XmlIgnore] public string Verse { - get { return verse ?? (IsDefault || verseNum < 0 ? string.Empty : verseNum.ToString()); } - set - { - verse = !TryGetVerseNum(value, out short vNum) ? value.Replace(rtlMark, "") : null; - verseNum = vNum; - if (verseNum >= 0) - return; - - Trace.TraceWarning("Just failed to parse a verse number: " + value); - TryGetVerseNum(verse, out verseNum); - } + get => verse ?? (IsDefault || verseNum < 0 ? string.Empty : verseNum.ToString()); + set => TrySetVerse(value); } /// @@ -283,6 +274,22 @@ public string Text } } + /// + /// Tries to set verse and verseNum by parsing the `value` string. + /// This is used by Verse.set and TrySetVerseUnicode + /// + /// true if the verse was set successfully + bool TrySetVerse(string value, bool romanOnly = true) + { + verse = !TryGetVerseNum(value, out verseNum, romanOnly) ? value.Replace(rtlMark, "") : null; + if (verseNum >= 0) + return true; + + Trace.TraceWarning("Just failed to parse a verse number: " + value); + TryGetVerseNum(verse, out verseNum, romanOnly); + return false; + } + /// /// Parses a verse string and gets the leading numeric portion as a number. /// @@ -1346,16 +1353,7 @@ static void ParseVerseNumber(string vNum, out int number, out string segment) /// a verse bridge, contained segment letters, or was invalid public bool TrySetVerseUnicode(string value) { - { - verse = !TryGetVerseNum(value, out short vNum, false) ? value.Replace(rtlMark, "") : null; - verseNum = vNum; - if (verseNum >= 0) - return true; - - Trace.TraceWarning("Just failed to parse a verse number: " + value); - TryGetVerseNum(verse, out verseNum, false); - return false; - } + return TrySetVerse(value, false); } /// From 5cc3b4f65179c5cfedcfa770f9132b85e408a82a Mon Sep 17 00:00:00 2001 From: SamDelaney Date: Tue, 1 Dec 2020 09:49:47 -0800 Subject: [PATCH 7/7] Remove default value from verse setting methods --- SIL.Scripture/VerseRef.cs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/SIL.Scripture/VerseRef.cs b/SIL.Scripture/VerseRef.cs index 61b6c84bb..2600fa2ef 100644 --- a/SIL.Scripture/VerseRef.cs +++ b/SIL.Scripture/VerseRef.cs @@ -243,7 +243,7 @@ public string Chapter public string Verse { get => verse ?? (IsDefault || verseNum < 0 ? string.Empty : verseNum.ToString()); - set => TrySetVerse(value); + set => TrySetVerse(value, true); // The USX standard only expects support for Latin numerals {0-9}* in verse numbers. } /// @@ -279,14 +279,14 @@ public string Text /// This is used by Verse.set and TrySetVerseUnicode /// /// true if the verse was set successfully - bool TrySetVerse(string value, bool romanOnly = true) + bool TrySetVerse(string value, bool romanOnly) { - verse = !TryGetVerseNum(value, out verseNum, romanOnly) ? value.Replace(rtlMark, "") : null; + verse = !TryGetVerseNum(value, romanOnly, out verseNum) ? value.Replace(rtlMark, "") : null; if (verseNum >= 0) return true; Trace.TraceWarning("Just failed to parse a verse number: " + value); - TryGetVerseNum(verse, out verseNum, romanOnly); + TryGetVerseNum(verse, romanOnly, out verseNum); return false; } @@ -296,7 +296,7 @@ bool TrySetVerse(string value, bool romanOnly = true) /// true if the entire string could be parsed as a single, /// simple verse number (1-999); false if the verse string represented /// a verse bridge, contained segment letters, or was invalid - static bool TryGetVerseNum(string verseStr, out short vNum, bool romanOnly = true) + static bool TryGetVerseNum(string verseStr, bool romanOnly , out short vNum) { if (string.IsNullOrEmpty(verseStr)) {