Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Separate non-Roman verse setting to restore USX compliance in VerseRef #999

Merged
merged 8 commits into from
Dec 2, 2020
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
- [SIL.DblBundle] `TextBundle<TM, TL>.GetFonts` (to replace deprecated `CopyFontFiles`)
- [SIL.DblBundle] `TextBundle<TM, TL>.GetLdml` (to replace deprecated `CopyLdmlFile`)
- [SIL.Scripture] `ScrVers.Save` overload to allow serialization to a `TextWriter`.
- [SIL.Scripture] `VerseRef.TrySetVerseUnicode` to set 'verse' and 'verseRef' variables with non-Roman numerals.
- [SIL.Core] `XmlSerializationHelper.Serialize<T>` to allow serialization to a `TextWriter`.
- [SIL.Core] `XmlSerializationHelper.Deserialize<T>` to allow deserialization from a `TextReader`.
- [SIL.Core] `Platform.IsGnomeShell` to detect if executing in a Gnome Shell
Expand Down
31 changes: 29 additions & 2 deletions SIL.Scripture.Tests/VerseRefTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2069,19 +2069,46 @@ public void UnBridge()
}

/// <summary>
/// Tests the Verse property's set method with numerals from various Unicode-supported scripts
/// Tests the TrySetVerseUnicode method with numerals from various Unicode-supported scripts
/// </summary>
[TestCase("५", ExpectedResult = 5, TestName = "Devanagari numeral")]
[TestCase("૧૬", ExpectedResult = 16, TestName = "Gujarati numeral")]
[TestCase("5", ExpectedResult = 5, TestName = "Latin numeral")]
[TestCase("᠔", ExpectedResult = 4, TestName = "Mongolian numeral")]
[TestCase("A", ExpectedResult = -1, TestName = "Latin non-numeral")]
[TestCase("ะ", ExpectedResult = -1, TestName = "Thai non-numeral")]
public int SetVerse_InterpretUnicodeNumerals(string verseStr)
[TestCase("᠔-᠔", ExpectedResult = 4, TestName = "Mongolian complex verse")]
[TestCase("᠔ᠠ", ExpectedResult = 4, TestName = "Mongolian complex verse - lettered")]
[TestCase("二十", ExpectedResult = 20, TestName = "Japanese numeral", IgnoreReason = "Non-decimal numeral systems not yet implemented. (See issue #1000.)")]
[TestCase("יא", ExpectedResult = 11, TestName = "Hebrew numeral", IgnoreReason = "Non-decimal numeral systems not yet implemented. (See issue #1000.)")]
[TestCase("\U0001113A\U00011138", ExpectedResult = 42, TestName = "Chakma numeral", IgnoreReason = "Surrogate pair handling not yet implemented. (See issue #1000.)")]
public int TrySetVerseUnicode_InterpretNumerals(string verseStr)
{
VerseRef vref = new VerseRef("EXO 6:1");

bool success = vref.TrySetVerseUnicode(verseStr);
Assert.AreEqual(success, vref.VerseNum != -1);

return vref.VerseNum;
}

/// <summary>
/// Tests the Verse property's set method with various input strings
/// </summary>
[TestCase("5", ExpectedResult = 5, TestName = "Latin numeral")]
[TestCase("524", ExpectedResult = 524, TestName = "Large Latin numeral")]
[TestCase("A", ExpectedResult = -1, TestName = "Latin non-numeral")]
[TestCase("૧૬", ExpectedResult = -1, TestName = "Non-Latin numeral")]
[TestCase("1-", ExpectedResult = 1, TestName = "Complex verse - incomplete")]
[TestCase("2.3", ExpectedResult = 2, TestName = "Complex verse - decimal")]
[TestCase("5-7", ExpectedResult = 5, TestName = "Complex verse - complete")]
[TestCase("7a", ExpectedResult = 7, TestName = "Complex verse - lettered")]
public int SetVerse_InterpretNumerals(string verseStr)
{
VerseRef vref = new VerseRef("EXO 6:1");

vref.Verse = verseStr;

return vref.VerseNum;
}
ermshiperete marked this conversation as resolved.
Show resolved Hide resolved

Expand Down
48 changes: 33 additions & 15 deletions SIL.Scripture/VerseRef.cs
Original file line number Diff line number Diff line change
Expand Up @@ -242,18 +242,8 @@ public string Chapter
[XmlIgnore]
public string Verse
{
get { return verse ?? (IsDefault || verseNum < 0 ? string.Empty : verseNum.ToString()); }
set
{
short vNum;
verse = !TryGetVerseNum(value, out vNum) ? value.Replace(rtlMark, "") : null;
verseNum = vNum;
if (verseNum >= 0)
return;

Trace.TraceWarning("Just failed to parse a verse number: " + value);
TryGetVerseNum(verse, out verseNum);
}
get => verse ?? (IsDefault || verseNum < 0 ? string.Empty : verseNum.ToString());
set => TrySetVerse(value, true); // The USX standard only expects support for Latin numerals {0-9}* in verse numbers.
}

/// <summary>
Expand Down Expand Up @@ -284,13 +274,29 @@ public string Text
}
}

/// <summary>
/// Tries to set verse and verseNum by parsing the `value` string.
/// This is used by Verse.set and TrySetVerseUnicode
/// </summary>
/// <returns><c>true</c> if the verse was set successfully </returns>
bool TrySetVerse(string value, bool romanOnly)
{
verse = !TryGetVerseNum(value, romanOnly, out verseNum) ? value.Replace(rtlMark, "") : null;
if (verseNum >= 0)
return true;

Trace.TraceWarning("Just failed to parse a verse number: " + value);
TryGetVerseNum(verse, romanOnly, out verseNum);
return false;
}

/// <summary>
/// Parses a verse string and gets the leading numeric portion as a number.
/// </summary>
/// <returns><c>true</c> if the entire string could be parsed as a single,
/// simple verse number (1-999); <c>false</c> if the verse string represented
/// a verse bridge, contained segment letters, or was invalid</returns>
static bool TryGetVerseNum(string verseStr, out short vNum)
static bool TryGetVerseNum(string verseStr, bool romanOnly , out short vNum)
{
if (string.IsNullOrEmpty(verseStr))
{
Expand All @@ -302,14 +308,14 @@ static bool TryGetVerseNum(string verseStr, out short vNum)
for (int i = 0; i < verseStr.Length; i++)
{
char ch = verseStr[i];
if (!char.IsDigit(ch))
if (romanOnly ? (ch < '0' || ch > '9') : !char.IsDigit(ch))
ermshiperete marked this conversation as resolved.
Show resolved Hide resolved
{
if (i == 0)
vNum = -1;
return false;
}

vNum = (short)(vNum * 10 + char.GetNumericValue(ch));
vNum = (short)(vNum * 10 + (romanOnly ? ch - '0' : char.GetNumericValue(ch)));
if (vNum > bcvMaxValue)
{
// whoops, we got too big!
Expand Down Expand Up @@ -1338,6 +1344,18 @@ static void ParseVerseNumber(string vNum, out int number, out string segment)
segment = vNum.Substring(j);
}

/// <summary>
/// Parses a verse string and gets the leading numeric portion as a number.
SamDelaney marked this conversation as resolved.
Show resolved Hide resolved
/// Functionally identical to Verse.Set for Roman numbers, made distinct to preserve USX standard.
/// </summary>
/// <returns><c>true</c> if the entire string could be parsed as a single,
/// simple verse number in any supported script; <c>false</c> if the verse string represented
/// a verse bridge, contained segment letters, or was invalid</returns>
ermshiperete marked this conversation as resolved.
Show resolved Hide resolved
public bool TrySetVerseUnicode(string value)
{
return TrySetVerse(value, false);
}
SamDelaney marked this conversation as resolved.
Show resolved Hide resolved

/// <summary>
/// Returns whether any of the specified references overlap with this one
/// </summary>
Expand Down