Skip to content

Commit

Permalink
VerseRef.TrySetVerseUnicode: Improve handling of non-decimal numerals…
Browse files Browse the repository at this point in the history
… and surrogate pair numerals (sillsdev#1000)
  • Loading branch information
elisunger committed Jan 4, 2022
1 parent 9823d1d commit 3a829c5
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 13 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
- [SIL.WritingSystems] Update langtags.json to the latest
- [SIL.Scripture] Made VerseRef class implement new IScrVerseRef interface
- [SIL.Forms.Scripture] Changed VerseControl to use IScrVerseRef and not depend directly on ScrVers
- [SIL.Scripture] VerseRef.TrySetVerseUnicode: Improve handling of non-decimal numerals and surrogate pair numerals (#1000)

### Fixed

Expand Down
8 changes: 5 additions & 3 deletions SIL.Scripture.Tests/VerseRefTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2076,9 +2076,11 @@ public void UnBridge()
[TestCase("", ExpectedResult = -1, TestName = "Thai non-numeral")]
[TestCase("᠔-᠔", ExpectedResult = 4, TestName = "Mongolian complex verse")]
[TestCase("᠔ᠠ", ExpectedResult = 4, TestName = "Mongolian complex verse - lettered")]
[TestCase("二十", ExpectedResult = 20, TestName = "Japanese numeral", IgnoreReason = "Non-decimal numeral systems not yet implemented. (See issue #1000.)")]
[TestCase("יא", ExpectedResult = 11, TestName = "Hebrew numeral", IgnoreReason = "Non-decimal numeral systems not yet implemented. (See issue #1000.)")]
[TestCase("\U0001113A\U00011138", ExpectedResult = 42, TestName = "Chakma numeral", IgnoreReason = "Surrogate pair handling not yet implemented. (See issue #1000.)")]
[TestCase("二十", ExpectedResult = 20, TestName = "Japanese numeral", IgnoreReason = "Non-decimal numeral systems not yet implemented. See issue #1000")]
[TestCase("יא", ExpectedResult = 11, TestName = "Hebrew numeral", IgnoreReason = "Non-decimal numeral systems not yet implemented. See issue #1000")]
[TestCase("\U0001113A\U00011138", ExpectedResult = 42, TestName = "Chakma numeral")]
[TestCase("Ⅻ", ExpectedResult = 12, TestName = "Roman numeral LetterNumber")]
[TestCase("፹", ExpectedResult = 80, TestName = "Ethiopic numeral OtherNumber")]
public int TrySetVerseUnicode_InterpretNumerals(string verseStr)
{
VerseRef vref = new VerseRef("EXO 6:1");
Expand Down
61 changes: 51 additions & 10 deletions SIL.Scripture/VerseRef.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System.Diagnostics;
using System.Text;
using JetBrains.Annotations;
using System.Globalization;

namespace SIL.Scripture
{
Expand Down Expand Up @@ -316,19 +317,59 @@ static bool TryGetVerseNum(string verseStr, bool romanOnly , out short vNum)
for (int i = 0; i < verseStr.Length; i++)
{
char ch = verseStr[i];
if (romanOnly ? (ch < '0' || ch > '9') : !char.IsDigit(ch))
if (!char.IsSurrogate(ch))
{
if (i == 0)
vNum = -1;
return false;
}
switch (char.GetUnicodeCategory(ch))
{
case UnicodeCategory.DecimalDigitNumber:
if (romanOnly ? (ch < '0' || ch > '9') : !char.IsDigit(ch))
{
if (i == 0)
vNum = -1;
return false;
}

vNum = (short)(vNum * 10 + (romanOnly ? ch - '0' : char.GetNumericValue(ch)));
if (vNum > bcvMaxValue)
vNum = (short)(vNum * 10 + (romanOnly ? ch - '0' : char.GetNumericValue(ch)));
if (vNum > bcvMaxValue)
{
// whoops, we got too big!
vNum = -1;
return false;
}

break;
case UnicodeCategory.Surrogate:
vNum = (short)(vNum * 10 + (char.GetNumericValue(verseStr, i)));

if (vNum > bcvMaxValue)
{
// whoops, we got too big!
vNum = -1;
return false;
}
break;
case UnicodeCategory.LetterNumber:
case UnicodeCategory.OtherNumber:
vNum += (short)char.GetNumericValue(verseStr, i);
break;
default:
if (i == 0)
vNum = -1;
return false;
}

}
else if (i + 1 < verseStr.Length && char.IsSurrogatePair(verseStr[i], verseStr[i + 1]))
{
// whoops, we got too big!
vNum = -1;
return false;
vNum = (short)(vNum * 10 + (char.GetNumericValue(verseStr, i)));

if (vNum > bcvMaxValue)
{
// whoops, we got too big!
vNum = -1;
return false;
}

}
}
return true;
Expand Down

0 comments on commit 3a829c5

Please sign in to comment.