Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ public override void Verse(
string pubNumber
)
{
// Handle non-Latin numbers
VerseRef verseRef = _curVerseRef.Clone();
verseRef.TrySetVerseUnicode(number);

if (state.ChapterHasVerseZero && state.VerseRef.VerseNum == 0)
{
// Fall through for the special case of verse 0 being specified in the USFM
Expand All @@ -81,11 +85,14 @@ string pubNumber

return;
}
else if (VerseRef.AreOverlappingVersesRanges(verse1: number, verse2: _curVerseRef.Verse))
else if (
VerseRef.AreOverlappingVersesRanges(verse1: verseRef.Verse, verse2: _curVerseRef.Verse)
&& VerseRef.AreOverlappingVersesRanges(verse1: number, verse2: _curVerseRef.Verse)
)
{
// merge overlapping verse ranges into one range
VerseRef verseRef = _curVerseRef.Clone();
verseRef.Verse = CorporaUtils.MergeVerseRanges(number, _curVerseRef.Verse);
verseRef = _curVerseRef.Clone();
verseRef.TrySetVerseUnicode(CorporaUtils.MergeVerseRanges(number, _curVerseRef.Verse));
UpdateVerseRef(verseRef, marker);
return;
}
Expand Down Expand Up @@ -292,7 +299,7 @@ private void EndNonVerseText(UsfmParserState state)

private void UpdateVerseRef(VerseRef verseRef, string marker)
{
if (_curVerseRef.VerseNum == 0 && verseRef.VerseNum == 0 && marker == "v")
if (_curVerseRef.VerseNum == 0 && verseRef.VerseNum == 0 && !verseRef.HasMultiple && marker == "v")
{
// As the verse 0 marker appears within the middle of verse 0,
// we should not break the position of current element stack by clearing it.
Expand Down
6 changes: 4 additions & 2 deletions src/SIL.Machine/Corpora/UsfmParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -391,9 +391,11 @@ public bool ProcessToken()

// Verse
vref = State.VerseRef;
vref.Verse = token.Data;
if (vref.VerseNum == 0)
vref.TrySetVerseUnicode(token.Data);
if (vref.VerseNum == 0) // This token is \v 0
State.ChapterHasVerseZero = true;
else if (vref.VerseNum == -1) // Ignore invalid verse numbers
vref.VerseNum = State.VerseRef.VerseNum;
State.VerseRef = vref;
State.VerseOffset = 0;

Expand Down
297 changes: 297 additions & 0 deletions tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,303 @@ public void GetRows_PrivateUseMarker()
});
}

[Test]
public void GetRows_NonLatinVerseNumber()
{
    // The verse markers below use a Devanagari digit (१) and a Bengali digit (৪)
    // in place of Latin digits.
    TextRow[] rows = GetRows(
        @"\id MAT - Test
\c 1
\p
\v १ Verse 1
\v 3,৪ Verses 3 and 4
\p
",
        includeAllText: true
    );

    // Failure diagnostics: every row reference and every row text, comma separated.
    string allRefs = string.Join(",", rows.Select(row => row.Ref.ToString()));
    string allTexts = string.Join(",", rows.Select(row => row.Text));

    Assert.Multiple(() =>
    {
        Assert.That(rows, Has.Length.EqualTo(4));

        // Row 0: empty row keyed by the non-Latin verse reference.
        Assert.That(rows[0].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:৪")), allRefs);
        Assert.That(rows[0].Text, Is.Empty, allTexts);

        // Row 1: the empty paragraph that precedes the first verse.
        Assert.That(rows[1].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/1:p")), allRefs);
        Assert.That(rows[1].Text, Is.Empty, allTexts);

        // Row 2: the verse whose number was written as "१".
        Assert.That(rows[2].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:1")), allRefs);
        Assert.That(rows[2].Text, Is.EqualTo("Verse 1"), allTexts);

        // Row 3: the "3,৪" verse, referenced by its first verse number.
        Assert.That(rows[3].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:3")), allRefs);
        Assert.That(rows[3].Text, Is.EqualTo("Verses 3 and 4"), allTexts);
    });
}

[Test]
public void GetRows_EmptyVerseNumber()
{
    // A \v marker with no verse number is followed directly by a \b marker.
    TextRow[] rows = GetRows(
        @"\id MAT - Test
\c 1
\p
\v
\b
",
        includeAllText: true
    );

    // Failure diagnostics: every row reference and every row text, comma separated.
    string allRefs = string.Join(",", rows.Select(row => row.Ref.ToString()));
    string allTexts = string.Join(",", rows.Select(row => row.Text));

    Assert.Multiple(() =>
    {
        Assert.That(rows, Has.Length.EqualTo(2));

        // Both rows are expected to stay under verse 0 of chapter 1.
        Assert.That(rows[0].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/1:p")), allRefs);
        Assert.That(rows[0].Text, Is.Empty, allTexts);

        Assert.That(rows[1].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/2:b")), allRefs);
        Assert.That(rows[1].Text, Is.Empty, allTexts);
    });
}

[Test]
public void GetRows_MultipleEmptyVerseNumbers()
{
    // Several \v markers without a verse number, each separated by a paragraph marker.
    TextRow[] rows = GetRows(
        @"\id MAT - Test
\c 1
\p
\v
\p
\v
\p
\v
\p
",
        includeAllText: true
    );

    const int ExpectedRowCount = 4;

    // Failure diagnostics: every row reference and every row text, comma separated.
    string allRefs = string.Join(",", rows.Select(row => row.Ref.ToString()));
    string allTexts = string.Join(",", rows.Select(row => row.Text));

    Assert.Multiple(() =>
    {
        Assert.That(rows, Has.Length.EqualTo(ExpectedRowCount));

        // Every row is expected to be an empty, consecutively numbered paragraph under verse 0.
        for (int i = 0; i < ExpectedRowCount; i++)
        {
            TextRow row = rows[i];
            Assert.That(row.Ref, Is.EqualTo(ScriptureRef.Parse($"MAT 1:0/{i + 1}:p")), allRefs);
            Assert.That(row.Text, Is.Empty, allTexts);
        }
    });
}

[Test]
public void GetRows_EmptyVerseNumberWithText()
{
    // The \v marker has no number of its own; it is immediately followed by a \vn marker.
    TextRow[] rows = GetRows(
        @"\id MAT - Test
\c 1
\s heading text
\v \vn 1 verse text
",
        includeAllText: true
    );

    // Failure diagnostics: every row reference and every row text, comma separated.
    string allRefs = string.Join(",", rows.Select(row => row.Ref.ToString()));
    string allTexts = string.Join(",", rows.Select(row => row.Text));

    Assert.Multiple(() =>
    {
        Assert.That(rows, Has.Length.EqualTo(2));

        // Row 0: the section heading.
        Assert.That(rows[0].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/1:s")), allRefs);
        Assert.That(rows[0].Text, Is.EqualTo("heading text"), allTexts);

        // Row 1: the \vn content, expected to remain under verse 0.
        Assert.That(rows[1].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/2:vn")), allRefs);
        Assert.That(rows[1].Text, Is.EqualTo("1 verse text"), allTexts);
    });
}

[Test]
public void GetRows_EmptyVerseNumberMidVerse()
{
    // A \v marker with no number sits between two properly numbered verses.
    TextRow[] rows = GetRows(
        @"\id MAT - Test
\c 1
\p
\v 1 verse 1 text
\v
\v 2 verse 2 text
",
        includeAllText: true
    );

    // Failure diagnostics: every row reference and every row text, comma separated.
    string allRefs = string.Join(",", rows.Select(row => row.Ref.ToString()));
    string allTexts = string.Join(",", rows.Select(row => row.Text));

    Assert.Multiple(() =>
    {
        // The numberless marker is expected to contribute no row of its own.
        Assert.That(rows, Has.Length.EqualTo(3));

        Assert.That(rows[0].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/1:p")), allRefs);
        Assert.That(rows[0].Text, Is.Empty, allTexts);

        Assert.That(rows[1].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:1")), allRefs);
        Assert.That(rows[1].Text, Is.EqualTo("verse 1 text"), allTexts);

        Assert.That(rows[2].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:2")), allRefs);
        Assert.That(rows[2].Text, Is.EqualTo("verse 2 text"), allTexts);
    });
}

[Test]
public void GetRows_InvalidVerseNumbers()
{
    // Each \v marker is followed by "BK..." text rather than a plain verse number.
    TextRow[] rows = GetRows(
        @"\id MAT - Test
\c 1
\p
\v BK1 text goes here
\v BK 2 text goes here
\v BK 3 text goes here
\v BK 4 text goes here
",
        includeAllText: true
    );

    // Failure diagnostics: every row reference and every row text, comma separated.
    string allRefs = string.Join(",", rows.Select(row => row.Ref.ToString()));
    string allTexts = string.Join(",", rows.Select(row => row.Text));

    Assert.Multiple(() =>
    {
        // All of the text is expected to collapse into a single paragraph row under verse 0.
        Assert.That(rows, Has.Length.EqualTo(1));

        Assert.That(rows[0].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/1:p")), allRefs);
        Assert.That(
            rows[0].Text,
            Is.EqualTo("text goes here 2 text goes here 3 text goes here 4 text goes here"),
            allTexts
        );
    });
}

[Test]
public void GetRows_IncompleteVerseRange()
{
    // The verse number "1," ends with a separator but names no second verse.
    TextRow[] rows = GetRows(
        @"\id MAT - Test
\c 1
\s heading text
\p
\q1
\v 1,
\q1 verse 1 text
",
        includeAllText: true
    );

    // Failure diagnostics: every row reference and every row text, comma separated.
    string allRefs = string.Join(",", rows.Select(row => row.Ref.ToString()));
    string allTexts = string.Join(",", rows.Select(row => row.Text));

    Assert.Multiple(() =>
    {
        Assert.That(rows, Has.Length.EqualTo(4));

        // Rows 0-1: heading and paragraph that precede the verse, under verse 0.
        Assert.That(rows[0].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/1:s")), allRefs);
        Assert.That(rows[0].Text, Is.EqualTo("heading text"), allTexts);

        Assert.That(rows[1].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/2:p")), allRefs);
        Assert.That(rows[1].Text, Is.Empty, allTexts);

        // Rows 2-3: both \q1 lines are expected to be attributed to verse 1.
        Assert.That(rows[2].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:1/3:q1")), allRefs);
        Assert.That(rows[2].Text, Is.Empty, allTexts);

        Assert.That(rows[3].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:1/4:q1")), allRefs);
        Assert.That(rows[3].Text, Is.EqualTo("verse 1 text"), allTexts);
    });
}

private static TextRow[] GetRows(string usfm, bool includeMarkers = false, bool includeAllText = false)
{
UsfmMemoryText text =
Expand Down
Loading