Skip to content

Commit

Permalink
Lucene.Net.Codecs: Fixed testing condition for BaseTermVectorsFormatT…
Browse files Browse the repository at this point in the history
…estCase on TermVectorsReaders by throwing InvalidOperationException (fixes apache#267)
  • Loading branch information
NightOwl888 committed Jun 30, 2020
1 parent 40186fb commit 47fa223
Show file tree
Hide file tree
Showing 7 changed files with 108 additions and 37 deletions.
15 changes: 11 additions & 4 deletions src/Lucene.Net.Codecs/SimpleText/SimpleTextTermVectorsReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -591,12 +591,19 @@ public override BytesRef GetPayload()

public override int NextPosition()
{
// LUCENENET NOTE: In Java, the assertion is being caught in the test (as an AssertionException).
// Technically, a "possible" (in fact "probable") scenario like this one, we should be throwing
// an exception, however doing that causes the checkIndex test to fail. The only logical thing we
// can do to make this compatible is to remove the assert.
//Debug.Assert((_positions != null && _nextPos < _positions.Length) ||
// _startOffsets != null && _nextPos < _startOffsets.Length);

// LUCENENET: The above assertion was for control flow when testing. In Java, it would throw an AssertionError, which is
// caught by the BaseTermVectorsFormatTestCase.assertEquals(RandomTokenStream tk, FieldType ft, Terms terms) method in the
// part that is checking for an error after reading to the end of the enumerator.

// Since there is no way to turn on assertions in a release build in .NET, we are throwing an InvalidOperationException
// in this case, which matches the behavior of Lucene 8. See #267.

if (((_positions != null && _nextPos < _positions.Length) || _startOffsets != null && _nextPos < _startOffsets.Length) == false)
throw new InvalidOperationException("Read past last position");

if (_positions != null)
{
return _positions[_nextPos++];
Expand Down
34 changes: 23 additions & 11 deletions src/Lucene.Net.TestFramework/Index/BaseTermVectorsFormatTestCase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
using JCG = J2N.Collections.Generic;
using static Lucene.Net.Index.TermsEnum;
using Assert = Lucene.Net.TestFramework.Assert;
using AssertionError = Lucene.Net.Diagnostics.AssertionException;
using Attribute = Lucene.Net.Util.Attribute;

#if TESTFRAMEWORK_MSTEST
Expand Down Expand Up @@ -634,17 +635,28 @@ protected virtual void AssertEquals(RandomTokenStream tk, FieldType ft, Terms te
Assert.IsTrue(foundPayload);
}
}
try
{
docsAndPositionsEnum.NextPosition();
Assert.Fail();
}
#pragma warning disable 168
catch (Exception e)
#pragma warning restore 168
{
// ok
}

// LUCENENET specific - In Lucene, there were assertions set up inside TVReaders which throw AssertionError
// (provided assertions are enabled), which in turn signaled this class to skip the check by catching AssertionError.
// In .NET, assertions are not included in the release and cannot be enabled, so there is nothing to catch.
// We have to explicitly exclude the types that rely on this behavior from the check. Otherwise, they would fall
// through to Assert.Fail().
//
// We also have a fake AssertionException for testing mocks. We cannot throw InvalidOperationException in those
// cases because that exception is expected in other contexts.
Assert.ThrowsAnyOf<InvalidOperationException, AssertionError>(() => docsAndPositionsEnum.NextPosition());

// try
// {
// docsAndPositionsEnum.NextPosition();
// Assert.Fail();
// }
//#pragma warning disable 168
// catch (Exception e)
//#pragma warning restore 168
// {
// // ok
// }
}
Assert.AreEqual(DocsEnum.NO_MORE_DOCS, docsAndPositionsEnum.NextDoc());
}
Expand Down
50 changes: 47 additions & 3 deletions src/Lucene.Net/Codecs/BlockTreeTermsWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Text;

namespace Lucene.Net.Codecs
{
Expand Down Expand Up @@ -433,11 +434,54 @@ public override string ToString()
return "BLOCK: " + Prefix.Utf8ToString();
}

// LUCENENET specific - to keep the Debug.Assert statement from throwing exceptions
// because of invalid UTF8 code in Prefix, we have a wrapper method that falls back
// to using PendingBlock.Prefix.ToString() if PendingBlock.ToString()
private string ToString(IList<PendingBlock> blocks) // For assert
{
if (blocks == null)
return "null";


if (blocks.Count == 0)
return "[]";

using (var it = blocks.GetEnumerator())
{
StringBuilder sb = new StringBuilder();
sb.Append('[');
it.MoveNext();
while (true)
{
var e = it.Current;
// There is a chance that the Prefix will contain invalid UTF8,
// so we catch that and use the alternative way of displaying it
try
{
sb.Append(e.ToString());
}
catch (IndexOutOfRangeException)
{
sb.Append("BLOCK: ");
sb.Append(e.Prefix.ToString());
}
if (!it.MoveNext())
{
return sb.Append(']').ToString();
}
sb.Append(',').Append(' ');
}
}
}

public void CompileIndex(IList<PendingBlock> floorBlocks, RAMOutputStream scratchBytes)
{
// LUCENENET TODO: floorBlocks cannot be converted using Arrays.ToString() here.
// It generates an IndexOutOfRangeException()
Debug.Assert((IsFloor && floorBlocks != null && floorBlocks.Count != 0) || (!IsFloor && floorBlocks == null), "isFloor=" + IsFloor + " floorBlocks=" + floorBlocks /*Arrays.ToString(floorBlocks)*/);
// LUCENENET specific - we use a custom wrapper function to display floorBlocks, since
// it might contain garbage that cannot be converted into text. This is compiled out
// of the relese, though.
Debug.Assert(
(IsFloor && floorBlocks != null && floorBlocks.Count != 0) || (!IsFloor && floorBlocks == null),
"isFloor=" + IsFloor + " floorBlocks=" + ToString(floorBlocks));

Debug.Assert(scratchBytes.GetFilePointer() == 0);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ internal void Finish(int numDocs, long maxPointer)
{
if (numDocs != totalDocs)
{
throw new Exception("Expected " + numDocs + " docs, but got " + totalDocs);
throw new InvalidOperationException("Expected " + numDocs + " docs, but got " + totalDocs);
}
if (blockChunks > 0)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1071,11 +1071,11 @@ private void CheckDoc()
{
if (doc == NO_MORE_DOCS)
{
throw new Exception("DocsEnum exhausted");
throw new InvalidOperationException("DocsEnum exhausted");
}
else if (doc == -1)
{
throw new Exception("DocsEnum not started");
throw new InvalidOperationException("DocsEnum not started");
}
}

Expand All @@ -1084,23 +1084,23 @@ private void CheckPosition()
CheckDoc();
if (i < 0)
{
throw new Exception("Position enum not started");
throw new InvalidOperationException("Position enum not started");
}
else if (i >= termFreq)
{
throw new Exception("Read past last position");
throw new InvalidOperationException("Read past last position");
}
}

public override int NextPosition()
{
if (doc != 0)
{
throw new Exception();
throw new InvalidOperationException();
}
else if (i >= termFreq - 1)
{
throw new Exception("Read past last position");
throw new InvalidOperationException("Read past last position");
}

++i;
Expand Down
17 changes: 10 additions & 7 deletions src/Lucene.Net/Codecs/Lucene3x/Lucene3xTermVectorsReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -804,13 +804,16 @@ public override BytesRef GetPayload()

public override int NextPosition()
{
// LUCENENET TODO: on .NET Core 2.0, this assert causes an uncatchable exception.
// See: https://github.com/Microsoft/vstest/issues/1022
// Once the issue has been identified and fixed we can remove this conditional
// compilation for it on .NET Core 2.0.
#if !NETSTANDARD2_0 && !NETSTANDARD2_1
Debug.Assert((positions != null && nextPos < positions.Length) || startOffsets != null && nextPos < startOffsets.Length);
#endif
//Debug.Assert((positions != null && nextPos < positions.Length) || startOffsets != null && nextPos < startOffsets.Length);

// LUCENENET: The above assertion was for control flow when testing. In Java, it would throw an AssertionError, which is
// caught by the BaseTermVectorsFormatTestCase.assertEquals(RandomTokenStream tk, FieldType ft, Terms terms) method in the
// part that is checking for an error after reading to the end of the enumerator.

// Since there is no way to turn on assertions in a release build in .NET, we are throwing an InvalidOperationException
// in this case, which matches the behavior of Lucene 8. See #267.
if (((positions != null && nextPos < positions.Length) || startOffsets != null && nextPos < startOffsets.Length) == false)
throw new InvalidOperationException("Read past last position");

if (positions != null)
{
Expand Down
15 changes: 10 additions & 5 deletions src/Lucene.Net/Codecs/Lucene40/Lucene40TermVectorsReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -808,11 +808,16 @@ public override BytesRef GetPayload()

public override int NextPosition()
{
// LUCENENET TODO: BUG - Need to investigate why this assert sometimes fails
// which will cause the test runner to crash on .NET Core 2.0
#if !NETSTANDARD2_0 && !NETSTANDARD2_1
Debug.Assert((positions != null && nextPos < positions.Length) || startOffsets != null && nextPos < startOffsets.Length);
#endif
//Debug.Assert((positions != null && nextPos < positions.Length) || startOffsets != null && nextPos < startOffsets.Length);

// LUCENENET: The above assertion was for control flow when testing. In Java, it would throw an AssertionError, which is
// caught by the BaseTermVectorsFormatTestCase.assertEquals(RandomTokenStream tk, FieldType ft, Terms terms) method in the
// part that is checking for an error after reading to the end of the enumerator.

// Since there is no way to turn on assertions in a release build in .NET, we are throwing an InvalidOperationException
// in this case, which matches the behavior of Lucene 8. See #267.
if (((positions != null && nextPos < positions.Length) || startOffsets != null && nextPos < startOffsets.Length) == false)
throw new InvalidOperationException("Read past last position");

if (positions != null)
{
Expand Down

0 comments on commit 47fa223

Please sign in to comment.