Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lucene.Net: 4.8 SetNextReader executes repeatedly and returns only one result #915

Closed
1 task done
mowali opened this issue Feb 20, 2024 · 2 comments
Closed
1 task done
Labels
is:task A chore to be done

Comments

@mowali
Copy link

mowali commented Feb 20, 2024

Is there an existing issue for this?

  • I have searched the existing issues

Task description

I updated from Lucene from 3.0 to 4.8, I amended my FieldComparer code. I use a comparator to get results back and I get the results via the call to SetNextReader. I only have 1 indexreader to read from:

public override void SetNextReader(IndexReader reader, int docBase)
{
	currentReaderValues = FieldCache_Fields.DEFAULT.GetStrings(reader, field);
}

public override FieldComparer SetNextReader(AtomicReaderContext context)
{
	sortedResults = FieldCache.DEFAULT.GetTermsIndex(context.AtomicReader, field);
	return this;
}

the older verison SetNextReader is executed once correctly and returning 100 results of while the new SetNextReader executes multiple time and returning only one result. Attached is my old and new code. The searcher is setup like this:

var searchFields = new search[4];
searchFields[0] = "Id";
searchFields[1] = "FirstName";
searchFields[2] = "LastName";
searchFields[3] = "Tag";

var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);
var parser = new MultiFieldQueryParser(LuceneVersion.LUCENE_48, searchFields, analyzer)
{
    AllowLeadingWildcard = true,
    DefaultOperator = QueryParserBase.AND_OPERATOR
};

var searchCriteria = "Jones"
var query = parser.Parse(searchCriteria);

var directory = FSDirectory.Open(new DirectoryInfo(indexDirectory));
DirectoryReader di = DirectoryReader.Open(directory);
var searcher = new IndexSearcher(di);   

var filter = new QueryWrapperFilter(query);
var topDocs = searcher.Search(query, filter, (1 * 25), sort);
	
// OLD COMPARATOR CODE WITH 3.0
internal class TagComparator : FieldComparator
{
    private string[] values;
    private string[] currentReaderValues;
    private string field;
    private string bottom; 
    private bool reversed;

    public TagComparator(int numHits, string field, bool reversed)
    {
        values = new string[numHits];
        this.field = field;
        this.reversed = reversed;
    }

    public override int Compare(int slot1, int slot2)
    {
        string v1 = values[slot1];
        string v2 = values[slot2];

        return DoCompare(v1, v2);
    }

    public override int CompareBottom(int doc)
    {
        string v2 = currentReaderValues[doc];

        return DoCompare(bottom, v2);
    }

    private int DoCompare(string v1, string v2)
    {
        if (string.IsNullOrEmpty(v1))
        {
            if (string.IsNullOrEmpty(v2))
            {
                return 0;
            }

            return reversed ? -1 : 1;
        }

        if (string.IsNullOrEmpty(v2))
        {
            return reversed ? 1 : -1;
        }

        return v1.CompareTo(v2);
    }

    public override void Copy(int slot, int doc)
    {
        values[slot] = currentReaderValues[doc];
    }

    public override void SetNextReader(IndexReader reader, int docBase)
    {
        currentReaderValues = FieldCache_Fields.DEFAULT.GetStrings(reader, field);
    }

    public override void SetBottom(int bottom)
    {
        this.bottom = values[bottom];
    }

    public override IComparable this[int slot] => values[slot];
}

//NEW COMPARER CODE WITH 4.8 **
internal class TagComparator : FieldComparer<BytesRef>
{
    protected readonly ILog Log;
    private readonly BytesRef[] bvalues;
    private readonly string field;
    private readonly bool reversed;
    private BytesRef termCopy = new BytesRef();
    private SortedDocValues sortedResults;
    private int bottomSlot;

    public override BytesRef this[int slot] => bvalues[slot];

    public TagComparator(int numHits, string field, bool reversed)
    {
        bvalues = new BytesRef[numHits];
        this.field = field;
        this.reversed = reversed;
    }

    public override int Compare(int slot1, int slot2)
    {
        var result1 = DoCompare(bvalues[slot1], bvalues[slot2]);
        return result1;
    }

    private int DoCompare(BytesRef v1, BytesRef v2)
    {
        if (v1.Length == 0)
        {
            if (v2.Length == 0)
            {
                return 0;
            }

            return reversed ? -1 : 1;
        }

        if (v2.Length == 0)
        {
            return reversed ? 1 : -1;
        }

        if (v1.CompareTo(v2) > 0)
            return 1;
        else
            return -1;
    }

    public override void Copy(int slot, int doc)
    {
        termCopy = new BytesRef();
        sortedResults.Get(doc, termCopy);
        bvalues[slot] = termCopy;
    }

    public override int CompareBottom(int doc)
    {
        BytesRef termOrd = new BytesRef();
        int ord = sortedResults.GetOrd(doc);
        sortedResults.LookupOrd(ord, termOrd);
        var result = DoCompare(bvalues[bottomSlot], termOrd);

        return result;
    }

    public override void SetBottom(int bottom)
    {
        bottomSlot = bottom;
    }

    public override FieldComparer SetNextReader(AtomicReaderContext context)
    {
        sortedResults = FieldCache.DEFAULT.GetTermsIndex(context.AtomicReader, field);
        return this;
    }

    public override int CompareTop(int doc)
    {
        throw new NotImplementedException();
    }

    public override void SetTopValue(BytesRef value)
    {
        throw new NotImplementedException();
    }
}
@mowali mowali added the is:task A chore to be done label Feb 20, 2024
@NightOwl888
Copy link
Contributor

Maybe I am missing something, but why aren't you using the built-in string comparer (TermOrdValComparer) with reverse support?

/// <summary>
/// Tests reverse sorting on type string with a missing
/// value sorted last
/// </summary>
[Test]
public virtual void TestStringValMissingSortedLastReverse()
{
Directory dir = NewDirectory();
RandomIndexWriter writer = new RandomIndexWriter(Random, dir);
Document doc = new Document();
writer.AddDocument(doc);
doc = new Document();
doc.Add(NewStringField("value", "foo", Field.Store.YES));
writer.AddDocument(doc);
doc = new Document();
doc.Add(NewStringField("value", "bar", Field.Store.YES));
writer.AddDocument(doc);
IndexReader ir = writer.GetReader();
writer.Dispose();
IndexSearcher searcher = NewSearcher(ir);
SortField sf = new SortField("value", SortFieldType.STRING, true);
sf.SetMissingValue(SortField.STRING_LAST);
Sort sort = new Sort(sf);
TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
Assert.AreEqual(3, td.TotalHits);
// null comes first
Assert.IsNull(searcher.Doc(td.ScoreDocs[0].Doc).Get("value"));
Assert.AreEqual("foo", searcher.Doc(td.ScoreDocs[1].Doc).Get("value"));
Assert.AreEqual("bar", searcher.Doc(td.ScoreDocs[2].Doc).Get("value"));
ir.Dispose();
dir.Dispose();
}

/// <summary>
/// Tests reverse sorting on type string with a missing
/// value sorted first
/// </summary>
[Test]
public virtual void TestStringMissingSortedFirstReverse()
{
Directory dir = NewDirectory();
RandomIndexWriter writer = new RandomIndexWriter(Random, dir);
Document doc = new Document();
writer.AddDocument(doc);
doc = new Document();
doc.Add(NewStringField("value", "foo", Field.Store.YES));
writer.AddDocument(doc);
doc = new Document();
doc.Add(NewStringField("value", "bar", Field.Store.YES));
writer.AddDocument(doc);
IndexReader ir = writer.GetReader();
writer.Dispose();
IndexSearcher searcher = NewSearcher(ir);
SortField sf = new SortField("value", SortFieldType.STRING, true);
Sort sort = new Sort(sf);
TopDocs td = searcher.Search(new MatchAllDocsQuery(), 10, sort);
Assert.AreEqual(3, td.TotalHits);
Assert.AreEqual("foo", searcher.Doc(td.ScoreDocs[0].Doc).Get("value"));
Assert.AreEqual("bar", searcher.Doc(td.ScoreDocs[1].Doc).Get("value"));
// null comes last
Assert.IsNull(searcher.Doc(td.ScoreDocs[2].Doc).Get("value"));
ir.Dispose();
dir.Dispose();
}

If the custom FieldComparer is required, your main issue is that you are allocating memory inside of the comparison methods. Comparers should only allocate memory in the class constructor and/or field initializers, never in the comparison methods, because this will cause a lot of garbage collection overhead.

    public override void Copy(int slot, int doc)
    {
        termCopy = new BytesRef(); // Expensive allocation. This method will be called a lot.
        sortedResults.Get(doc, termCopy);
        bvalues[slot] = termCopy;
    }

    public override int CompareBottom(int doc)
    {
        BytesRef termOrd = new BytesRef(); // Expensive allocation. This method will be called a lot.
        int ord = sortedResults.GetOrd(doc);
        sortedResults.LookupOrd(ord, termOrd); // Expensive lookup. The TermOrdValComparer only does lookups in the Copy() method and only when necessary.
        var result = DoCompare(bvalues[bottomSlot], termOrd);

        return result;
    }

Also, the TermOrdValComparer uses ords to do the comparison in most cases, falling back to term comparison only when necessary. See the TermOrdValComparer docs and FieldComparer<T> docs.

@paulirwin
Copy link
Contributor

See response from @NightOwl888 above. Feel free to reopen if you feel like there is more to discuss here. Thanks!

@paulirwin paulirwin closed this as not planned Won't fix, can't repro, duplicate, stale Oct 28, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
is:task A chore to be done
Projects
None yet
Development

No branches or pull requests

3 participants