2323import org .apache .lucene .document .BinaryDocValuesField ;
2424import org .apache .lucene .document .Document ;
2525import org .apache .lucene .document .Field ;
26+ import org .apache .lucene .document .SortedDocValuesField ;
2627import org .apache .lucene .document .SortedNumericDocValuesField ;
2728import org .apache .lucene .document .StoredField ;
2829import org .apache .lucene .index .DirectoryReader ;
@@ -146,8 +147,9 @@ public void testSignificance() throws IOException {
146147 IndexSearcher searcher = new IndexSearcher (reader );
147148
148149 // Search "odd"
149- SignificantTerms terms = searchAndReduce (searcher , new TermQuery (new Term ("text" , "odd" )), sigAgg , textFieldType );
150+ SignificantStringTerms terms = searchAndReduce (searcher , new TermQuery (new Term ("text" , "odd" )), sigAgg , textFieldType );
150151
152+ assertThat (terms .getSubsetSize (), equalTo (5L ));
151153 assertEquals (1 , terms .getBuckets ().size ());
152154 assertNull (terms .getBucketByKey ("even" ));
153155 assertNull (terms .getBucketByKey ("common" ));
@@ -156,6 +158,7 @@ public void testSignificance() throws IOException {
156158 // Search even
157159 terms = searchAndReduce (searcher , new TermQuery (new Term ("text" , "even" )), sigAgg , textFieldType );
158160
161+ assertThat (terms .getSubsetSize (), equalTo (5L ));
159162 assertEquals (1 , terms .getBuckets ().size ());
160163 assertNull (terms .getBucketByKey ("odd" ));
161164 assertNull (terms .getBucketByKey ("common" ));
@@ -164,6 +167,7 @@ public void testSignificance() throws IOException {
164167 // Search odd with regex includeexcludes
165168 sigAgg .includeExclude (new IncludeExclude ("o.d" , null ));
166169 terms = searchAndReduce (searcher , new TermQuery (new Term ("text" , "odd" )), sigAgg , textFieldType );
170+ assertThat (terms .getSubsetSize (), equalTo (5L ));
167171 assertEquals (1 , terms .getBuckets ().size ());
168172 assertNotNull (terms .getBucketByKey ("odd" ));
169173 assertNull (terms .getBucketByKey ("common" ));
@@ -176,6 +180,7 @@ public void testSignificance() throws IOException {
176180 sigAgg .includeExclude (new IncludeExclude (oddStrings , evenStrings ));
177181 sigAgg .significanceHeuristic (SignificanceHeuristicTests .getRandomSignificanceheuristic ());
178182 terms = searchAndReduce (searcher , new TermQuery (new Term ("text" , "odd" )), sigAgg , textFieldType );
183+ assertThat (terms .getSubsetSize (), equalTo (5L ));
179184 assertEquals (1 , terms .getBuckets ().size ());
180185 assertNotNull (terms .getBucketByKey ("odd" ));
181186 assertNull (terms .getBucketByKey ("weird" ));
@@ -185,6 +190,7 @@ public void testSignificance() throws IOException {
185190
186191 sigAgg .includeExclude (new IncludeExclude (evenStrings , oddStrings ));
187192 terms = searchAndReduce (searcher , new TermQuery (new Term ("text" , "odd" )), sigAgg , textFieldType );
193+ assertThat (terms .getSubsetSize (), equalTo (5L ));
188194 assertEquals (0 , terms .getBuckets ().size ());
189195 assertNull (terms .getBucketByKey ("odd" ));
190196 assertNull (terms .getBucketByKey ("weird" ));
@@ -240,13 +246,15 @@ public void testNumericSignificance() throws IOException {
240246 // Search "odd"
241247 SignificantLongTerms terms = searchAndReduce (searcher , new TermQuery (new Term ("text" , "odd" )), sigNumAgg , longFieldType );
242248 assertEquals (1 , terms .getBuckets ().size ());
249+ assertThat (terms .getSubsetSize (), equalTo (5L ));
243250
244251 assertNull (terms .getBucketByKey (Long .toString (EVEN_VALUE )));
245252 assertNull (terms .getBucketByKey (Long .toString (COMMON_VALUE )));
246253 assertNotNull (terms .getBucketByKey (Long .toString (ODD_VALUE )));
247254
248255 terms = searchAndReduce (searcher , new TermQuery (new Term ("text" , "even" )), sigNumAgg , longFieldType );
249256 assertEquals (1 , terms .getBuckets ().size ());
257+ assertThat (terms .getSubsetSize (), equalTo (5L ));
250258
251259 assertNull (terms .getBucketByKey (Long .toString (ODD_VALUE )));
252260 assertNull (terms .getBucketByKey (Long .toString (COMMON_VALUE )));
@@ -379,6 +387,172 @@ public void testFieldAlias() throws IOException {
379387 }
380388 }
381389
390+ public void testAllDocsWithoutStringFieldviaGlobalOrds () throws IOException {
391+ testAllDocsWithoutStringField ("global_ordinals" );
392+ }
393+
394+ public void testAllDocsWithoutStringFieldViaMap () throws IOException {
395+ testAllDocsWithoutStringField ("map" );
396+ }
397+
398+ /**
399+ * Make sure that when the field is mapped but there aren't any values
400+ * for it we return a properly shaped "empty" result. In particular, the
401+ * {@link InternalMappedSignificantTerms#getSubsetSize()} needs to be set
402+ * to the number of matching documents.
403+ */
404+ private void testAllDocsWithoutStringField (String executionHint ) throws IOException {
405+ try (Directory dir = newDirectory ()) {
406+ try (RandomIndexWriter writer = new RandomIndexWriter (random (), dir )) {
407+ writer .addDocument (new Document ());
408+ try (IndexReader reader = maybeWrapReaderEs (writer .getReader ())) {
409+ IndexSearcher searcher = newIndexSearcher (reader );
410+ SignificantTermsAggregationBuilder request = new SignificantTermsAggregationBuilder ("f" ).field ("f" )
411+ .executionHint (executionHint );
412+ SignificantStringTerms result = search (searcher , new MatchAllDocsQuery (), request , keywordField ("f" ));
413+ assertThat (result .getSubsetSize (), equalTo (1L ));
414+ }
415+ }
416+ }
417+ }
418+
419+ /**
420+ * Make sure that when the field is mapped but there aren't any values
421+ * for it we return a properly shaped "empty" result. In particular, the
422+ * {@link InternalMappedSignificantTerms#getSubsetSize()} needs to be set
423+ * to the number of matching documents.
424+ */
425+ public void testAllDocsWithoutNumericField () throws IOException {
426+ try (Directory dir = newDirectory ()) {
427+ try (RandomIndexWriter writer = new RandomIndexWriter (random (), dir )) {
428+ writer .addDocument (new Document ());
429+ try (IndexReader reader = maybeWrapReaderEs (writer .getReader ())) {
430+ IndexSearcher searcher = newIndexSearcher (reader );
431+ SignificantTermsAggregationBuilder request = new SignificantTermsAggregationBuilder ("f" ).field ("f" );
432+ SignificantLongTerms result = search (searcher , new MatchAllDocsQuery (), request , longField ("f" ));
433+ assertThat (result .getSubsetSize (), equalTo (1L ));
434+ }
435+ }
436+ }
437+ }
438+
439+ public void testSomeDocsWithoutStringFieldviaGlobalOrds () throws IOException {
440+ testSomeDocsWithoutStringField ("global_ordinals" );
441+ }
442+
443+ public void testSomeDocsWithoutStringFieldViaMap () throws IOException {
444+ testSomeDocsWithoutStringField ("map" );
445+ }
446+
447+ /**
448+ * Make sure that when the field a segment doesn't contain the field we
449+ * still include the count of its matching documents
450+ * in {@link InternalMappedSignificantTerms#getSubsetSize()}.
451+ */
452+ private void testSomeDocsWithoutStringField (String executionHint ) throws IOException {
453+ try (Directory dir = newDirectory ()) {
454+ try (RandomIndexWriter writer = new RandomIndexWriter (random (), dir )) {
455+ Document d = new Document ();
456+ d .add (new SortedDocValuesField ("f" , new BytesRef ("f" )));
457+ writer .addDocument (d );
458+ writer .flush ();
459+ writer .addDocument (new Document ());
460+ try (IndexReader reader = maybeWrapReaderEs (writer .getReader ())) {
461+ IndexSearcher searcher = newIndexSearcher (reader );
462+ SignificantTermsAggregationBuilder request = new SignificantTermsAggregationBuilder ("f" ).field ("f" )
463+ .executionHint (executionHint );
464+ SignificantStringTerms result = search (searcher , new MatchAllDocsQuery (), request , keywordField ("f" ));
465+ assertThat (result .getSubsetSize (), equalTo (2L ));
466+ }
467+ }
468+ }
469+ }
470+
471+ /**
472+ * Make sure that when the field a segment doesn't contain the field we
473+ * still include the count of its matching documents
474+ * in {@link InternalMappedSignificantTerms#getSubsetSize()}.
475+ */
476+ public void testSomeDocsWithoutNumericField () throws IOException {
477+ try (Directory dir = newDirectory ()) {
478+ try (RandomIndexWriter writer = new RandomIndexWriter (random (), dir )) {
479+ Document d = new Document ();
480+ d .add (new SortedNumericDocValuesField ("f" , 1 ));
481+ writer .addDocument (d );
482+ writer .addDocument (new Document ());
483+ try (IndexReader reader = maybeWrapReaderEs (writer .getReader ())) {
484+ IndexSearcher searcher = newIndexSearcher (reader );
485+ SignificantTermsAggregationBuilder request = new SignificantTermsAggregationBuilder ("f" ).field ("f" );
486+ SignificantLongTerms result = search (searcher , new MatchAllDocsQuery (), request , longField ("f" ));
487+ assertThat (result .getSubsetSize (), equalTo (2L ));
488+ }
489+ }
490+ }
491+ }
492+
493+ public void testThreeLayerStringViaGlobalOrds () throws IOException {
494+ threeLayerStringTestCase ("global_ordinals" );
495+ }
496+
497+ public void testThreeLayerStringViaMap () throws IOException {
498+ threeLayerStringTestCase ("map" );
499+ }
500+
501+ private void threeLayerStringTestCase (String executionHint ) throws IOException {
502+ try (Directory dir = newDirectory ()) {
503+ try (RandomIndexWriter writer = new RandomIndexWriter (random (), dir )) {
504+ for (int i = 0 ; i < 10 ; i ++) {
505+ for (int j = 0 ; j < 10 ; j ++) {
506+ for (int k = 0 ; k < 10 ; k ++) {
507+ Document d = new Document ();
508+ d .add (new SortedDocValuesField ("i" , new BytesRef (Integer .toString (i ))));
509+ d .add (new SortedDocValuesField ("j" , new BytesRef (Integer .toString (j ))));
510+ d .add (new SortedDocValuesField ("k" , new BytesRef (Integer .toString (k ))));
511+ writer .addDocument (d );
512+ }
513+ }
514+ }
515+ try (IndexReader reader = maybeWrapReaderEs (writer .getReader ())) {
516+ IndexSearcher searcher = newIndexSearcher (reader );
517+ SignificantTermsAggregationBuilder kRequest = new SignificantTermsAggregationBuilder ("k" ).field ("k" )
518+ .executionHint (executionHint );
519+ SignificantTermsAggregationBuilder jRequest = new SignificantTermsAggregationBuilder ("j" ).field ("j" )
520+ .executionHint (executionHint )
521+ .subAggregation (kRequest );
522+ SignificantTermsAggregationBuilder request = new SignificantTermsAggregationBuilder ("i" ).field ("i" )
523+ .executionHint (executionHint )
524+ .subAggregation (jRequest );
525+ SignificantStringTerms result = search (
526+ searcher ,
527+ new MatchAllDocsQuery (),
528+ request ,
529+ keywordField ("i" ),
530+ keywordField ("j" ),
531+ keywordField ("k" )
532+ );
533+ assertThat (result .getSubsetSize (), equalTo (1000L ));
534+ for (int i = 0 ; i < 10 ; i ++) {
535+ SignificantStringTerms .Bucket iBucket = result .getBucketByKey (Integer .toString (i ));
536+ assertThat (iBucket .getDocCount (), equalTo (100L ));
537+ SignificantStringTerms jAgg = iBucket .getAggregations ().get ("j" );
538+ assertThat (jAgg .getSubsetSize (), equalTo (100L ));
539+ for (int j = 0 ; j < 10 ; j ++) {
540+ SignificantStringTerms .Bucket jBucket = jAgg .getBucketByKey (Integer .toString (j ));
541+ assertThat (jBucket .getDocCount (), equalTo (10L ));
542+ SignificantStringTerms kAgg = jBucket .getAggregations ().get ("k" );
543+ assertThat (kAgg .getSubsetSize (), equalTo (10L ));
544+ for (int k = 0 ; k < 10 ; k ++) {
545+ SignificantStringTerms .Bucket kBucket = kAgg .getBucketByKey (Integer .toString (k ));
546+ assertThat (jAgg .getSubsetSize (), equalTo (100L ));
547+ assertThat (kBucket .getDocCount (), equalTo (1L ));
548+ }
549+ }
550+ }
551+ }
552+ }
553+ }
554+ }
555+
382556 public void testThreeLayerLong () throws IOException {
383557 try (Directory dir = newDirectory ()) {
384558 try (RandomIndexWriter writer = new RandomIndexWriter (random (), dir )) {
@@ -400,14 +574,17 @@ public void testThreeLayerLong() throws IOException {
400574 .subAggregation (new SignificantTermsAggregationBuilder ("k" ).field ("k" )));
401575 SignificantLongTerms result = search (searcher , new MatchAllDocsQuery (), request ,
402576 longField ("i" ), longField ("j" ), longField ("k" ));
577+ assertThat (result .getSubsetSize (), equalTo (1000L ));
403578 for (int i = 0 ; i < 10 ; i ++) {
404579 SignificantLongTerms .Bucket iBucket = result .getBucketByKey (Integer .toString (i ));
405580 assertThat (iBucket .getDocCount (), equalTo (100L ));
406581 SignificantLongTerms jAgg = iBucket .getAggregations ().get ("j" );
582+ assertThat (jAgg .getSubsetSize (), equalTo (100L ));
407583 for (int j = 0 ; j < 10 ; j ++) {
408584 SignificantLongTerms .Bucket jBucket = jAgg .getBucketByKey (Integer .toString (j ));
409585 assertThat (jBucket .getDocCount (), equalTo (10L ));
410586 SignificantLongTerms kAgg = jBucket .getAggregations ().get ("k" );
587+ assertThat (kAgg .getSubsetSize (), equalTo (10L ));
411588 for (int k = 0 ; k < 10 ; k ++) {
412589 SignificantLongTerms .Bucket kBucket = kAgg .getBucketByKey (Integer .toString (k ));
413590 assertThat (kBucket .getDocCount (), equalTo (1L ));
0 commit comments