Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

smarter nested column index utilization #13977

Merged
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ public AutoTypeColumnMerger(
String name,
IndexSpec indexSpec,
SegmentWriteOutMedium segmentWriteOutMedium,
@SuppressWarnings("unused") ProgressIndicator progressIndicator,
Closer closer
)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ public DimensionMergerV9 makeMerger(
Closer closer
)
{
return new AutoTypeColumnMerger(name, indexSpec, segmentWriteOutMedium, progress, closer);
return new AutoTypeColumnMerger(name, indexSpec, segmentWriteOutMedium, closer);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ public class NestedDataColumnMerger implements DimensionMergerV9
private final String name;
private final IndexSpec indexSpec;
private final SegmentWriteOutMedium segmentWriteOutMedium;
private final ProgressIndicator progressIndicator;
private final Closer closer;

private ColumnDescriptor.Builder descriptorBuilder;
Expand All @@ -64,15 +63,13 @@ public NestedDataColumnMerger(
String name,
IndexSpec indexSpec,
SegmentWriteOutMedium segmentWriteOutMedium,
ProgressIndicator progressIndicator,
Closer closer
)
{

this.name = name;
this.indexSpec = indexSpec;
this.segmentWriteOutMedium = segmentWriteOutMedium;
this.progressIndicator = progressIndicator;
this.closer = closer;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ public DimensionMergerV9 makeMerger(
Closer closer
)
{
return new NestedDataColumnMerger(name, indexSpec, segmentWriteOutMedium, progress, closer);
return new NestedDataColumnMerger(name, indexSpec, segmentWriteOutMedium, closer);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,19 @@ public interface ColumnConfig
* If the total number of rows in a column multiplied by this value is smaller than the total number of bitmap
* index operations required to perform to use a {@link LexicographicalRangeIndex} or {@link NumericRangeIndex},
* then for any {@link ColumnIndexSupplier} which chooses to participate in this config it will skip computing the
* index, in favor of doing a full scan and using a {@link org.apache.druid.query.filter.ValueMatcher} instead.
* This is indicated returning null from {@link ColumnIndexSupplier#as(Class)} even though it would have otherwise
* been able to create a {@link BitmapColumnIndex}. For range indexes on columns where every value has an index, the
* number of bitmap operations is determined by how many individual values fall in the range, a subset of the columns
* total cardinality.
* index, indicated by a return value of null from the 'forRange' methods, to force the filter to be processed
* with a scan using a {@link org.apache.druid.query.filter.ValueMatcher} instead.
* <p>
* Currently only the {@link org.apache.druid.segment.nested.NestedFieldLiteralColumnIndexSupplier} implements this
* behavior.
* For range indexes on columns where every value has an index, the number of bitmap operations is determined by how
* many individual values fall in the range, a subset of the columns total cardinality.
* <p>
* This can make many standalone filters faster in cases where the overhead of doing bitmap operations to construct a
* {@link org.apache.druid.segment.BitmapOffset} or {@link org.apache.druid.segment.vector.BitmapVectorOffset} can
* often exceed the cost of just using doing a full scan and using a
* {@link org.apache.druid.query.filter.ValueMatcher}.
* Currently only the {@link org.apache.druid.segment.nested.NestedCommonFormatColumn} implementations of
* {@link ColumnIndexSupplier} support this behavior.
* <p>
* This can make some standalone filters faster in cases where the overhead of walking the value dictionary and
* combining bitmaps to construct a {@link org.apache.druid.segment.BitmapOffset} or
* {@link org.apache.druid.segment.vector.BitmapVectorOffset} can exceed the cost of just using doing a full scan
* and using a {@link org.apache.druid.query.filter.ValueMatcher}.
* <p>
* Where this is especially useful is in cases where the range index is used as part of some
* {@link org.apache.druid.segment.filter.AndFilter}, which segment processing partitions into groups of 'pre'
Expand All @@ -65,13 +65,13 @@ default double skipValueRangeIndexScale()
* {@link BitmapColumnIndex}. For predicate indexes, this is determined by the total value cardinality of the column
* for columns with an index for every value.
* <p>
* Currently only the {@link org.apache.druid.segment.nested.NestedFieldLiteralColumnIndexSupplier} implements this
* behavior.
* Currently only the {@link org.apache.druid.segment.nested.NestedCommonFormatColumn} implementations of
* {@link ColumnIndexSupplier} support this behavior.
* <p>
* This can make many standalone filters faster in cases where the overhead of doing bitmap operations to construct a
* {@link org.apache.druid.segment.BitmapOffset} or {@link org.apache.druid.segment.vector.BitmapVectorOffset} can
* often exceed the cost of just using doing a full scan and using a
* {@link org.apache.druid.query.filter.ValueMatcher}.
* This can make some standalone filters faster in cases where the overhead of walking the value dictionary and
* combining bitmaps to construct a {@link org.apache.druid.segment.BitmapOffset} or
* {@link org.apache.druid.segment.vector.BitmapVectorOffset} can exceed the cost of just using doing a full scan
* and using a {@link org.apache.druid.query.filter.ValueMatcher}.
* <p>
* Where this is especially useful is in cases where the predicate index is used as part of some
* {@link org.apache.druid.segment.filter.AndFilter}, which segment processing partitions into groups of 'pre'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,5 +44,6 @@ public interface DictionaryEncodedStringValueIndex extends DictionaryEncodedValu
@Nullable
String getValue(int index);

@SuppressWarnings({"unreachable", "unused"})
BitmapFactory getBitmapFactory();
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,12 @@ public interface DruidPredicateIndex
*/
@Nullable
BitmapColumnIndex forPredicate(DruidPredicateFactory matcherFactory);

static boolean checkSkipThreshold(@Nullable ColumnConfig columnConfig, int numRowsToScan, int dictionaryCardinality)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shouldSkip? checkSkipThreshold doesn't really tell me how I'm supposed to interpret true/false. Does true mean that it exceeded the threshold and therefore should be skipped? Or does it mean that it did not exceed the threshold and should be used?

{
if (columnConfig == null) {
return false;
}
return dictionaryCardinality > (int) Math.ceil(columnConfig.skipValuePredicateIndexScale() * numRowsToScan);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,22 +34,41 @@ public final class IndexedStringDruidPredicateIndex<TDictionary extends Indexed<
private final BitmapFactory bitmapFactory;
private final TDictionary dictionary;
private final Indexed<ImmutableBitmap> bitmaps;
@Nullable
private final ColumnConfig columnConfig;
private final int numRows;

public IndexedStringDruidPredicateIndex(
BitmapFactory bitmapFactory,
TDictionary dictionary,
Indexed<ImmutableBitmap> bitmaps
)
{
this(bitmapFactory, dictionary, bitmaps, null, Integer.MAX_VALUE);
}

public IndexedStringDruidPredicateIndex(
BitmapFactory bitmapFactory,
TDictionary dictionary,
Indexed<ImmutableBitmap> bitmaps,
@Nullable ColumnConfig columnConfig,
int numRows
)
{
this.bitmapFactory = bitmapFactory;
this.dictionary = dictionary;
this.bitmaps = bitmaps;
this.columnConfig = columnConfig;
this.numRows = numRows;
}

@Override
@Nullable
public BitmapColumnIndex forPredicate(DruidPredicateFactory matcherFactory)
{
if (DruidPredicateIndex.checkSkipThreshold(columnConfig, numRows, dictionary.size())) {
return null;
}
return new SimpleImmutableBitmapIterableIndex()
{
@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,22 +44,36 @@ public final class IndexedUtf8LexicographicalRangeIndex<TDictionary extends Inde
private final Indexed<ImmutableBitmap> bitmaps;
private final boolean hasNull;

private final int skipRangeIndexThreshold;
@Nullable
private final ColumnConfig columnConfig;
private final int numRows;

public IndexedUtf8LexicographicalRangeIndex(
BitmapFactory bitmapFactory,
TDictionary dictionary,
Indexed<ImmutableBitmap> bitmaps,
boolean hasNull
)
{
this(bitmapFactory, dictionary, bitmaps, hasNull, null, Integer.MAX_VALUE);
}

public IndexedUtf8LexicographicalRangeIndex(
BitmapFactory bitmapFactory,
TDictionary dictionary,
Indexed<ImmutableBitmap> bitmaps,
boolean hasNull,
int skipRangeIndexThreshold
@Nullable ColumnConfig columnConfig,
int numRows
)
{
Preconditions.checkArgument(dictionary.isSorted(), "Dictionary must be sorted");
this.bitmapFactory = bitmapFactory;
this.dictionary = dictionary;
this.bitmaps = bitmaps;
this.hasNull = hasNull;
this.skipRangeIndexThreshold = skipRangeIndexThreshold;
this.columnConfig = columnConfig;
this.numRows = numRows;
}

@Override
Expand All @@ -73,7 +87,7 @@ public BitmapColumnIndex forRange(
{
final IntIntPair range = getRange(startValue, startStrict, endValue, endStrict);
final int start = range.leftInt(), end = range.rightInt();
if ((end - start) > skipRangeIndexThreshold) {
if (LexicographicalRangeIndex.checkSkipThreshold(columnConfig, numRows, end - start)) {
return null;
}
return new SimpleImmutableBitmapIterableIndex()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,4 +66,12 @@ BitmapColumnIndex forRange(
boolean endStrict,
Predicate<String> matcher
);

static boolean checkSkipThreshold(@Nullable ColumnConfig columnConfig, int numRows, int rangeSize)
{
if (columnConfig == null) {
return false;
}
return rangeSize > (int) Math.ceil(columnConfig.skipValueRangeIndexScale() * numRows);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,9 @@ BitmapColumnIndex forRange(
@Nullable Number endValue,
boolean endStrict
);

static boolean checkSkipThreshold(ColumnConfig columnConfig, int numRowsToScan, int rangeSize)
{
return rangeSize > (int) Math.ceil(columnConfig.skipValueRangeIndexScale() * numRowsToScan);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You aren't doing the null check on this one. That intentional?

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,8 @@ public class NestedFieldColumnIndexSupplier<TStringDictionary extends Indexed<By

private final int adjustLongId;
private final int adjustDoubleId;
private final int skipRangeIndexThreshold;
private final boolean skipPredicateIndex;
private final ColumnConfig columnConfig;
private final int numRows;

public NestedFieldColumnIndexSupplier(
FieldTypeInfo.TypeSet types,
Expand All @@ -121,8 +121,8 @@ public NestedFieldColumnIndexSupplier(
this.arrayElementBitmaps = arrayElementBitmaps;
this.adjustLongId = globalStringDictionarySupplier.get().size();
this.adjustDoubleId = adjustLongId + globalLongDictionarySupplier.get().size();
this.skipRangeIndexThreshold = (int) Math.ceil(columnConfig.skipValueRangeIndexScale() * numRows);
this.skipPredicateIndex = localDictionarySupplier.get().size() > Math.ceil(columnConfig.skipValuePredicateIndexScale() * numRows);
this.columnConfig = columnConfig;
this.numRows = numRows;
}

@Nullable
Expand Down Expand Up @@ -285,7 +285,7 @@ private <T> BitmapColumnIndex makeRangeIndex(
final int startIndex = localRange.leftInt();
final int endIndex = localRange.rightInt();
final int size = endIndex - startIndex;
if (size > skipRangeIndexThreshold) {
if (NumericRangeIndex.checkSkipThreshold(columnConfig, numRows, size)) {
return null;
}
return new SimpleImmutableBitmapIterableIndex()
Expand Down Expand Up @@ -479,7 +479,7 @@ public BitmapColumnIndex forRange(
0
);
final int start = range.leftInt(), end = range.rightInt();
if ((end - start) > skipRangeIndexThreshold) {
if (NumericRangeIndex.checkSkipThreshold(columnConfig, numRows, end - start)) {
return null;
}
return new SimpleImmutableBitmapIterableIndex()
Expand Down Expand Up @@ -539,7 +539,8 @@ private class NestedStringPredicateIndex implements DruidPredicateIndex
@Nullable
public BitmapColumnIndex forPredicate(DruidPredicateFactory matcherFactory)
{
if (skipPredicateIndex) {
final FixedIndexed<Integer> localDictionary = localDictionarySupplier.get();
if (DruidPredicateIndex.checkSkipThreshold(columnConfig, numRows, localDictionary.size())) {
return null;
}
return new SimpleImmutableBitmapIterableIndex()
Expand All @@ -549,7 +550,6 @@ public Iterable<ImmutableBitmap> getBitmapIterable()
{
return () -> new Iterator<ImmutableBitmap>()
{
final FixedIndexed<Integer> localDictionary = localDictionarySupplier.get();
final Indexed<ByteBuffer> stringDictionary = globalStringDictionarySupplier.get();
final Predicate<String> stringPredicate = matcherFactory.makeStringPredicate();

Expand Down Expand Up @@ -747,7 +747,8 @@ private class NestedLongPredicateIndex implements DruidPredicateIndex
@Nullable
public BitmapColumnIndex forPredicate(DruidPredicateFactory matcherFactory)
{
if (skipPredicateIndex) {
final FixedIndexed<Integer> localDictionary = localDictionarySupplier.get();
if (DruidPredicateIndex.checkSkipThreshold(columnConfig, numRows, localDictionary.size())) {
return null;
}
return new SimpleImmutableBitmapIterableIndex()
Expand All @@ -757,7 +758,6 @@ public Iterable<ImmutableBitmap> getBitmapIterable()
{
return () -> new Iterator<ImmutableBitmap>()
{
final FixedIndexed<Integer> localDictionary = localDictionarySupplier.get();
final FixedIndexed<Long> longDictionary = globalLongDictionarySupplier.get();
final DruidLongPredicate longPredicate = matcherFactory.makeLongPredicate();

Expand Down Expand Up @@ -959,7 +959,8 @@ private class NestedDoublePredicateIndex implements DruidPredicateIndex
@Nullable
public BitmapColumnIndex forPredicate(DruidPredicateFactory matcherFactory)
{
if (skipPredicateIndex) {
final FixedIndexed<Integer> localDictionary = localDictionarySupplier.get();
if (DruidPredicateIndex.checkSkipThreshold(columnConfig, numRows, localDictionary.size())) {
return null;
}
return new SimpleImmutableBitmapIterableIndex()
Expand Down Expand Up @@ -1158,7 +1159,7 @@ private class NestedVariantPredicateIndex extends NestedVariantLiteralIndex impl
@Nullable
public BitmapColumnIndex forPredicate(DruidPredicateFactory matcherFactory)
{
if (skipPredicateIndex) {
if (DruidPredicateIndex.checkSkipThreshold(columnConfig, numRows, localDictionary.size())) {
return null;
}
return new SimpleImmutableBitmapIterableIndex()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,8 @@ public static ScalarDoubleColumnAndIndexSupplier read(

private final BitmapFactory bitmapFactory;
private final ImmutableBitmap nullValueBitmap;
private final int skipRangeIndexThreshold;
private final boolean skipPredicateIndex;
private final ColumnConfig columnConfig;
private final int numRows;

private ScalarDoubleColumnAndIndexSupplier(
Supplier<FixedIndexed<Double>> longDictionary,
Expand All @@ -165,8 +165,8 @@ private ScalarDoubleColumnAndIndexSupplier(
this.valueIndexes = valueIndexes;
this.bitmapFactory = bitmapFactory;
this.nullValueBitmap = valueIndexes.get(0) == null ? bitmapFactory.makeEmptyImmutableBitmap() : valueIndexes.get(0);
this.skipRangeIndexThreshold = (int) Math.ceil(columnConfig.skipValueRangeIndexScale() * numRows);
this.skipPredicateIndex = doubleDictionarySupplier.get().size() > Math.ceil(columnConfig.skipValuePredicateIndexScale() * numRows);
this.columnConfig = columnConfig;
this.numRows = numRows;
}

@Override
Expand Down Expand Up @@ -349,7 +349,7 @@ public BitmapColumnIndex forRange(

final int startIndex = range.leftInt();
final int endIndex = range.rightInt();
if (endIndex - startIndex > skipRangeIndexThreshold) {
if (NumericRangeIndex.checkSkipThreshold(columnConfig, numRows, endIndex - startIndex)) {
return null;
}
return new SimpleImmutableBitmapIterableIndex()
Expand Down Expand Up @@ -384,7 +384,8 @@ private class DoublePredicateIndex implements DruidPredicateIndex
@Override
public BitmapColumnIndex forPredicate(DruidPredicateFactory matcherFactory)
{
if (skipPredicateIndex) {
final FixedIndexed<Double> dictionary = doubleDictionarySupplier.get();
if (DruidPredicateIndex.checkSkipThreshold(columnConfig, numRows, dictionary.size())) {
return null;
}
return new SimpleImmutableBitmapIterableIndex()
Expand Down
Loading