-
Notifications
You must be signed in to change notification settings - Fork 3.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ability to not rollup at index time, make pre aggregation an option #3020
Changes from 2 commits
b546ffd
94044d0
ca8f802
17ffaaf
2e82549
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,7 @@ Segment metadata queries return per-segment information about: | |
* Interval the segment covers | ||
* Column type of all the columns in the segment | ||
* Estimated total segment byte size in if it was stored in a flat format | ||
* Is the segment rolled up | ||
* Segment id | ||
|
||
```json | ||
|
@@ -143,6 +144,11 @@ null if the aggregators are unknown or unmergeable (if merging is enabled). | |
|
||
* The form of the result is a map of column name to aggregator. | ||
|
||
#### rollup | ||
|
||
* `rollup` in the result is true/false/null. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what does null mean? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there is a case, if previous segment is rolled up, and the next segment use new spec which is not to rollup, then for segmentMetadataQuery should handle this case and return null if merge=true |
||
* When merging is enabled, if some are rollup, others are not, result is null. | ||
|
||
### lenientAggregatorMerge | ||
|
||
Conflicts between aggregator metadata across segments can occur if some segments have unknown aggregators, or if | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -44,6 +44,7 @@ public class MergeTask extends MergeTaskBase | |
{ | ||
@JsonIgnore | ||
private final List<AggregatorFactory> aggregators; | ||
private final Boolean rollup; | ||
private final IndexSpec indexSpec; | ||
|
||
@JsonCreator | ||
|
@@ -52,12 +53,14 @@ public MergeTask( | |
@JsonProperty("dataSource") String dataSource, | ||
@JsonProperty("segments") List<DataSegment> segments, | ||
@JsonProperty("aggregations") List<AggregatorFactory> aggregators, | ||
@JsonProperty("rollup") Boolean rollup, | ||
@JsonProperty("indexSpec") IndexSpec indexSpec, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The documentation for the mergeTask needs to be updated There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. already updated in tasks.md |
||
@JsonProperty("context") Map<String, Object> context | ||
) | ||
{ | ||
super(id, dataSource, segments, context); | ||
this.aggregators = Preconditions.checkNotNull(aggregators, "null aggregations"); | ||
this.rollup = rollup == null ? Boolean.TRUE : rollup; | ||
this.indexSpec = indexSpec == null ? new IndexSpec() : indexSpec; | ||
} | ||
|
||
|
@@ -82,6 +85,7 @@ public QueryableIndex apply(@Nullable File input) | |
} | ||
} | ||
), | ||
rollup, | ||
aggregators.toArray(new AggregatorFactory[aggregators.size()]), | ||
outDir, | ||
indexSpec | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -355,6 +355,14 @@ public static SegmentAnalysis mergeAnalyses( | |
mergedId = "merged"; | ||
} | ||
|
||
final Boolean rollup; | ||
|
||
if (arg1.isRollup() != null && arg2.isRollup() != null && arg1.isRollup().equals(arg2.isRollup())) { | ||
rollup = arg1.isRollup(); | ||
} else { | ||
rollup = null; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what does null rollup mean? I feel like whenever we can't make a decision rollup should be set to true with a comment that Druid rolls up data by default There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there is a case, if previous segment is rolled up, and the next segment use new spec which is not to rollup, then for segmentMetadataQuery should handle this case and return null if merge=true |
||
} | ||
|
||
return new SegmentAnalysis( | ||
mergedId, | ||
newIntervals, | ||
|
@@ -363,7 +371,8 @@ public static SegmentAnalysis mergeAnalyses( | |
arg1.getNumRows() + arg2.getNumRows(), | ||
aggregators.isEmpty() ? null : aggregators, | ||
timestampSpec, | ||
queryGranularity | ||
queryGranularity, | ||
rollup | ||
); | ||
} | ||
|
||
|
@@ -378,7 +387,8 @@ public static SegmentAnalysis finalizeAnalysis(SegmentAnalysis analysis) | |
analysis.getNumRows(), | ||
analysis.getAggregators(), | ||
analysis.getTimestampSpec(), | ||
analysis.getQueryGranularity() | ||
analysis.getQueryGranularity(), | ||
analysis.isRollup() | ||
); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"reorderded" should be "reordered"