Skip to content

Commit

Permalink
Add a non-blocking and partitioned groupBy implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
xiangfu0 committed Dec 23, 2024
1 parent 616d691 commit c71c20d
Show file tree
Hide file tree
Showing 12 changed files with 757 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,17 @@ public static Integer getMinInitialIndexedTableCapacity(Map<String, String> quer
return checkedParseIntPositive(QueryOptionKey.MIN_INITIAL_INDEXED_TABLE_CAPACITY, minInitialIndexedTableCapacity);
}

@Nullable
public static String getGroupByAlgorithm(Map<String, String> queryOptions) {
return queryOptions.get(QueryOptionKey.GROUP_BY_ALGORITHM);
}

@Nullable
public static Integer getNumGroupByPartitions(Map<String, String> queryOptions) {
String numGroupByPartitionsString = queryOptions.get(QueryOptionKey.NUM_GROUP_BY_PARTITIONS);
return checkedParseIntPositive(QueryOptionKey.NUM_GROUP_BY_PARTITIONS, numGroupByPartitionsString);
}

public static boolean shouldDropResults(Map<String, String> queryOptions) {
return Boolean.parseBoolean(queryOptions.get(CommonConstants.Broker.Request.QueryOptionKey.DROP_RESULTS));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,9 +174,29 @@ public int size() {

@Override
public Iterator<Record> iterator() {
if (_topRecords == null) {
return _lookupMap.values().iterator();
}
return _topRecords.iterator();
}


public void mergePartitionTable(Table table) {
if (table instanceof IndexedTable) {
_lookupMap.putAll(((IndexedTable) table)._lookupMap);
} else {
Iterator<Record> iterator = table.iterator();
while (iterator.hasNext()) {
// NOTE: For performance concern, does not check the return value of the upsert(). Override this method if
// upsert() can return false.
upsert(iterator.next());
}
}
if (_lookupMap.size() >= _trimThreshold) {
resize();
}
}

public int getNumResizes() {
return _numResizes;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,15 @@ public class GroupByCombineOperator extends BaseSingleBlockCombineOperator<Group
private static final Logger LOGGER = LoggerFactory.getLogger(GroupByCombineOperator.class);
private static final String EXPLAIN_NAME = "COMBINE_GROUP_BY";

private final int _numAggregationFunctions;
private final int _numGroupByExpressions;
private final int _numColumns;
protected final int _numAggregationFunctions;
protected final int _numGroupByExpressions;
protected final int _numColumns;
// We use a CountDownLatch to track if all Futures are finished by the query timeout, and cancel the unfinished
// _futures (try to interrupt the execution if it already started).
private final CountDownLatch _operatorLatch;
protected final CountDownLatch _operatorLatch;

private volatile IndexedTable _indexedTable;
private volatile boolean _numGroupsLimitReached;
protected volatile IndexedTable _indexedTable;
protected volatile boolean _numGroupsLimitReached;

public GroupByCombineOperator(List<Operator> operators, QueryContext queryContext, ExecutorService executorService) {
super(null, operators, overrideMaxExecutionThreads(queryContext, operators.size()), executorService);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.pinot.core.operator.combine;

import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ExecutorService;
import org.apache.pinot.core.common.Operator;
import org.apache.pinot.core.data.table.IndexedTable;
import org.apache.pinot.core.data.table.IntermediateRecord;
import org.apache.pinot.core.data.table.Key;
import org.apache.pinot.core.data.table.Record;
import org.apache.pinot.core.operator.AcquireReleaseColumnsSegmentOperator;
import org.apache.pinot.core.operator.blocks.results.GroupByResultsBlock;
import org.apache.pinot.core.query.aggregation.groupby.AggregationGroupByResult;
import org.apache.pinot.core.query.aggregation.groupby.GroupKeyGenerator;
import org.apache.pinot.core.query.request.context.QueryContext;
import org.apache.pinot.core.util.GroupByUtils;
import org.apache.pinot.spi.trace.Tracing;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
* Combine operator for group-by queries.
* TODO: Use CombineOperatorUtils.getNumThreadsForQuery() to get the parallelism of the query instead of using
* all threads
*/
@SuppressWarnings("rawtypes")
public class NonblockingGroupByCombineOperator extends GroupByCombineOperator {
public static final String ALGORITHM = "NON-BLOCKING";

private static final Logger LOGGER = LoggerFactory.getLogger(NonblockingGroupByCombineOperator.class);
private static final String EXPLAIN_NAME = "NON_BLOCKING_COMBINE_GROUP_BY";

public NonblockingGroupByCombineOperator(List<Operator> operators, QueryContext queryContext,
ExecutorService executorService) {
super(operators, queryContext, executorService);
LOGGER.info("Using {} for group-by combine with {} tasks", ALGORITHM, _numTasks);
}

@Override
public String toExplainString() {
return EXPLAIN_NAME;
}

/**
* Executes query on one segment in a worker thread and merges the results into the indexed table.
*/
@Override
protected void processSegments() {
int operatorId;
IndexedTable indexedTable = null;
while (_processingException.get() == null && (operatorId = _nextOperatorId.getAndIncrement()) < _numOperators) {
Operator operator = _operators.get(operatorId);
try {
if (operator instanceof AcquireReleaseColumnsSegmentOperator) {
((AcquireReleaseColumnsSegmentOperator) operator).acquire();
}
GroupByResultsBlock resultsBlock = (GroupByResultsBlock) operator.nextBlock();
if (_indexedTable != null) {
synchronized (this) {
if (_indexedTable != null) {
indexedTable = _indexedTable;
_indexedTable = null;
}
}
}
if (indexedTable == null) {
indexedTable = GroupByUtils.createIndexedTableForCombineOperator(resultsBlock, _queryContext, 1);
}

// Set groups limit reached flag.
if (resultsBlock.isNumGroupsLimitReached()) {
_numGroupsLimitReached = true;
}

// Merge aggregation group-by result.
// Iterate over the group-by keys, for each key, update the group-by result in the indexedTable
Collection<IntermediateRecord> intermediateRecords = resultsBlock.getIntermediateRecords();
// Count the number of merged keys
int mergedKeys = 0;
// For now, only GroupBy OrderBy query has pre-constructed intermediate records
if (intermediateRecords == null) {
// Merge aggregation group-by result.
AggregationGroupByResult aggregationGroupByResult = resultsBlock.getAggregationGroupByResult();
if (aggregationGroupByResult != null) {
// Iterate over the group-by keys, for each key, update the group-by result in the indexedTable
Iterator<GroupKeyGenerator.GroupKey> dicGroupKeyIterator = aggregationGroupByResult.getGroupKeyIterator();
while (dicGroupKeyIterator.hasNext()) {
GroupKeyGenerator.GroupKey groupKey = dicGroupKeyIterator.next();
Object[] keys = groupKey._keys;
Object[] values = Arrays.copyOf(keys, _numColumns);
int groupId = groupKey._groupId;
for (int i = 0; i < _numAggregationFunctions; i++) {
values[_numGroupByExpressions + i] = aggregationGroupByResult.getResultForGroupId(i, groupId);
}
indexedTable.upsert(new Key(keys), new Record(values));
Tracing.ThreadAccountantOps.sampleAndCheckInterruptionPeriodically(mergedKeys);
mergedKeys++;
}
}
} else {
for (IntermediateRecord intermediateResult : intermediateRecords) {
//TODO: change upsert api so that it accepts intermediateRecord directly
indexedTable.upsert(intermediateResult._key, intermediateResult._record);
Tracing.ThreadAccountantOps.sampleAndCheckInterruptionPeriodically(mergedKeys);
mergedKeys++;
}
}
} catch (RuntimeException e) {
throw wrapOperatorException(operator, e);
} finally {
if (operator instanceof AcquireReleaseColumnsSegmentOperator) {
((AcquireReleaseColumnsSegmentOperator) operator).release();
}
}
}

boolean setGroupByResult = false;
while (indexedTable != null && !setGroupByResult) {
IndexedTable indexedTableToMerge = null;
synchronized (this) {
if (_indexedTable == null) {
_indexedTable = indexedTable;
setGroupByResult = true;
} else {
indexedTableToMerge = _indexedTable;
_indexedTable = null;
}
}
if (indexedTableToMerge != null) {
if (indexedTable.size() > indexedTableToMerge.size()) {
indexedTable.merge(indexedTableToMerge);
} else {
indexedTableToMerge.merge(indexedTable);
indexedTable = indexedTableToMerge;
}
}
}
}
}
Loading

0 comments on commit c71c20d

Please sign in to comment.