Add grouped scheduling support to Hive connector
haozhun committed Dec 9, 2017
1 parent b6c6b80 commit b42914d
Showing 7 changed files with 524 additions and 69 deletions.
HiveNodePartitioningProvider.java
@@ -19,6 +19,7 @@
import com.facebook.presto.spi.Node;
import com.facebook.presto.spi.NodeManager;
import com.facebook.presto.spi.connector.ConnectorNodePartitioningProvider;
import com.facebook.presto.spi.connector.ConnectorPartitionHandle;
import com.facebook.presto.spi.connector.ConnectorPartitioningHandle;
import com.facebook.presto.spi.connector.ConnectorTransactionHandle;
import com.facebook.presto.spi.type.Type;
@@ -32,7 +33,9 @@
import java.util.List;
import java.util.Map;
import java.util.function.ToIntFunction;
import java.util.stream.IntStream;

import static com.google.common.collect.ImmutableList.toImmutableList;
import static java.util.Objects.requireNonNull;

public class HiveNodePartitioningProvider
@@ -83,6 +86,14 @@ public ToIntFunction<ConnectorSplit> getSplitBucketFunction(
return value -> ((HiveSplit) value).getBucketNumber().getAsInt();
}

@Override
public List<ConnectorPartitionHandle> listPartitionHandles(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorPartitioningHandle partitioningHandle)
{
HivePartitioningHandle handle = (HivePartitioningHandle) partitioningHandle;
int bucketCount = handle.getBucketCount();
return IntStream.range(0, bucketCount).mapToObj(HivePartitionHandle::new).collect(toImmutableList());
}

private static <T> List<T> shuffle(Collection<T> items)
{
List<T> list = new ArrayList<>(items);
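The listPartitionHandles addition above is what lets the engine address a bucketed Hive layout one bucket at a time: each bucket number maps to one HivePartitionHandle. A minimal sketch of how a caller might consume it; the partitioningProvider, transactionHandle, session, partitioningHandle, and processBucket names are illustrative assumptions, not code from this commit:

// Sketch only: enumerate one ConnectorPartitionHandle per bucket and drive
// each bucket's work independently, which is the core idea behind grouped scheduling.
List<ConnectorPartitionHandle> handles =
        partitioningProvider.listPartitionHandles(transactionHandle, session, partitioningHandle);
for (ConnectorPartitionHandle handle : handles) {
    int bucket = ((HivePartitionHandle) handle).getBucket();
    processBucket(bucket); // hypothetical per-bucket scheduling step
}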
HivePartitionHandle.java (new file)
@@ -0,0 +1,62 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.hive;

import com.facebook.presto.spi.connector.ConnectorPartitionHandle;

import java.util.Objects;

import static com.google.common.base.MoreObjects.toStringHelper;

public class HivePartitionHandle
extends ConnectorPartitionHandle
{
private final int bucket;

public HivePartitionHandle(int bucket)
{
this.bucket = bucket;
}

public int getBucket()
{
return bucket;
}

@Override
public boolean equals(Object o)
{
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
HivePartitionHandle that = (HivePartitionHandle) o;
return bucket == that.bucket;
}

@Override
public int hashCode()
{
return Objects.hash(bucket);
}

@Override
public String toString()
{
return toStringHelper(this)
.toString();
}
}
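HivePartitionHandle is a simple value object identified by its bucket number; the equals and hashCode above make handles for the same bucket interchangeable, for example as map keys for per-bucket state. A small illustrative example (the rowsPerBucket map is an assumption, not part of the commit):

// Handles with the same bucket number compare equal, so they can key per-bucket state.
Map<ConnectorPartitionHandle, Long> rowsPerBucket = new HashMap<>();
rowsPerBucket.merge(new HivePartitionHandle(3), 100L, Long::sum);
rowsPerBucket.merge(new HivePartitionHandle(3), 50L, Long::sum);
// rowsPerBucket.get(new HivePartitionHandle(3)) now returns 150L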
HiveSplitManager.java
@@ -57,11 +57,13 @@
import static com.facebook.presto.hive.metastore.MetastoreUtil.makePartName;
import static com.facebook.presto.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR;
import static com.facebook.presto.spi.StandardErrorCode.SERVER_SHUTTING_DOWN;
import static com.facebook.presto.spi.connector.ConnectorSplitManager.SplitSchedulingStrategy.GROUPED_SCHEDULING;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Strings.isNullOrEmpty;
import static com.google.common.collect.Iterables.concat;
import static com.google.common.collect.Iterables.getOnlyElement;
import static com.google.common.collect.Iterables.transform;
import static io.airlift.units.DataSize.Unit.MEGABYTE;
import static java.lang.Math.min;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
@@ -161,6 +163,7 @@ public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, Co
SchemaTableName tableName = partition.getTableName();
List<HiveBucketing.HiveBucket> buckets = partition.getBuckets();
Optional<HiveBucketHandle> bucketHandle = layout.getBucketHandle();
checkArgument(splitSchedulingStrategy != GROUPED_SCHEDULING || bucketHandle.isPresent(), "SchedulingPolicy is bucketed, but BucketHandle is not present");

// sort partitions
partitions = Ordering.natural().onResultOf(HivePartition::getPartitionId).reverse().sortedCopy(partitions);
@@ -186,17 +189,37 @@ public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, Co
splitLoaderConcurrency,
recursiveDfsWalkerEnabled);

-        HiveSplitSource splitSource = new HiveSplitSource(
-                session,
-                table.get().getDatabaseName(),
-                table.get().getTableName(),
-                layout.getCompactEffectivePredicate(),
-                maxInitialSplits,
-                maxOutstandingSplits,
-                maxOutstandingSplitsSize,
-                hiveSplitLoader,
-                executor,
-                highMemorySplitSourceCounter);
+        HiveSplitSource splitSource;
+        switch (splitSchedulingStrategy) {
+            case UNGROUPED_SCHEDULING:
+                splitSource = HiveSplitSource.allAtOnce(
+                        session,
+                        table.get().getDatabaseName(),
+                        table.get().getTableName(),
+                        layout.getCompactEffectivePredicate(),
+                        maxInitialSplits,
+                        maxOutstandingSplits,
+                        maxOutstandingSplitsSize,
+                        hiveSplitLoader,
+                        executor,
+                        new CounterStat());
+                break;
+            case GROUPED_SCHEDULING:
+                splitSource = HiveSplitSource.bucketed(
+                        session,
+                        table.get().getDatabaseName(),
+                        table.get().getTableName(),
+                        layout.getCompactEffectivePredicate(),
+                        maxInitialSplits,
+                        maxOutstandingSplits,
+                        new DataSize(32, MEGABYTE),
+                        hiveSplitLoader,
+                        executor,
+                        new CounterStat());
+                break;
+            default:
+                throw new IllegalArgumentException("Unknown splitSchedulingStrategy: " + splitSchedulingStrategy);
+        }
hiveSplitLoader.start(splitSource);

return splitSource;
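The strategy that drives this switch is supplied by the engine when it requests splits. A hedged sketch of such a call site follows; the exact getSplits parameter list is truncated in the hunk header above, so the splitManager, transaction, session, and layoutHandle names here are assumptions for illustration:

// Illustrative only: asking for GROUPED_SCHEDULING yields the bucketed HiveSplitSource,
// while UNGROUPED_SCHEDULING falls back to the all-at-once source.
ConnectorSplitSource source = splitManager.getSplits(
        transaction,
        session,
        layoutHandle,
        ConnectorSplitManager.SplitSchedulingStrategy.GROUPED_SCHEDULING);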
(The remaining 4 changed files are not rendered in this view.)
