From 7415823d302264f2c35ba705fcf2a476e4d80029 Mon Sep 17 00:00:00 2001 From: huangjianmin <531493269@qq.com> Date: Tue, 17 Oct 2023 14:49:22 +0800 Subject: [PATCH] apply spotless-maven-plugin in trino module. --- .../netease/arctic/flink/lookup/KVTable.java | 8 +- .../kafka/LogKafkaPartitionSplitState.java | 16 +- .../log/kafka/LogRecordWithRetractInfo.java | 12 +- .../netease/arctic/flink/util/Projection.java | 4 +- .../flink/DynamicTableSourceTestBase.java | 3 +- .../arctic/flink/read/TestArcticSource.java | 3 +- .../netease/arctic/flink/lookup/KVTable.java | 8 +- .../kafka/LogKafkaPartitionSplitState.java | 16 +- .../log/kafka/LogRecordWithRetractInfo.java | 12 +- .../netease/arctic/flink/util/Projection.java | 4 +- .../flink/DynamicTableSourceTestBase.java | 3 +- .../arctic/flink/read/TestArcticSource.java | 3 +- pom.xml | 2 +- trino/pom.xml | 13 + .../com/netease/arctic/ArcticErrorCode.java | 11 +- .../arctic/trino/ArcticCatalogFactory.java | 8 +- .../ArcticCatalogSupportTableSuffix.java | 24 +- .../netease/arctic/trino/ArcticConfig.java | 5 +- .../netease/arctic/trino/ArcticConnector.java | 48 +- .../arctic/trino/ArcticConnectorFactory.java | 74 +- .../arctic/trino/ArcticConnectorMetadata.java | 106 +- .../trino/ArcticConnectorSplitManager.java | 21 +- .../trino/ArcticHadoopAuthentication.java | 4 +- .../trino/ArcticHdfsAuthentication.java | 13 +- .../arctic/trino/ArcticHdfsConfiguration.java | 8 +- .../arctic/trino/ArcticMetadataFactory.java | 24 +- .../netease/arctic/trino/ArcticModule.java | 59 +- .../trino/ArcticPageSourceProvider.java | 21 +- .../netease/arctic/trino/ArcticPlugin.java | 5 +- .../arctic/trino/ArcticSessionProperties.java | 66 +- .../trino/ArcticTransactionManager.java | 33 +- .../trino/DefaultArcticCatalogFactory.java | 12 +- .../arctic/trino/TableNameResolve.java | 6 +- .../trino/delete/DummyFileScanTask.java | 14 +- .../arctic/trino/delete/TrinoDeleteFile.java | 95 +- .../netease/arctic/trino/delete/TrinoRow.java | 47 +- .../trino/keyed/KeyedConnectorMetadata.java | 500 +- .../trino/keyed/KeyedConnectorPageSource.java | 54 +- .../trino/keyed/KeyedConnectorSplit.java | 5 +- .../keyed/KeyedConnectorSplitManager.java | 66 +- .../arctic/trino/keyed/KeyedDeleteFilter.java | 34 +- .../trino/keyed/KeyedPageSourceProvider.java | 51 +- .../arctic/trino/keyed/KeyedTableHandle.java | 7 +- .../unkeyed/AdaptHiveIcebergTableHandle.java | 26 +- .../trino/unkeyed/ArcticTrinoCatalog.java | 86 +- .../unkeyed/ArcticTrinoCatalogFactory.java | 4 +- .../arctic/trino/unkeyed/IcebergMetadata.java | 1693 ++++--- .../trino/unkeyed/IcebergPageSource.java | 49 +- .../unkeyed/IcebergPageSourceProvider.java | 830 ++-- .../arctic/trino/unkeyed/IcebergSplit.java | 36 +- .../trino/unkeyed/IcebergSplitManager.java | 49 +- .../trino/unkeyed/IcebergSplitSource.java | 232 +- .../trino/unkeyed/TrinoDeleteFilter.java | 44 +- .../netease/arctic/trino/util/MetricUtil.java | 4 +- .../trino/util/ObjectSerializerUtil.java | 12 +- .../parquet/reader/ColumnReaderFactory.java | 219 +- .../parquet/reader/TimestampColumnReader.java | 27 +- .../hive/metastore/HiveMetaStoreClient.java | 917 ++-- .../org/apache/hadoop/util/VersionInfo.java | 38 +- .../ParquetMetadataConverterUtil.java | 11 +- .../org/apache/parquet/io/ColumnIOUtil.java | 8 +- .../trino/arctic/ArcticQueryRunner.java | 40 +- .../arctic/trino/arctic/HiveTestRecords.java | 373 +- .../trino/arctic/TableTestBaseForTrino.java | 204 +- .../TableTestBaseWithInitDataForTrino.java | 97 +- .../arctic/TestBaseArcticPrimaryTable.java | 61 +- 
.../arctic/trino/arctic/TestHiveTable.java | 390 +- .../arctic/TestHiveTableBaseForTrino.java | 148 +- .../trino/iceberg/ArcticQueryRunner.java | 61 +- .../iceberg/ArcticQueryRunnerForClient.java | 35 +- .../trino/iceberg/BaseConnectorTest.java | 2327 ++++++---- .../trino/iceberg/SchemaInitializer.java | 31 +- .../iceberg/TestArcticCatalogFactory.java | 5 +- .../iceberg/TestArcticConnectorFactory.java | 84 +- .../arctic/trino/iceberg/TestArcticTable.java | 2 +- .../iceberg/TestBaseArcticConnectorTest.java | 4059 ++++++++++------- .../trino/iceberg/TestBasicArcticCatalog.java | 40 +- .../TestOnServerArcticConnectorTest.java | 3 +- .../arctic/trino/iceberg/TestUnionModule.java | 148 +- 79 files changed, 8081 insertions(+), 5840 deletions(-) diff --git a/flink/flink-common/src/main/java/com/netease/arctic/flink/lookup/KVTable.java b/flink/flink-common/src/main/java/com/netease/arctic/flink/lookup/KVTable.java index 2991ee47ed..3e9b7b7b49 100644 --- a/flink/flink-common/src/main/java/com/netease/arctic/flink/lookup/KVTable.java +++ b/flink/flink-common/src/main/java/com/netease/arctic/flink/lookup/KVTable.java @@ -57,10 +57,14 @@ public interface KVTable extends Serializable, Closeable { */ void initialize(Iterator dataStream) throws IOException; - /** @return if the rowData is filtered, return true. */ + /** + * @return if the rowData is filtered, return true. + */ boolean filter(T value); - /** @return if initialization is completed, return true. */ + /** + * @return if initialization is completed, return true. + */ boolean initialized(); /** diff --git a/flink/flink-common/src/main/java/com/netease/arctic/flink/read/source/log/kafka/LogKafkaPartitionSplitState.java b/flink/flink-common/src/main/java/com/netease/arctic/flink/read/source/log/kafka/LogKafkaPartitionSplitState.java index 1131882112..cb8344eaa1 100644 --- a/flink/flink-common/src/main/java/com/netease/arctic/flink/read/source/log/kafka/LogKafkaPartitionSplitState.java +++ b/flink/flink-common/src/main/java/com/netease/arctic/flink/read/source/log/kafka/LogKafkaPartitionSplitState.java @@ -34,13 +34,21 @@ public class LogKafkaPartitionSplitState extends KafkaPartitionSplitState { * opposite RowKind. 
*/ private boolean retracting; - /** @see LogKafkaPartitionSplit#retractStopOffset */ + /** + * @see LogKafkaPartitionSplit#retractStopOffset + */ @Nullable private Long retractStopOffset; - /** @see LogKafkaPartitionSplit#revertStartOffset */ + /** + * @see LogKafkaPartitionSplit#revertStartOffset + */ @Nullable private Long revertStartOffset; - /** @see LogKafkaPartitionSplit#retractingEpicNo */ + /** + * @see LogKafkaPartitionSplit#retractingEpicNo + */ @Nullable private Long retractingEpicNo; - /** @see LogKafkaPartitionSplit#retractingUpstreamId */ + /** + * @see LogKafkaPartitionSplit#retractingUpstreamId + */ @Nullable private String retractingUpstreamId; /** Key: upstream job id + "_" + epicNo, Value: epic start offset */ private final NavigableMap upstreamEpicStartOffsets; diff --git a/flink/flink-common/src/main/java/com/netease/arctic/flink/read/source/log/kafka/LogRecordWithRetractInfo.java b/flink/flink-common/src/main/java/com/netease/arctic/flink/read/source/log/kafka/LogRecordWithRetractInfo.java index af942c3904..e1b8d4f90f 100644 --- a/flink/flink-common/src/main/java/com/netease/arctic/flink/read/source/log/kafka/LogRecordWithRetractInfo.java +++ b/flink/flink-common/src/main/java/com/netease/arctic/flink/read/source/log/kafka/LogRecordWithRetractInfo.java @@ -28,11 +28,17 @@ public class LogRecordWithRetractInfo extends ConsumerRecord * opposite RowKind. */ private final boolean retracting; - /** @see LogKafkaPartitionSplit#retractStopOffset */ + /** + * @see LogKafkaPartitionSplit#retractStopOffset + */ private final Long retractStoppingOffset; - /** @see LogKafkaPartitionSplit#revertStartOffset */ + /** + * @see LogKafkaPartitionSplit#revertStartOffset + */ private final Long revertStartingOffset; - /** @see LogKafkaPartitionSplit#retractingEpicNo */ + /** + * @see LogKafkaPartitionSplit#retractingEpicNo + */ private final Long retractingEpicNo; private final LogData logData; diff --git a/flink/flink-common/src/main/java/com/netease/arctic/flink/util/Projection.java b/flink/flink-common/src/main/java/com/netease/arctic/flink/util/Projection.java index 14175177a4..96cdf696b4 100644 --- a/flink/flink-common/src/main/java/com/netease/arctic/flink/util/Projection.java +++ b/flink/flink-common/src/main/java/com/netease/arctic/flink/util/Projection.java @@ -67,7 +67,9 @@ public LogicalType project(LogicalType logicalType) { return this.project(TypeConversions.fromLogicalToDataType(logicalType)).getLogicalType(); } - /** @return {@code true} whether this projection is nested or not. */ + /** + * @return {@code true} whether this projection is nested or not. 
+ */ public abstract boolean isNested(); /** diff --git a/flink/flink-common/src/test/java/com/netease/arctic/flink/DynamicTableSourceTestBase.java b/flink/flink-common/src/test/java/com/netease/arctic/flink/DynamicTableSourceTestBase.java index 0b3fdc327c..606f4d0a89 100644 --- a/flink/flink-common/src/test/java/com/netease/arctic/flink/DynamicTableSourceTestBase.java +++ b/flink/flink-common/src/test/java/com/netease/arctic/flink/DynamicTableSourceTestBase.java @@ -61,7 +61,8 @@ public void cancel() {} false); } - public void init() {}; + public void init() {} + ; public abstract void doRun( WatermarkGenerator generator, diff --git a/flink/flink-common/src/test/java/com/netease/arctic/flink/read/TestArcticSource.java b/flink/flink-common/src/test/java/com/netease/arctic/flink/read/TestArcticSource.java index ea0c5749e1..8f429f6d7e 100644 --- a/flink/flink-common/src/test/java/com/netease/arctic/flink/read/TestArcticSource.java +++ b/flink/flink-common/src/test/java/com/netease/arctic/flink/read/TestArcticSource.java @@ -123,7 +123,8 @@ public class TestArcticSource extends TestRowDataReaderFunction implements Seria protected static final String sinkTableName = "test_sink_exactly_once"; protected static final TableIdentifier FAIL_TABLE_ID = TableIdentifier.of( - TableTestHelper.TEST_CATALOG_NAME, TableTestHelper.TEST_DB_NAME, sinkTableName);; + TableTestHelper.TEST_CATALOG_NAME, TableTestHelper.TEST_DB_NAME, sinkTableName); + ; @Before public void testSetup() throws IOException { diff --git a/flink/v1.15/flink/src/main/java/com/netease/arctic/flink/lookup/KVTable.java b/flink/v1.15/flink/src/main/java/com/netease/arctic/flink/lookup/KVTable.java index 2991ee47ed..3e9b7b7b49 100644 --- a/flink/v1.15/flink/src/main/java/com/netease/arctic/flink/lookup/KVTable.java +++ b/flink/v1.15/flink/src/main/java/com/netease/arctic/flink/lookup/KVTable.java @@ -57,10 +57,14 @@ public interface KVTable extends Serializable, Closeable { */ void initialize(Iterator dataStream) throws IOException; - /** @return if the rowData is filtered, return true. */ + /** + * @return if the rowData is filtered, return true. + */ boolean filter(T value); - /** @return if initialization is completed, return true. */ + /** + * @return if initialization is completed, return true. + */ boolean initialized(); /** diff --git a/flink/v1.15/flink/src/main/java/com/netease/arctic/flink/read/source/log/kafka/LogKafkaPartitionSplitState.java b/flink/v1.15/flink/src/main/java/com/netease/arctic/flink/read/source/log/kafka/LogKafkaPartitionSplitState.java index 1131882112..cb8344eaa1 100644 --- a/flink/v1.15/flink/src/main/java/com/netease/arctic/flink/read/source/log/kafka/LogKafkaPartitionSplitState.java +++ b/flink/v1.15/flink/src/main/java/com/netease/arctic/flink/read/source/log/kafka/LogKafkaPartitionSplitState.java @@ -34,13 +34,21 @@ public class LogKafkaPartitionSplitState extends KafkaPartitionSplitState { * opposite RowKind. 
*/ private boolean retracting; - /** @see LogKafkaPartitionSplit#retractStopOffset */ + /** + * @see LogKafkaPartitionSplit#retractStopOffset + */ @Nullable private Long retractStopOffset; - /** @see LogKafkaPartitionSplit#revertStartOffset */ + /** + * @see LogKafkaPartitionSplit#revertStartOffset + */ @Nullable private Long revertStartOffset; - /** @see LogKafkaPartitionSplit#retractingEpicNo */ + /** + * @see LogKafkaPartitionSplit#retractingEpicNo + */ @Nullable private Long retractingEpicNo; - /** @see LogKafkaPartitionSplit#retractingUpstreamId */ + /** + * @see LogKafkaPartitionSplit#retractingUpstreamId + */ @Nullable private String retractingUpstreamId; /** Key: upstream job id + "_" + epicNo, Value: epic start offset */ private final NavigableMap upstreamEpicStartOffsets; diff --git a/flink/v1.15/flink/src/main/java/com/netease/arctic/flink/read/source/log/kafka/LogRecordWithRetractInfo.java b/flink/v1.15/flink/src/main/java/com/netease/arctic/flink/read/source/log/kafka/LogRecordWithRetractInfo.java index 623394f877..0c57a6a869 100644 --- a/flink/v1.15/flink/src/main/java/com/netease/arctic/flink/read/source/log/kafka/LogRecordWithRetractInfo.java +++ b/flink/v1.15/flink/src/main/java/com/netease/arctic/flink/read/source/log/kafka/LogRecordWithRetractInfo.java @@ -28,11 +28,17 @@ public class LogRecordWithRetractInfo extends ConsumerRecord * opposite RowKind. */ private final boolean retracting; - /** @see LogKafkaPartitionSplit#retractStopOffset */ + /** + * @see LogKafkaPartitionSplit#retractStopOffset + */ private final Long retractStoppingOffset; - /** @see LogKafkaPartitionSplit#revertStartOffset */ + /** + * @see LogKafkaPartitionSplit#revertStartOffset + */ private final Long revertStartingOffset; - /** @see LogKafkaPartitionSplit#retractingEpicNo */ + /** + * @see LogKafkaPartitionSplit#retractingEpicNo + */ private final Long retractingEpicNo; private final LogData logData; diff --git a/flink/v1.15/flink/src/main/java/com/netease/arctic/flink/util/Projection.java b/flink/v1.15/flink/src/main/java/com/netease/arctic/flink/util/Projection.java index 14175177a4..96cdf696b4 100644 --- a/flink/v1.15/flink/src/main/java/com/netease/arctic/flink/util/Projection.java +++ b/flink/v1.15/flink/src/main/java/com/netease/arctic/flink/util/Projection.java @@ -67,7 +67,9 @@ public LogicalType project(LogicalType logicalType) { return this.project(TypeConversions.fromLogicalToDataType(logicalType)).getLogicalType(); } - /** @return {@code true} whether this projection is nested or not. */ + /** + * @return {@code true} whether this projection is nested or not. 
+ */ public abstract boolean isNested(); /** diff --git a/flink/v1.15/flink/src/test/java/com/netease/arctic/flink/DynamicTableSourceTestBase.java b/flink/v1.15/flink/src/test/java/com/netease/arctic/flink/DynamicTableSourceTestBase.java index 0b3fdc327c..606f4d0a89 100644 --- a/flink/v1.15/flink/src/test/java/com/netease/arctic/flink/DynamicTableSourceTestBase.java +++ b/flink/v1.15/flink/src/test/java/com/netease/arctic/flink/DynamicTableSourceTestBase.java @@ -61,7 +61,8 @@ public void cancel() {} false); } - public void init() {}; + public void init() {} + ; public abstract void doRun( WatermarkGenerator generator, diff --git a/flink/v1.15/flink/src/test/java/com/netease/arctic/flink/read/TestArcticSource.java b/flink/v1.15/flink/src/test/java/com/netease/arctic/flink/read/TestArcticSource.java index ea0c5749e1..8f429f6d7e 100644 --- a/flink/v1.15/flink/src/test/java/com/netease/arctic/flink/read/TestArcticSource.java +++ b/flink/v1.15/flink/src/test/java/com/netease/arctic/flink/read/TestArcticSource.java @@ -123,7 +123,8 @@ public class TestArcticSource extends TestRowDataReaderFunction implements Seria protected static final String sinkTableName = "test_sink_exactly_once"; protected static final TableIdentifier FAIL_TABLE_ID = TableIdentifier.of( - TableTestHelper.TEST_CATALOG_NAME, TableTestHelper.TEST_DB_NAME, sinkTableName);; + TableTestHelper.TEST_CATALOG_NAME, TableTestHelper.TEST_DB_NAME, sinkTableName); + ; @Before public void testSetup() throws IOException { diff --git a/pom.xml b/pom.xml index 56325a5620..bfca9225da 100644 --- a/pom.xml +++ b/pom.xml @@ -907,7 +907,7 @@ - 1.7 + 1.15.0 diff --git a/trino/pom.xml b/trino/pom.xml index f55df7c065..663addd07b 100644 --- a/trino/pom.xml +++ b/trino/pom.xml @@ -592,6 +592,19 @@ true + + + org.apache.maven.plugins + maven-checkstyle-plugin + + true + + + + + com.diffplug.spotless + spotless-maven-plugin + src/main/java diff --git a/trino/src/main/java/com/netease/arctic/ArcticErrorCode.java b/trino/src/main/java/com/netease/arctic/ArcticErrorCode.java index f962b31acb..c8fdfeef58 100644 --- a/trino/src/main/java/com/netease/arctic/ArcticErrorCode.java +++ b/trino/src/main/java/com/netease/arctic/ArcticErrorCode.java @@ -18,17 +18,14 @@ package com.netease.arctic; +import static io.trino.spi.ErrorType.EXTERNAL; + import io.trino.spi.ErrorCode; import io.trino.spi.ErrorCodeSupplier; import io.trino.spi.ErrorType; -import static io.trino.spi.ErrorType.EXTERNAL; - -/** - * Error code - */ -public enum ArcticErrorCode - implements ErrorCodeSupplier { +/** Error code */ +public enum ArcticErrorCode implements ErrorCodeSupplier { ARCTIC_BAD_DATA(4, EXTERNAL); private final ErrorCode errorCode; diff --git a/trino/src/main/java/com/netease/arctic/trino/ArcticCatalogFactory.java b/trino/src/main/java/com/netease/arctic/trino/ArcticCatalogFactory.java index 23c452c6dd..42611c6f89 100644 --- a/trino/src/main/java/com/netease/arctic/trino/ArcticCatalogFactory.java +++ b/trino/src/main/java/com/netease/arctic/trino/ArcticCatalogFactory.java @@ -21,14 +21,10 @@ import com.netease.arctic.catalog.ArcticCatalog; import com.netease.arctic.table.TableMetaStore; -/** - * A interface of factory to generate ArcticCatalog - */ +/** A interface of factory to generate ArcticCatalog */ public interface ArcticCatalogFactory { - /** - * generate ArcticCatalog - */ + /** generate ArcticCatalog */ ArcticCatalog getArcticCatalog(); TableMetaStore getTableMetastore(); diff --git a/trino/src/main/java/com/netease/arctic/trino/ArcticCatalogSupportTableSuffix.java 
b/trino/src/main/java/com/netease/arctic/trino/ArcticCatalogSupportTableSuffix.java index 9d3603ba9a..7759bf25bd 100644 --- a/trino/src/main/java/com/netease/arctic/trino/ArcticCatalogSupportTableSuffix.java +++ b/trino/src/main/java/com/netease/arctic/trino/ArcticCatalogSupportTableSuffix.java @@ -61,12 +61,14 @@ import org.apache.iceberg.io.LocationProvider; import org.apache.iceberg.types.Types; import org.apache.iceberg.util.StructLikeMap; + import java.util.List; import java.util.Map; import java.util.stream.Collectors; /** - * A wrapper of {@link ArcticCatalog} to resolve sub table, such as "tableName#change","tableName#base" + * A wrapper of {@link ArcticCatalog} to resolve sub table, such as + * "tableName#change","tableName#base" */ public class ArcticCatalogSupportTableSuffix implements ArcticCatalog { @@ -82,8 +84,7 @@ public String name() { } @Override - public void initialize( - AmsClient client, CatalogMeta meta, Map properties) { + public void initialize(AmsClient client, CatalogMeta meta, Map properties) { arcticCatalog.initialize(client, meta, properties); } @@ -111,12 +112,18 @@ public List listTables(String database) { public ArcticTable loadTable(TableIdentifier tableIdentifier) { TableNameResolve tableNameResolve = new TableNameResolve(tableIdentifier.getTableName()); if (tableNameResolve.withSuffix()) { - TableIdentifier newTableIdentifier = TableIdentifier.of(tableIdentifier.getCatalog(), - tableIdentifier.getDatabase(), tableNameResolve.getTableName()); + TableIdentifier newTableIdentifier = + TableIdentifier.of( + tableIdentifier.getCatalog(), + tableIdentifier.getDatabase(), + tableNameResolve.getTableName()); ArcticTable arcticTable = arcticCatalog.loadTable(newTableIdentifier); if (arcticTable.isUnkeyedTable()) { - throw new IllegalArgumentException("table " + newTableIdentifier + " is not keyed table can not use " + - "change or base suffix"); + throw new IllegalArgumentException( + "table " + + newTableIdentifier + + " is not keyed table can not use " + + "change or base suffix"); } KeyedTable keyedTable = arcticTable.asKeyedTable(); if (tableNameResolve.isBase()) { @@ -139,8 +146,7 @@ public boolean dropTable(TableIdentifier tableIdentifier, boolean purge) { } @Override - public TableBuilder newTableBuilder( - TableIdentifier identifier, Schema schema) { + public TableBuilder newTableBuilder(TableIdentifier identifier, Schema schema) { return arcticCatalog.newTableBuilder(identifier, schema); } diff --git a/trino/src/main/java/com/netease/arctic/trino/ArcticConfig.java b/trino/src/main/java/com/netease/arctic/trino/ArcticConfig.java index 61ddfcb9ab..3483710612 100644 --- a/trino/src/main/java/com/netease/arctic/trino/ArcticConfig.java +++ b/trino/src/main/java/com/netease/arctic/trino/ArcticConfig.java @@ -1,4 +1,3 @@ - /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -22,9 +21,7 @@ import io.airlift.configuration.Config; import io.airlift.configuration.ConfigDescription; -/** - * Arctic config - */ +/** Arctic config */ public class ArcticConfig { private String catalogUrl; private boolean hdfsImpersonationEnabled; diff --git a/trino/src/main/java/com/netease/arctic/trino/ArcticConnector.java b/trino/src/main/java/com/netease/arctic/trino/ArcticConnector.java index 9ddac09efd..e7faac2dbe 100644 --- a/trino/src/main/java/com/netease/arctic/trino/ArcticConnector.java +++ b/trino/src/main/java/com/netease/arctic/trino/ArcticConnector.java @@ -1,4 +1,3 @@ - /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -19,6 +18,13 @@ package com.netease.arctic.trino; +import static com.google.common.collect.ImmutableList.toImmutableList; +import static com.google.common.collect.Sets.immutableEnumSet; +import static io.trino.spi.connector.ConnectorCapabilities.NOT_NULL_COLUMN_CONSTRAINT; +import static io.trino.spi.transaction.IsolationLevel.SERIALIZABLE; +import static io.trino.spi.transaction.IsolationLevel.checkConnectorSupports; +import static java.util.Objects.requireNonNull; + import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import io.airlift.bootstrap.LifeCycleManager; @@ -43,20 +49,12 @@ import org.slf4j.LoggerFactory; import javax.inject.Inject; + import java.util.List; import java.util.Optional; import java.util.Set; -import static com.google.common.collect.ImmutableList.toImmutableList; -import static com.google.common.collect.Sets.immutableEnumSet; -import static io.trino.spi.connector.ConnectorCapabilities.NOT_NULL_COLUMN_CONSTRAINT; -import static io.trino.spi.transaction.IsolationLevel.SERIALIZABLE; -import static io.trino.spi.transaction.IsolationLevel.checkConnectorSupports; -import static java.util.Objects.requireNonNull; - -/** - * A Connector of arctic to Trino - */ +/** A Connector of arctic to Trino */ public class ArcticConnector implements Connector { private static final Logger log = LoggerFactory.getLogger(ArcticConnector.class); @@ -93,15 +91,22 @@ public ArcticConnector( this.splitManager = requireNonNull(splitManager, "splitManager is null"); this.pageSourceProvider = requireNonNull(pageSourceProvider, "pageSourceProvider is null"); this.pageSinkProvider = requireNonNull(pageSinkProvider, "pageSinkProvider is null"); - this.nodePartitioningProvider = requireNonNull(nodePartitioningProvider, "nodePartitioningProvider is null"); - this.sessionProperties = requireNonNull(sessionPropertiesProviders, "sessionPropertiesProviders is null").stream() - .flatMap(sessionPropertiesProvider -> sessionPropertiesProvider.getSessionProperties().stream()) - .collect(toImmutableList()); - this.schemaProperties = ImmutableList.copyOf(requireNonNull(schemaProperties, "schemaProperties is null")); - this.tableProperties = ImmutableList.copyOf(requireNonNull(tableProperties, "tableProperties is null")); + this.nodePartitioningProvider = + requireNonNull(nodePartitioningProvider, "nodePartitioningProvider is null"); + this.sessionProperties = + requireNonNull(sessionPropertiesProviders, "sessionPropertiesProviders is null").stream() + .flatMap( + sessionPropertiesProvider -> + sessionPropertiesProvider.getSessionProperties().stream()) + .collect(toImmutableList()); + this.schemaProperties = + ImmutableList.copyOf(requireNonNull(schemaProperties, "schemaProperties is null")); + this.tableProperties = + 
ImmutableList.copyOf(requireNonNull(tableProperties, "tableProperties is null")); this.accessControl = requireNonNull(accessControl, "accessControl is null"); this.procedures = ImmutableSet.copyOf(requireNonNull(procedures, "procedures is null")); - this.tableProcedures = ImmutableSet.copyOf(requireNonNull(tableProcedures, "tableProcedures is null")); + this.tableProcedures = + ImmutableSet.copyOf(requireNonNull(tableProcedures, "tableProcedures is null")); } @Override @@ -110,7 +115,8 @@ public Set getCapabilities() { } @Override - public ConnectorMetadata getMetadata(ConnectorSession session, ConnectorTransactionHandle transaction) { + public ConnectorMetadata getMetadata( + ConnectorSession session, ConnectorTransactionHandle transaction) { ConnectorMetadata metadata = transactionManager.get(transaction); return new ClassLoaderSafeConnectorMetadata(metadata, getClass().getClassLoader()); } @@ -172,9 +178,7 @@ public ConnectorAccessControl getAccessControl() { @Override public ConnectorTransactionHandle beginTransaction( - IsolationLevel isolationLevel, - boolean readOnly, - boolean autoCommit) { + IsolationLevel isolationLevel, boolean readOnly, boolean autoCommit) { checkConnectorSupports(SERIALIZABLE, isolationLevel); ConnectorTransactionHandle transaction = new HiveTransactionHandle(autoCommit); transactionManager.begin(transaction); diff --git a/trino/src/main/java/com/netease/arctic/trino/ArcticConnectorFactory.java b/trino/src/main/java/com/netease/arctic/trino/ArcticConnectorFactory.java index 5754528412..f16d8b8ba0 100644 --- a/trino/src/main/java/com/netease/arctic/trino/ArcticConnectorFactory.java +++ b/trino/src/main/java/com/netease/arctic/trino/ArcticConnectorFactory.java @@ -1,4 +1,3 @@ - /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -19,6 +18,8 @@ package com.netease.arctic.trino; +import static com.google.inject.Scopes.SINGLETON; + import com.google.inject.Injector; import com.google.inject.Key; import com.google.inject.TypeLiteral; @@ -63,11 +64,7 @@ import java.util.Optional; import java.util.Set; -import static com.google.inject.Scopes.SINGLETON; - -/** - * Factory to generate {@link Connector} - */ +/** Factory to generate {@link Connector} */ public class ArcticConnectorFactory implements ConnectorFactory { private static final Logger LOG = LoggerFactory.getLogger(ArcticConnectorFactory.class); @@ -78,43 +75,54 @@ public String getName() { } @Override - public Connector create(String catalogName, Map config, ConnectorContext context) { + public Connector create( + String catalogName, Map config, ConnectorContext context) { ClassLoader classLoader = InternalIcebergConnectorFactory.class.getClassLoader(); try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) { - Bootstrap app = new Bootstrap( - new EventModule(), - new MBeanModule(), - new ConnectorObjectNameGeneratorModule("io.trino.plugin.iceberg", "trino.plugin.iceberg"), - new JsonModule(), - new ArcticModule(context.getTypeManager()), - new IcebergSecurityModule(), - new MBeanServerModule(), - binder -> { - binder.bind(NodeVersion.class) - .toInstance(new NodeVersion(context.getNodeManager().getCurrentNode().getVersion())); - binder.bind(NodeManager.class).toInstance(context.getNodeManager()); - binder.bind(TypeManager.class).toInstance(context.getTypeManager()); - binder.bind(PageIndexerFactory.class).toInstance(context.getPageIndexerFactory()); - binder.bind(CatalogName.class).toInstance(new CatalogName(catalogName)); - binder.bind(TrinoFileSystemFactory.class).to(HdfsFileSystemFactory.class).in(SINGLETON); - }); + Bootstrap app = + new Bootstrap( + new EventModule(), + new MBeanModule(), + new ConnectorObjectNameGeneratorModule( + "io.trino.plugin.iceberg", "trino.plugin.iceberg"), + new JsonModule(), + new ArcticModule(context.getTypeManager()), + new IcebergSecurityModule(), + new MBeanServerModule(), + binder -> { + binder + .bind(NodeVersion.class) + .toInstance( + new NodeVersion(context.getNodeManager().getCurrentNode().getVersion())); + binder.bind(NodeManager.class).toInstance(context.getNodeManager()); + binder.bind(TypeManager.class).toInstance(context.getTypeManager()); + binder.bind(PageIndexerFactory.class).toInstance(context.getPageIndexerFactory()); + binder.bind(CatalogName.class).toInstance(new CatalogName(catalogName)); + binder + .bind(TrinoFileSystemFactory.class) + .to(HdfsFileSystemFactory.class) + .in(SINGLETON); + }); - Injector injector = app - .doNotInitializeLogging() - .setRequiredConfigurationProperties(config) - .initialize(); + Injector injector = + app.doNotInitializeLogging().setRequiredConfigurationProperties(config).initialize(); LifeCycleManager lifeCycleManager = injector.getInstance(LifeCycleManager.class); - ArcticTransactionManager transactionManager = injector.getInstance(ArcticTransactionManager.class); + ArcticTransactionManager transactionManager = + injector.getInstance(ArcticTransactionManager.class); ConnectorSplitManager splitManager = injector.getInstance(ConnectorSplitManager.class); - ConnectorPageSourceProvider connectorPageSource = injector.getInstance(ConnectorPageSourceProvider.class); - ConnectorPageSinkProvider pageSinkProvider = injector.getInstance(ConnectorPageSinkProvider.class); + ConnectorPageSourceProvider connectorPageSource = + 
injector.getInstance(ConnectorPageSourceProvider.class); + ConnectorPageSinkProvider pageSinkProvider = + injector.getInstance(ConnectorPageSinkProvider.class); ConnectorNodePartitioningProvider connectorDistributionProvider = injector.getInstance(ConnectorNodePartitioningProvider.class); Set sessionPropertiesProviders = injector.getInstance(Key.get(new TypeLiteral>() {})); - IcebergTableProperties icebergTableProperties = injector.getInstance(IcebergTableProperties.class); - Set procedures = injector.getInstance(Key.get(new TypeLiteral>() {})); + IcebergTableProperties icebergTableProperties = + injector.getInstance(IcebergTableProperties.class); + Set procedures = + injector.getInstance(Key.get(new TypeLiteral>() {})); Set tableProcedures = injector.getInstance(Key.get(new TypeLiteral>() {})); Optional accessControl = diff --git a/trino/src/main/java/com/netease/arctic/trino/ArcticConnectorMetadata.java b/trino/src/main/java/com/netease/arctic/trino/ArcticConnectorMetadata.java index 88bbe208f1..33d25724c2 100644 --- a/trino/src/main/java/com/netease/arctic/trino/ArcticConnectorMetadata.java +++ b/trino/src/main/java/com/netease/arctic/trino/ArcticConnectorMetadata.java @@ -18,6 +18,9 @@ package com.netease.arctic.trino; +import static io.trino.plugin.hive.util.HiveUtil.isHiveSystemSchema; +import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; + import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterators; import com.netease.arctic.catalog.ArcticCatalog; @@ -65,13 +68,10 @@ import java.util.OptionalLong; import java.util.stream.Collectors; -import static io.trino.plugin.hive.util.HiveUtil.isHiveSystemSchema; -import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; - /** - * {@link ArcticConnectorMetadata} is a Union {@link ConnectorMetadata} contain {@link KeyedConnectorMetadata} and - * {@link IcebergMetadata}. - * This is final {@link ConnectorMetadata} provided to Trino + * {@link ArcticConnectorMetadata} is a Union {@link ConnectorMetadata} contain {@link + * KeyedConnectorMetadata} and {@link IcebergMetadata}. 
This is final {@link ConnectorMetadata} + * provided to Trino */ public class ArcticConnectorMetadata implements ConnectorMetadata { @@ -92,12 +92,14 @@ public ArcticConnectorMetadata( @Override public List listSchemaNames(ConnectorSession session) { - return arcticCatalog.listDatabases().stream().map(s -> s.toLowerCase(Locale.ROOT)).collect(Collectors.toList()); + return arcticCatalog.listDatabases().stream() + .map(s -> s.toLowerCase(Locale.ROOT)) + .collect(Collectors.toList()); } @Override public ConnectorTableHandle getTableHandle(ConnectorSession session, SchemaTableName tableName) { - //需要缓存 + // 需要缓存 ArcticTable arcticTable = null; try { arcticTable = getArcticTable(tableName); @@ -116,7 +118,8 @@ public Optional getSystemTable(ConnectorSession session, SchemaTabl return icebergMetadata.getSystemTable(session, tableName); } - public ConnectorTableProperties getTableProperties(ConnectorSession session, ConnectorTableHandle table) { + public ConnectorTableProperties getTableProperties( + ConnectorSession session, ConnectorTableHandle table) { if (table instanceof KeyedTableHandle) { return new ConnectorTableProperties(); } else { @@ -125,7 +128,8 @@ public ConnectorTableProperties getTableProperties(ConnectorSession session, Con } @Override - public ConnectorTableMetadata getTableMetadata(ConnectorSession session, ConnectorTableHandle table) { + public ConnectorTableMetadata getTableMetadata( + ConnectorSession session, ConnectorTableHandle table) { if (table instanceof KeyedTableHandle) { return keyedConnectorMetadata.getTableMetadata(session, table); } else { @@ -135,15 +139,15 @@ public ConnectorTableMetadata getTableMetadata(ConnectorSession session, Connect @Override public List listTables(ConnectorSession session, Optional schemaName) { - return listNamespaces(session, schemaName) - .stream() + return listNamespaces(session, schemaName).stream() .flatMap(s -> arcticCatalog.listTables(s).stream()) .map(s -> new SchemaTableName(s.getDatabase(), s.getTableName())) .collect(Collectors.toList()); } @Override - public Map getColumnHandles(ConnectorSession session, ConnectorTableHandle tableHandle) { + public Map getColumnHandles( + ConnectorSession session, ConnectorTableHandle tableHandle) { if (tableHandle instanceof KeyedTableHandle) { return keyedConnectorMetadata.getColumnHandles(session, tableHandle); } else { @@ -153,9 +157,7 @@ public Map getColumnHandles(ConnectorSession session, Conn @Override public ColumnMetadata getColumnMetadata( - ConnectorSession session, - ConnectorTableHandle tableHandle, - ColumnHandle columnHandle) { + ConnectorSession session, ConnectorTableHandle tableHandle, ColumnHandle columnHandle) { if (tableHandle instanceof KeyedTableHandle) { return keyedConnectorMetadata.getColumnMetadata(session, tableHandle, columnHandle); } else { @@ -164,11 +166,13 @@ public ColumnMetadata getColumnMetadata( } @Override - public Iterator streamTableColumns(ConnectorSession session, SchemaTablePrefix prefix) { + public Iterator streamTableColumns( + ConnectorSession session, SchemaTablePrefix prefix) { if (prefix.getTable().isPresent()) { ArcticTable arcticTable = null; try { - arcticTable = getArcticTable(new SchemaTableName(prefix.getSchema().get(), prefix.getTable().get())); + arcticTable = + getArcticTable(new SchemaTableName(prefix.getSchema().get(), prefix.getTable().get())); } catch (NoSuchTableException e) { List schemaTableNames = ImmutableList.of(); return schemaTableNames.iterator(); @@ -186,14 +190,14 @@ public Iterator streamTableColumns(ConnectorSession 
sessio } @Override - public void createTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, boolean ignoreExisting) { + public void createTable( + ConnectorSession session, ConnectorTableMetadata tableMetadata, boolean ignoreExisting) { icebergMetadata.createTable(session, tableMetadata, ignoreExisting); } @Override public Optional getNewTableLayout( - ConnectorSession session, - ConnectorTableMetadata tableMetadata) { + ConnectorSession session, ConnectorTableMetadata tableMetadata) { return icebergMetadata.getNewTableLayout(session, tableMetadata); } @@ -216,7 +220,8 @@ public Optional finishCreateTable( } @Override - public Optional getInsertLayout(ConnectorSession session, ConnectorTableHandle tableHandle) { + public Optional getInsertLayout( + ConnectorSession session, ConnectorTableHandle tableHandle) { return icebergMetadata.getInsertLayout(session, tableHandle); } @@ -247,29 +252,25 @@ public Optional getTableHandleForExecute( RetryMode retryMode) { if (connectorTableHandle instanceof KeyedTableHandle) { return ConnectorMetadata.super.getTableHandleForExecute( - session, - connectorTableHandle, - procedureName, - executeProperties, - retryMode); + session, connectorTableHandle, procedureName, executeProperties, retryMode); } else { - return icebergMetadata.getTableHandleForExecute(session, connectorTableHandle, - procedureName, executeProperties, retryMode); + return icebergMetadata.getTableHandleForExecute( + session, connectorTableHandle, procedureName, executeProperties, retryMode); } } @Override public Optional getLayoutForTableExecute( - ConnectorSession session, - ConnectorTableExecuteHandle tableExecuteHandle) { + ConnectorSession session, ConnectorTableExecuteHandle tableExecuteHandle) { return icebergMetadata.getLayoutForTableExecute(session, tableExecuteHandle); } @Override - public BeginTableExecuteResult beginTableExecute( - ConnectorSession session, - ConnectorTableExecuteHandle tableExecuteHandle, - ConnectorTableHandle updatedSourceTableHandle) { + public BeginTableExecuteResult + beginTableExecute( + ConnectorSession session, + ConnectorTableExecuteHandle tableExecuteHandle, + ConnectorTableHandle updatedSourceTableHandle) { return icebergMetadata.beginTableExecute(session, tableExecuteHandle, updatedSourceTableHandle); } @@ -283,7 +284,8 @@ public void finishTableExecute( } @Override - public void executeTableExecute(ConnectorSession session, ConnectorTableExecuteHandle tableExecuteHandle) { + public void executeTableExecute( + ConnectorSession session, ConnectorTableExecuteHandle tableExecuteHandle) { icebergMetadata.executeTableExecute(session, tableExecuteHandle); } @@ -318,7 +320,8 @@ public void setTableProperties( } @Override - public void addColumn(ConnectorSession session, ConnectorTableHandle tableHandle, ColumnMetadata column) { + public void addColumn( + ConnectorSession session, ConnectorTableHandle tableHandle, ColumnMetadata column) { if (tableHandle instanceof KeyedTableHandle) { throw new TrinoException(NOT_SUPPORTED, "key table UnSupport add column"); } else { @@ -327,7 +330,8 @@ public void addColumn(ConnectorSession session, ConnectorTableHandle tableHandle } @Override - public void dropColumn(ConnectorSession session, ConnectorTableHandle tableHandle, ColumnHandle column) { + public void dropColumn( + ConnectorSession session, ConnectorTableHandle tableHandle, ColumnHandle column) { if (tableHandle instanceof KeyedTableHandle) { throw new TrinoException(NOT_SUPPORTED, "key table UnSupport drop column"); } else { @@ -336,7 +340,8 
@@ public void dropColumn(ConnectorSession session, ConnectorTableHandle tableHandl } @Override - public Optional applyDelete(ConnectorSession session, ConnectorTableHandle handle) { + public Optional applyDelete( + ConnectorSession session, ConnectorTableHandle handle) { if (handle instanceof KeyedTableHandle) { throw new TrinoException(NOT_SUPPORTED, "key table UnSupport apply delete"); } else { @@ -355,9 +360,7 @@ public OptionalLong executeDelete(ConnectorSession session, ConnectorTableHandle @Override public Optional> applyFilter( - ConnectorSession session, - ConnectorTableHandle handle, - Constraint constraint) { + ConnectorSession session, ConnectorTableHandle handle, Constraint constraint) { if (handle instanceof KeyedTableHandle) { return keyedConnectorMetadata.applyFilter(session, handle, constraint); } else { @@ -379,7 +382,8 @@ public Optional> applyProjecti } @Override - public TableStatistics getTableStatistics(ConnectorSession session, ConnectorTableHandle tableHandle) { + public TableStatistics getTableStatistics( + ConnectorSession session, ConnectorTableHandle tableHandle) { if (tableHandle instanceof KeyedTableHandle) { return keyedConnectorMetadata.getTableStatistics(session, tableHandle); } else { @@ -395,12 +399,14 @@ public ConnectorAnalyzeMetadata getStatisticsCollectionMetadata( if (tableHandle instanceof KeyedTableHandle) { throw new TrinoException(NOT_SUPPORTED, "This connector does not support analyze"); } else { - return icebergMetadata.getStatisticsCollectionMetadata(session, tableHandle, analyzeProperties); + return icebergMetadata.getStatisticsCollectionMetadata( + session, tableHandle, analyzeProperties); } } @Override - public ConnectorTableHandle beginStatisticsCollection(ConnectorSession session, ConnectorTableHandle tableHandle) { + public ConnectorTableHandle beginStatisticsCollection( + ConnectorSession session, ConnectorTableHandle tableHandle) { if (tableHandle instanceof KeyedTableHandle) { throw new TrinoException(NOT_SUPPORTED, "This connector does not support analyze"); } else { @@ -409,7 +415,8 @@ public ConnectorTableHandle beginStatisticsCollection(ConnectorSession session, } @Override - public ColumnHandle getMergeRowIdColumnHandle(ConnectorSession session, ConnectorTableHandle tableHandle) { + public ColumnHandle getMergeRowIdColumnHandle( + ConnectorSession session, ConnectorTableHandle tableHandle) { if (tableHandle instanceof KeyedTableHandle) { throw new TrinoException(NOT_SUPPORTED, "Key table does not support modifying table rows"); } else { @@ -419,8 +426,7 @@ public ColumnHandle getMergeRowIdColumnHandle(ConnectorSession session, Connecto @Override public ConnectorMergeTableHandle beginMerge( - ConnectorSession session, - ConnectorTableHandle tableHandle, RetryMode retryMode) { + ConnectorSession session, ConnectorTableHandle tableHandle, RetryMode retryMode) { if (tableHandle instanceof KeyedTableHandle) { throw new TrinoException(NOT_SUPPORTED, "Key table does not support beginMerge"); } else { @@ -450,8 +456,8 @@ public ArcticTable getArcticTable(SchemaTableName schemaTableName) { } private TableIdentifier getTableIdentifier(SchemaTableName schemaTableName) { - return TableIdentifier.of(arcticCatalog.name(), - schemaTableName.getSchemaName(), schemaTableName.getTableName()); + return TableIdentifier.of( + arcticCatalog.name(), schemaTableName.getSchemaName(), schemaTableName.getTableName()); } private List listNamespaces(ConnectorSession session, Optional namespace) { diff --git 
a/trino/src/main/java/com/netease/arctic/trino/ArcticConnectorSplitManager.java b/trino/src/main/java/com/netease/arctic/trino/ArcticConnectorSplitManager.java index dc7e6c420c..b16d77963b 100644 --- a/trino/src/main/java/com/netease/arctic/trino/ArcticConnectorSplitManager.java +++ b/trino/src/main/java/com/netease/arctic/trino/ArcticConnectorSplitManager.java @@ -32,10 +32,9 @@ import javax.inject.Inject; /** - * {@link ArcticConnectorSplitManager} is a Union {@link ConnectorSplitManager} contain - * {@link KeyedConnectorSplitManager} and - * {@link IcebergSplitManager}. - * This is final {@link ConnectorSplitManager} provided to Trino + * {@link ArcticConnectorSplitManager} is a Union {@link ConnectorSplitManager} contain {@link + * KeyedConnectorSplitManager} and {@link IcebergSplitManager}. This is final {@link + * ConnectorSplitManager} provided to Trino */ public class ArcticConnectorSplitManager implements ConnectorSplitManager { @@ -53,14 +52,16 @@ public ArcticConnectorSplitManager( @Override public ConnectorSplitSource getSplits( - ConnectorTransactionHandle transaction, ConnectorSession session, - ConnectorTableHandle table, DynamicFilter dynamicFilter, Constraint constraint) { + ConnectorTransactionHandle transaction, + ConnectorSession session, + ConnectorTableHandle table, + DynamicFilter dynamicFilter, + Constraint constraint) { if (table instanceof KeyedTableHandle) { - return keyedConnectorSplitManager.getSplits(transaction, session, - table, dynamicFilter, constraint); + return keyedConnectorSplitManager.getSplits( + transaction, session, table, dynamicFilter, constraint); } else { - return icebergSplitManager.getSplits(transaction, session, - table, dynamicFilter, constraint); + return icebergSplitManager.getSplits(transaction, session, table, dynamicFilter, constraint); } } } diff --git a/trino/src/main/java/com/netease/arctic/trino/ArcticHadoopAuthentication.java b/trino/src/main/java/com/netease/arctic/trino/ArcticHadoopAuthentication.java index 9c8c1cdcc5..188e698393 100644 --- a/trino/src/main/java/com/netease/arctic/trino/ArcticHadoopAuthentication.java +++ b/trino/src/main/java/com/netease/arctic/trino/ArcticHadoopAuthentication.java @@ -23,9 +23,7 @@ import io.trino.hdfs.authentication.HadoopAuthentication; import org.apache.hadoop.security.UserGroupInformation; -/** - * Arctic Hadoop Authentication using TableMetaStore - */ +/** Arctic Hadoop Authentication using TableMetaStore */ public class ArcticHadoopAuthentication implements HadoopAuthentication { private final ArcticCatalogFactory arcticCatalogFactory; diff --git a/trino/src/main/java/com/netease/arctic/trino/ArcticHdfsAuthentication.java b/trino/src/main/java/com/netease/arctic/trino/ArcticHdfsAuthentication.java index 09ff6e7c76..e8925931bb 100644 --- a/trino/src/main/java/com/netease/arctic/trino/ArcticHdfsAuthentication.java +++ b/trino/src/main/java/com/netease/arctic/trino/ArcticHdfsAuthentication.java @@ -26,9 +26,7 @@ import javax.inject.Inject; -/** - * Arctic HDFS Authentication using TableMetaStore - */ +/** Arctic HDFS Authentication using TableMetaStore */ public class ArcticHdfsAuthentication implements HdfsAuthentication { private final TableMetaStore tableMetaStore; @@ -36,15 +34,18 @@ public class ArcticHdfsAuthentication implements HdfsAuthentication { private final ArcticConfig arcticConfig; @Inject - public ArcticHdfsAuthentication(ArcticCatalogFactory arcticCatalogFactory, ArcticConfig arcticConfig) { + public ArcticHdfsAuthentication( + ArcticCatalogFactory arcticCatalogFactory, 
ArcticConfig arcticConfig) { this.tableMetaStore = arcticCatalogFactory.getTableMetastore(); this.arcticConfig = arcticConfig; } @Override - public R doAs(ConnectorIdentity identity, GenericExceptionAction action) throws E { + public R doAs( + ConnectorIdentity identity, GenericExceptionAction action) throws E { boolean hdfsImpersonationEnabled = arcticConfig.getHdfsImpersonationEnabled(); - try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(this.getClass().getClassLoader())) { + try (ThreadContextClassLoader ignored = + new ThreadContextClassLoader(this.getClass().getClassLoader())) { if (hdfsImpersonationEnabled && identity.getUser() != null) { return tableMetaStore.doAsImpersonating(identity.getUser(), () -> action.run()); } else { diff --git a/trino/src/main/java/com/netease/arctic/trino/ArcticHdfsConfiguration.java b/trino/src/main/java/com/netease/arctic/trino/ArcticHdfsConfiguration.java index bbb5597835..b9e95df13c 100644 --- a/trino/src/main/java/com/netease/arctic/trino/ArcticHdfsConfiguration.java +++ b/trino/src/main/java/com/netease/arctic/trino/ArcticHdfsConfiguration.java @@ -24,11 +24,10 @@ import org.apache.hadoop.conf.Configuration; import javax.inject.Inject; + import java.net.URI; -/** - * Factory to generate Configuration of Hadoop - */ +/** Factory to generate Configuration of Hadoop */ public class ArcticHdfsConfiguration implements HdfsConfiguration { private final ArcticCatalogFactory arcticCatalogFactory; @@ -40,7 +39,8 @@ public ArcticHdfsConfiguration(ArcticCatalogFactory arcticCatalogFactory) { @Override public Configuration getConfiguration(HdfsContext context, URI uri) { - try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(this.getClass().getClassLoader())) { + try (ThreadContextClassLoader ignored = + new ThreadContextClassLoader(this.getClass().getClassLoader())) { return arcticCatalogFactory.getTableMetastore().getConfiguration(); } } diff --git a/trino/src/main/java/com/netease/arctic/trino/ArcticMetadataFactory.java b/trino/src/main/java/com/netease/arctic/trino/ArcticMetadataFactory.java index 80632835dc..e6ae411f09 100644 --- a/trino/src/main/java/com/netease/arctic/trino/ArcticMetadataFactory.java +++ b/trino/src/main/java/com/netease/arctic/trino/ArcticMetadataFactory.java @@ -18,6 +18,8 @@ package com.netease.arctic.trino; +import static java.util.Objects.requireNonNull; + import com.netease.arctic.trino.keyed.KeyedConnectorMetadata; import com.netease.arctic.trino.unkeyed.IcebergMetadata; import io.airlift.json.JsonCodec; @@ -29,11 +31,7 @@ import javax.inject.Inject; -import static java.util.Objects.requireNonNull; - -/** - * A factory to generate {@link ArcticConnectorMetadata} - */ +/** A factory to generate {@link ArcticConnectorMetadata} */ public class ArcticMetadataFactory { private final TypeManager typeManager; private final JsonCodec commitTaskCodec; @@ -53,19 +51,23 @@ public ArcticMetadataFactory( this.typeManager = requireNonNull(typeManager, "typeManager is null"); this.commitTaskCodec = requireNonNull(commitTaskCodec, "commitTaskCodec is null"); this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null"); - this.tableStatisticsWriter = requireNonNull(tableStatisticsWriter, "tableStatisticsWriter is null"); + this.tableStatisticsWriter = + requireNonNull(tableStatisticsWriter, "tableStatisticsWriter is null"); this.arcticCatalogFactory = arcticCatalogFactory; this.arcticTrinoCatalogFactory = arcticTrinoCatalogFactory; } public ArcticConnectorMetadata create() { - 
IcebergMetadata icebergMetadata = new IcebergMetadata(typeManager, commitTaskCodec, - arcticTrinoCatalogFactory.create(null), fileSystemFactory, tableStatisticsWriter); + IcebergMetadata icebergMetadata = + new IcebergMetadata( + typeManager, + commitTaskCodec, + arcticTrinoCatalogFactory.create(null), + fileSystemFactory, + tableStatisticsWriter); KeyedConnectorMetadata arcticConnectorMetadata = new KeyedConnectorMetadata(arcticCatalogFactory.getArcticCatalog(), typeManager); return new ArcticConnectorMetadata( - arcticConnectorMetadata, - icebergMetadata, - arcticCatalogFactory.getArcticCatalog()); + arcticConnectorMetadata, icebergMetadata, arcticCatalogFactory.getArcticCatalog()); } } diff --git a/trino/src/main/java/com/netease/arctic/trino/ArcticModule.java b/trino/src/main/java/com/netease/arctic/trino/ArcticModule.java index fb002b0825..d476b24b6f 100644 --- a/trino/src/main/java/com/netease/arctic/trino/ArcticModule.java +++ b/trino/src/main/java/com/netease/arctic/trino/ArcticModule.java @@ -18,6 +18,11 @@ package com.netease.arctic.trino; +import static com.google.inject.multibindings.Multibinder.newSetBinder; +import static io.airlift.configuration.ConfigBinder.configBinder; +import static io.airlift.json.JsonCodecBinder.jsonCodecBinder; +import static org.weakref.jmx.guice.ExportBinder.newExporter; + import com.google.inject.Binder; import com.google.inject.Module; import com.google.inject.Scopes; @@ -62,14 +67,7 @@ import io.trino.spi.type.TypeManager; import org.weakref.jmx.guice.ExportBinder; -import static com.google.inject.multibindings.Multibinder.newSetBinder; -import static io.airlift.configuration.ConfigBinder.configBinder; -import static io.airlift.json.JsonCodecBinder.jsonCodecBinder; -import static org.weakref.jmx.guice.ExportBinder.newExporter; - -/** - * Arctic module of Trino - */ +/** Arctic module of Trino */ public class ArcticModule implements Module { private TypeManager typeManager; @@ -86,18 +84,28 @@ public void configure(Binder binder) { binder.bind(IcebergSessionProperties.class).in(Scopes.SINGLETON); binder.bind(KeyedConnectorSplitManager.class).in(Scopes.SINGLETON); binder.bind(KeyedPageSourceProvider.class).in(Scopes.SINGLETON); - binder.bind(ArcticCatalogFactory.class).to(DefaultArcticCatalogFactory.class).in(Scopes.SINGLETON); + binder + .bind(ArcticCatalogFactory.class) + .to(DefaultArcticCatalogFactory.class) + .in(Scopes.SINGLETON); binder.bind(TrinoCatalogFactory.class).to(ArcticTrinoCatalogFactory.class).in(Scopes.SINGLETON); binder.bind(ArcticTransactionManager.class).in(Scopes.SINGLETON); binder.bind(ArcticMetadataFactory.class).in(Scopes.SINGLETON); binder.bind(TableStatisticsWriter.class).in(Scopes.SINGLETON); - binder.bind(ConnectorSplitManager.class).to(ArcticConnectorSplitManager.class).in(Scopes.SINGLETON); - binder.bind(ConnectorPageSourceProvider.class).to(ArcticPageSourceProvider.class).in(Scopes.SINGLETON); + binder + .bind(ConnectorSplitManager.class) + .to(ArcticConnectorSplitManager.class) + .in(Scopes.SINGLETON); + binder + .bind(ConnectorPageSourceProvider.class) + .to(ArcticPageSourceProvider.class) + .in(Scopes.SINGLETON); configBinder(binder).bindConfig(HiveMetastoreConfig.class); configBinder(binder).bindConfig(IcebergConfig.class); - newSetBinder(binder, SessionPropertiesProvider.class).addBinding() + newSetBinder(binder, SessionPropertiesProvider.class) + .addBinding() .to(ArcticSessionProperties.class) .in(Scopes.SINGLETON); binder.bind(IcebergTableProperties.class).in(Scopes.SINGLETON); @@ -106,11 +114,15 @@ public 
void configure(Binder binder) { binder.bind(IcebergPageSourceProvider.class).in(Scopes.SINGLETON); - binder.bind(ConnectorPageSinkProvider.class) - .to(IcebergPageSinkProvider.class).in(Scopes.SINGLETON); + binder + .bind(ConnectorPageSinkProvider.class) + .to(IcebergPageSinkProvider.class) + .in(Scopes.SINGLETON); - binder.bind(ConnectorNodePartitioningProvider.class) - .to(IcebergNodePartitioningProvider.class).in(Scopes.SINGLETON); + binder + .bind(ConnectorNodePartitioningProvider.class) + .to(IcebergNodePartitioningProvider.class) + .in(Scopes.SINGLETON); configBinder(binder).bindConfig(OrcReaderConfig.class); configBinder(binder).bindConfig(OrcWriterConfig.class); @@ -129,12 +141,19 @@ public void configure(Binder binder) { Multibinder procedures = newSetBinder(binder, Procedure.class); procedures.addBinding().toProvider(RollbackToSnapshotProcedure.class).in(Scopes.SINGLETON); - Multibinder tableProcedures = newSetBinder(binder, TableProcedureMetadata.class); + Multibinder tableProcedures = + newSetBinder(binder, TableProcedureMetadata.class); tableProcedures.addBinding().toProvider(OptimizeTableProcedure.class).in(Scopes.SINGLETON); - tableProcedures.addBinding().toProvider(ExpireSnapshotsTableProcedure.class).in(Scopes.SINGLETON); - tableProcedures.addBinding().toProvider(RemoveOrphanFilesTableProcedure.class).in(Scopes.SINGLETON); + tableProcedures + .addBinding() + .toProvider(ExpireSnapshotsTableProcedure.class) + .in(Scopes.SINGLETON); + tableProcedures + .addBinding() + .toProvider(RemoveOrphanFilesTableProcedure.class) + .in(Scopes.SINGLETON); - //hdfs + // hdfs ConfigBinder.configBinder(binder).bindConfig(HdfsConfig.class); binder.bind(HdfsConfiguration.class).to(ArcticHdfsConfiguration.class).in(Scopes.SINGLETON); binder.bind(HdfsAuthentication.class).to(ArcticHdfsAuthentication.class).in(Scopes.SINGLETON); diff --git a/trino/src/main/java/com/netease/arctic/trino/ArcticPageSourceProvider.java b/trino/src/main/java/com/netease/arctic/trino/ArcticPageSourceProvider.java index cfcf1d521f..603c72f792 100644 --- a/trino/src/main/java/com/netease/arctic/trino/ArcticPageSourceProvider.java +++ b/trino/src/main/java/com/netease/arctic/trino/ArcticPageSourceProvider.java @@ -31,13 +31,13 @@ import io.trino.spi.connector.DynamicFilter; import javax.inject.Inject; + import java.util.List; /** - * {@link ArcticPageSourceProvider} is a Union {@link ConnectorPageSourceProvider} - * contain {@link KeyedPageSourceProvider} and - * {@link IcebergPageSourceProvider}. - * This is final {@link ConnectorPageSourceProvider} provided to Trino + * {@link ArcticPageSourceProvider} is a Union {@link ConnectorPageSourceProvider} contain {@link + * KeyedPageSourceProvider} and {@link IcebergPageSourceProvider}. 
This is final {@link + * ConnectorPageSourceProvider} provided to Trino */ public class ArcticPageSourceProvider implements ConnectorPageSourceProvider { @@ -56,14 +56,17 @@ public ArcticPageSourceProvider( @Override public ConnectorPageSource createPageSource( ConnectorTransactionHandle transaction, - ConnectorSession session, ConnectorSplit split, - ConnectorTableHandle table, List columns, + ConnectorSession session, + ConnectorSplit split, + ConnectorTableHandle table, + List columns, DynamicFilter dynamicFilter) { if (table instanceof KeyedTableHandle) { - return keyedPageSourceProvider - .createPageSource(transaction, session, split, table, columns, dynamicFilter); + return keyedPageSourceProvider.createPageSource( + transaction, session, split, table, columns, dynamicFilter); } else { - return icebergPageSourceProvider.createPageSource(transaction, session, split, table, columns, dynamicFilter); + return icebergPageSourceProvider.createPageSource( + transaction, session, split, table, columns, dynamicFilter); } } } diff --git a/trino/src/main/java/com/netease/arctic/trino/ArcticPlugin.java b/trino/src/main/java/com/netease/arctic/trino/ArcticPlugin.java index 1c929fc3a4..3d1aa91563 100644 --- a/trino/src/main/java/com/netease/arctic/trino/ArcticPlugin.java +++ b/trino/src/main/java/com/netease/arctic/trino/ArcticPlugin.java @@ -1,4 +1,3 @@ - /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -23,9 +22,7 @@ import io.trino.spi.Plugin; import io.trino.spi.connector.ConnectorFactory; -/** - * Arctic Plugin Impl of {@link Plugin} - */ +/** Arctic Plugin Impl of {@link Plugin} */ public class ArcticPlugin implements Plugin { @Override diff --git a/trino/src/main/java/com/netease/arctic/trino/ArcticSessionProperties.java b/trino/src/main/java/com/netease/arctic/trino/ArcticSessionProperties.java index 62e24666b6..88123554c3 100644 --- a/trino/src/main/java/com/netease/arctic/trino/ArcticSessionProperties.java +++ b/trino/src/main/java/com/netease/arctic/trino/ArcticSessionProperties.java @@ -1,4 +1,3 @@ - /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -19,6 +18,9 @@ package com.netease.arctic.trino; +import static io.trino.spi.session.PropertyMetadata.booleanProperty; +import static io.trino.spi.session.PropertyMetadata.doubleProperty; + import com.google.common.collect.ImmutableList; import io.trino.plugin.base.session.SessionPropertiesProvider; import io.trino.plugin.iceberg.IcebergSessionProperties; @@ -26,47 +28,45 @@ import io.trino.spi.session.PropertyMetadata; import javax.inject.Inject; -import java.util.List; -import static io.trino.spi.session.PropertyMetadata.booleanProperty; -import static io.trino.spi.session.PropertyMetadata.doubleProperty; +import java.util.List; -/** - * Arctic supporting session properties - */ -public final class ArcticSessionProperties - implements SessionPropertiesProvider { +/** Arctic supporting session properties */ +public final class ArcticSessionProperties implements SessionPropertiesProvider { private static final String ARCTIC_STATISTICS_ENABLED = "arctic_table_statistics_enabled"; - private static final String ARCTIC_SPLIT_TASK_BY_DELETE_RATIO = "arctic_split_task_by_delete_ratio"; - private static final String ARCTIC_ENABLE_SPLIT_TASK_BY_DELETE_RATIO = "arctic_enable_split_task_by_delete_ratio"; + private static final String ARCTIC_SPLIT_TASK_BY_DELETE_RATIO = + "arctic_split_task_by_delete_ratio"; + private static final String ARCTIC_ENABLE_SPLIT_TASK_BY_DELETE_RATIO = + "arctic_enable_split_task_by_delete_ratio"; private final List> sessionProperties; @Inject public ArcticSessionProperties( - ArcticConfig arcticConfig, - IcebergSessionProperties icebergSessionProperties) { - sessionProperties = ImmutableList.>builder() - .addAll(icebergSessionProperties.getSessionProperties()) - .add(booleanProperty( - ARCTIC_STATISTICS_ENABLED, - "Expose table statistics for Arctic table", - arcticConfig.isTableStatisticsEnabled(), - false)) - .add(doubleProperty( - ARCTIC_SPLIT_TASK_BY_DELETE_RATIO, - "If task delete ratio less than this value will be split to more task", - arcticConfig.getSplitTaskByDeleteRatio(), - false - )) - .add(booleanProperty( - ARCTIC_ENABLE_SPLIT_TASK_BY_DELETE_RATIO, - "Enable task split by ratio", - arcticConfig.isEnableSplitTaskByDeleteRatio(), - false - )) - .build(); + ArcticConfig arcticConfig, IcebergSessionProperties icebergSessionProperties) { + sessionProperties = + ImmutableList.>builder() + .addAll(icebergSessionProperties.getSessionProperties()) + .add( + booleanProperty( + ARCTIC_STATISTICS_ENABLED, + "Expose table statistics for Arctic table", + arcticConfig.isTableStatisticsEnabled(), + false)) + .add( + doubleProperty( + ARCTIC_SPLIT_TASK_BY_DELETE_RATIO, + "If task delete ratio less than this value will be split to more task", + arcticConfig.getSplitTaskByDeleteRatio(), + false)) + .add( + booleanProperty( + ARCTIC_ENABLE_SPLIT_TASK_BY_DELETE_RATIO, + "Enable task split by ratio", + arcticConfig.isEnableSplitTaskByDeleteRatio(), + false)) + .build(); } @Override diff --git a/trino/src/main/java/com/netease/arctic/trino/ArcticTransactionManager.java b/trino/src/main/java/com/netease/arctic/trino/ArcticTransactionManager.java index 37129b60fb..6062ba9adc 100644 --- a/trino/src/main/java/com/netease/arctic/trino/ArcticTransactionManager.java +++ b/trino/src/main/java/com/netease/arctic/trino/ArcticTransactionManager.java @@ -1,4 +1,3 @@ - /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -19,26 +18,26 @@ package com.netease.arctic.trino; +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkState; +import static java.util.Objects.requireNonNull; + import io.trino.spi.classloader.ThreadContextClassLoader; import io.trino.spi.connector.ConnectorTransactionHandle; import javax.annotation.concurrent.GuardedBy; import javax.inject.Inject; + import java.util.Optional; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Preconditions.checkState; -import static java.util.Objects.requireNonNull; - -/** - * This is used to guarantee one transaction to one {@link ArcticConnectorMetadata} - */ +/** This is used to guarantee one transaction to one {@link ArcticConnectorMetadata} */ public class ArcticTransactionManager { private final ArcticMetadataFactory metadataFactory; private final ClassLoader classLoader; - private final ConcurrentMap transactions = new ConcurrentHashMap<>(); + private final ConcurrentMap transactions = + new ConcurrentHashMap<>(); @Inject public ArcticTransactionManager(ArcticMetadataFactory metadataFactory) { @@ -51,7 +50,8 @@ public ArcticTransactionManager(ArcticMetadataFactory metadataFactory, ClassLoad } public void begin(ConnectorTransactionHandle transactionHandle) { - MemoizedMetadata previousValue = transactions.putIfAbsent(transactionHandle, new MemoizedMetadata()); + MemoizedMetadata previousValue = + transactions.putIfAbsent(transactionHandle, new MemoizedMetadata()); checkState(previousValue == null); } @@ -67,11 +67,14 @@ public void commit(ConnectorTransactionHandle transaction) { public void rollback(ConnectorTransactionHandle transaction) { MemoizedMetadata transactionalMetadata = transactions.remove(transaction); checkArgument(transactionalMetadata != null, "no such transaction: %s", transaction); - transactionalMetadata.optionalGet().ifPresent(metadata -> { - try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) { - metadata.rollback(); - } - }); + transactionalMetadata + .optionalGet() + .ifPresent( + metadata -> { + try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) { + metadata.rollback(); + } + }); } private class MemoizedMetadata { diff --git a/trino/src/main/java/com/netease/arctic/trino/DefaultArcticCatalogFactory.java b/trino/src/main/java/com/netease/arctic/trino/DefaultArcticCatalogFactory.java index c14685f2bb..c7790d10c7 100644 --- a/trino/src/main/java/com/netease/arctic/trino/DefaultArcticCatalogFactory.java +++ b/trino/src/main/java/com/netease/arctic/trino/DefaultArcticCatalogFactory.java @@ -24,12 +24,12 @@ import com.netease.arctic.table.TableMetaStore; import com.netease.arctic.utils.CatalogUtil; import io.trino.spi.classloader.ThreadContextClassLoader; + import javax.inject.Inject; + import java.util.Collections; -/** - * A factory to generate {@link ArcticCatalog} - */ +/** A factory to generate {@link ArcticCatalog} */ public class DefaultArcticCatalogFactory implements ArcticCatalogFactory { private final ArcticConfig arcticConfig; @@ -46,7 +46,8 @@ public ArcticCatalog getArcticCatalog() { if (arcticCatalog == null) { synchronized (this) { if (arcticCatalog == null) { - try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(this.getClass().getClassLoader())) { + try (ThreadContextClassLoader ignored = + new 
ThreadContextClassLoader(this.getClass().getClassLoader())) { this.arcticCatalog = new ArcticCatalogSupportTableSuffix( CatalogLoader.load(arcticConfig.getCatalogUrl(), Collections.emptyMap())); @@ -62,7 +63,8 @@ public TableMetaStore getTableMetastore() { if (this.tableMetaStore == null) { synchronized (this) { if (this.tableMetaStore == null) { - try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(this.getClass().getClassLoader())) { + try (ThreadContextClassLoader ignored = + new ThreadContextClassLoader(this.getClass().getClassLoader())) { CatalogMeta meta = CatalogLoader.loadMeta(arcticConfig.getCatalogUrl()); this.tableMetaStore = CatalogUtil.buildMetaStore(meta); } diff --git a/trino/src/main/java/com/netease/arctic/trino/TableNameResolve.java b/trino/src/main/java/com/netease/arctic/trino/TableNameResolve.java index 40195f5a27..4a6742cd07 100644 --- a/trino/src/main/java/com/netease/arctic/trino/TableNameResolve.java +++ b/trino/src/main/java/com/netease/arctic/trino/TableNameResolve.java @@ -20,9 +20,7 @@ import com.netease.arctic.ams.api.Constants; -/** - * To resolve sub table name, such as "tableName#base", "tableName#change" - */ +/** To resolve sub table name, such as "tableName#base", "tableName#change" */ public class TableNameResolve { private static final String SPLIT = "#"; @@ -36,7 +34,7 @@ public class TableNameResolve { public TableNameResolve(String original) { this.original = original; if (original.contains(SPLIT)) { - //use actual db name + // use actual db name if (original.contains(DOT_SPIT)) { String[] tableString = original.split(REGEX_DOT_SPLIT); if (tableString.length == 2) { diff --git a/trino/src/main/java/com/netease/arctic/trino/delete/DummyFileScanTask.java b/trino/src/main/java/com/netease/arctic/trino/delete/DummyFileScanTask.java index a5d32713cd..2abc776746 100644 --- a/trino/src/main/java/com/netease/arctic/trino/delete/DummyFileScanTask.java +++ b/trino/src/main/java/com/netease/arctic/trino/delete/DummyFileScanTask.java @@ -18,6 +18,8 @@ package com.netease.arctic.trino.delete; +import static java.util.Objects.requireNonNull; + import com.google.common.collect.ImmutableList; import org.apache.iceberg.CombinedScanTask; import org.apache.iceberg.DataFile; @@ -33,13 +35,8 @@ import java.util.List; import java.util.Map; -import static java.util.Objects.requireNonNull; - -/** - * Copy from trino-iceberg DummyFileScanTask and do some change to adapt Arctic - */ -public class DummyFileScanTask - implements FileScanTask { +/** Copy from trino-iceberg DummyFileScanTask and do some change to adapt Arctic */ +public class DummyFileScanTask implements FileScanTask { private final DataFile file; private final List deletes; @@ -109,8 +106,7 @@ public CombinedScanTask asCombinedScanTask() { throw new UnsupportedOperationException(); } - private static class DummyDataFile - implements DataFile { + private static class DummyDataFile implements DataFile { private final String path; private DummyDataFile(String path) { diff --git a/trino/src/main/java/com/netease/arctic/trino/delete/TrinoDeleteFile.java b/trino/src/main/java/com/netease/arctic/trino/delete/TrinoDeleteFile.java index 2ec62fd7ed..317e617614 100644 --- a/trino/src/main/java/com/netease/arctic/trino/delete/TrinoDeleteFile.java +++ b/trino/src/main/java/com/netease/arctic/trino/delete/TrinoDeleteFile.java @@ -18,6 +18,12 @@ package com.netease.arctic.trino.delete; +import static com.google.common.collect.ImmutableMap.toImmutableMap; +import static io.airlift.slice.SizeOf.SIZE_OF_INT; 
+import static io.airlift.slice.SizeOf.SIZE_OF_LONG; +import static io.airlift.slice.SizeOf.estimatedSizeOf; +import static java.util.Objects.requireNonNull; + import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; @@ -30,52 +36,34 @@ import org.openjdk.jol.info.ClassLayout; import javax.annotation.Nullable; + import java.nio.ByteBuffer; import java.util.List; import java.util.Map; import java.util.function.ToLongFunction; -import static com.google.common.collect.ImmutableMap.toImmutableMap; -import static io.airlift.slice.SizeOf.SIZE_OF_INT; -import static io.airlift.slice.SizeOf.SIZE_OF_LONG; -import static io.airlift.slice.SizeOf.estimatedSizeOf; -import static java.util.Objects.requireNonNull; +/** Copy from trino-iceberg TrinoDeleteFile and do some change to adapt Arctic */ +public class TrinoDeleteFile implements DeleteFile { + private static final long INSTANCE_SIZE = + ClassLayout.parseClass(TrinoDeleteFile.class).instanceSize(); -/** - * Copy from trino-iceberg TrinoDeleteFile and do some change to adapt Arctic - */ -public class TrinoDeleteFile - implements DeleteFile { - private static final long INSTANCE_SIZE = ClassLayout.parseClass(TrinoDeleteFile.class).instanceSize(); - - @Nullable - private final Long pos; + @Nullable private final Long pos; private final int specId; private final FileContent fileContent; private final String path; private final FileFormat format; private final long recordCount; private final long fileSizeInBytes; - @Nullable - private final Map columnSizes; - @Nullable - private final Map valueCounts; - @Nullable - private final Map nullValueCounts; - @Nullable - private final Map nanValueCounts; - @Nullable - private final Map lowerBounds; - @Nullable - private final Map upperBounds; - @Nullable - private final byte[] keyMetadata; - @Nullable - private final List equalityFieldIds; - @Nullable - private final Integer sortOrderId; - @Nullable - private final List splitOffsets; + @Nullable private final Map columnSizes; + @Nullable private final Map valueCounts; + @Nullable private final Map nullValueCounts; + @Nullable private final Map nanValueCounts; + @Nullable private final Map lowerBounds; + @Nullable private final Map upperBounds; + @Nullable private final byte[] keyMetadata; + @Nullable private final List equalityFieldIds; + @Nullable private final Integer sortOrderId; + @Nullable private final List splitOffsets; public static TrinoDeleteFile copyOf(DeleteFile deleteFile) { return new TrinoDeleteFile( @@ -90,10 +78,18 @@ public static TrinoDeleteFile copyOf(DeleteFile deleteFile) { deleteFile.valueCounts(), deleteFile.nullValueCounts(), deleteFile.nanValueCounts(), - deleteFile.lowerBounds() == null ? null : deleteFile.lowerBounds().entrySet().stream() - .collect(toImmutableMap(Map.Entry::getKey, entry -> entry.getValue().array())), - deleteFile.upperBounds() == null ? null : deleteFile.upperBounds().entrySet().stream() - .collect(toImmutableMap(Map.Entry::getKey, entry -> entry.getValue().array())), + deleteFile.lowerBounds() == null + ? null + : deleteFile.lowerBounds().entrySet().stream() + .collect( + toImmutableMap( + Map.Entry::getKey, entry -> entry.getValue().array())), + deleteFile.upperBounds() == null + ? null + : deleteFile.upperBounds().entrySet().stream() + .collect( + toImmutableMap( + Map.Entry::getKey, entry -> entry.getValue().array())), deleteFile.keyMetadata() == null ? 
null : deleteFile.keyMetadata().array(), deleteFile.equalityFieldIds(), deleteFile.sortOrderId(), @@ -133,7 +129,8 @@ public TrinoDeleteFile( this.lowerBounds = lowerBounds == null ? null : ImmutableMap.copyOf(lowerBounds); this.upperBounds = upperBounds == null ? null : ImmutableMap.copyOf(upperBounds); this.keyMetadata = keyMetadata == null ? null : keyMetadata.clone(); - this.equalityFieldIds = equalityFieldIds == null ? null : ImmutableList.copyOf(equalityFieldIds); + this.equalityFieldIds = + equalityFieldIds == null ? null : ImmutableList.copyOf(equalityFieldIds); this.sortOrderId = sortOrderId; this.splitOffsets = splitOffsets == null ? null : ImmutableList.copyOf(splitOffsets); } @@ -314,15 +311,15 @@ public DeleteFile copyWithoutStats() { public long getRetainedSizeInBytes() { ToLongFunction intSizeOf = ignored -> SIZE_OF_INT; ToLongFunction longSizeOf = ignored -> SIZE_OF_LONG; - return INSTANCE_SIZE + - estimatedSizeOf(path) + - estimatedSizeOf(columnSizes, intSizeOf, longSizeOf) + - estimatedSizeOf(nullValueCounts, intSizeOf, longSizeOf) + - estimatedSizeOf(nanValueCounts, intSizeOf, longSizeOf) + - estimatedSizeOf(lowerBounds, intSizeOf, value -> value.length) + - estimatedSizeOf(upperBounds, intSizeOf, value -> value.length) + - (keyMetadata == null ? 0 : keyMetadata.length) + - estimatedSizeOf(equalityFieldIds, intSizeOf) + - estimatedSizeOf(splitOffsets, longSizeOf); + return INSTANCE_SIZE + + estimatedSizeOf(path) + + estimatedSizeOf(columnSizes, intSizeOf, longSizeOf) + + estimatedSizeOf(nullValueCounts, intSizeOf, longSizeOf) + + estimatedSizeOf(nanValueCounts, intSizeOf, longSizeOf) + + estimatedSizeOf(lowerBounds, intSizeOf, value -> value.length) + + estimatedSizeOf(upperBounds, intSizeOf, value -> value.length) + + (keyMetadata == null ? 0 : keyMetadata.length) + + estimatedSizeOf(equalityFieldIds, intSizeOf) + + estimatedSizeOf(splitOffsets, longSizeOf); } } diff --git a/trino/src/main/java/com/netease/arctic/trino/delete/TrinoRow.java b/trino/src/main/java/com/netease/arctic/trino/delete/TrinoRow.java index 17f4aaa188..0c54f811ac 100644 --- a/trino/src/main/java/com/netease/arctic/trino/delete/TrinoRow.java +++ b/trino/src/main/java/com/netease/arctic/trino/delete/TrinoRow.java @@ -18,6 +18,10 @@ package com.netease.arctic.trino.delete; +import static com.google.common.base.Preconditions.checkArgument; +import static io.trino.plugin.iceberg.IcebergPageSink.getIcebergValue; +import static java.util.Objects.requireNonNull; + import com.google.common.collect.AbstractIterator; import io.trino.spi.Page; import io.trino.spi.type.Type; @@ -25,15 +29,8 @@ import javax.annotation.Nullable; -import static com.google.common.base.Preconditions.checkArgument; -import static io.trino.plugin.iceberg.IcebergPageSink.getIcebergValue; -import static java.util.Objects.requireNonNull; - -/** - * Copy from trino-iceberg TrinoRow and do some change to adapt Arctic - */ -public class TrinoRow - implements StructLike { +/** Copy from trino-iceberg TrinoRow and do some change to adapt Arctic */ +public class TrinoRow implements StructLike { private final Type[] types; private final Page page; private final int position; @@ -45,9 +42,7 @@ private TrinoRow(Type[] types, Page page, int position) { this.position = position; } - /** - * Gets the position in the Block this row was originally created from. - */ + /** Gets the position in the Block this row was originally created from. 
*/ public int getPosition() { return position; } @@ -68,20 +63,20 @@ public void set(int i, T t) { } public static Iterable fromPage(Type[] types, Page page, int positionCount) { - return () -> new AbstractIterator<>() { - private int nextPosition; + return () -> + new AbstractIterator<>() { + private int nextPosition; - @Nullable - @Override - protected TrinoRow computeNext() { - if (nextPosition == positionCount) { - return endOfData(); - } - int position = nextPosition; - nextPosition++; - return new TrinoRow(types, page, position); - } - }; + @Nullable + @Override + protected TrinoRow computeNext() { + if (nextPosition == positionCount) { + return endOfData(); + } + int position = nextPosition; + nextPosition++; + return new TrinoRow(types, page, position); + } + }; } } - diff --git a/trino/src/main/java/com/netease/arctic/trino/keyed/KeyedConnectorMetadata.java b/trino/src/main/java/com/netease/arctic/trino/keyed/KeyedConnectorMetadata.java index c66d49647d..131b1c7817 100644 --- a/trino/src/main/java/com/netease/arctic/trino/keyed/KeyedConnectorMetadata.java +++ b/trino/src/main/java/com/netease/arctic/trino/keyed/KeyedConnectorMetadata.java @@ -1,4 +1,3 @@ - /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -19,6 +18,19 @@ package com.netease.arctic.trino.keyed; +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.collect.ImmutableList.toImmutableList; +import static com.google.common.collect.ImmutableMap.toImmutableMap; +import static com.google.common.collect.ImmutableSet.toImmutableSet; +import static com.netease.arctic.trino.ArcticSessionProperties.isArcticStatisticsEnabled; +import static io.trino.plugin.hive.HiveApplyProjectionUtil.extractSupportedProjectedColumns; +import static io.trino.plugin.hive.HiveApplyProjectionUtil.replaceWithNewVariables; +import static io.trino.plugin.hive.util.HiveUtil.isHiveSystemSchema; +import static io.trino.plugin.hive.util.HiveUtil.isStructuralType; +import static io.trino.plugin.iceberg.IcebergUtil.getColumns; +import static io.trino.plugin.iceberg.TypeConverter.toTrinoType; +import static io.trino.spi.connector.RetryMode.NO_RETRIES; + import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; @@ -78,22 +90,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.collect.ImmutableList.toImmutableList; -import static com.google.common.collect.ImmutableMap.toImmutableMap; -import static com.google.common.collect.ImmutableSet.toImmutableSet; -import static com.netease.arctic.trino.ArcticSessionProperties.isArcticStatisticsEnabled; -import static io.trino.plugin.hive.HiveApplyProjectionUtil.extractSupportedProjectedColumns; -import static io.trino.plugin.hive.HiveApplyProjectionUtil.replaceWithNewVariables; -import static io.trino.plugin.hive.util.HiveUtil.isHiveSystemSchema; -import static io.trino.plugin.hive.util.HiveUtil.isStructuralType; -import static io.trino.plugin.iceberg.IcebergUtil.getColumns; -import static io.trino.plugin.iceberg.TypeConverter.toTrinoType; -import static io.trino.spi.connector.RetryMode.NO_RETRIES; - -/** - * Metadata for Keyed Table - */ +/** Metadata for Keyed Table */ public class KeyedConnectorMetadata implements ConnectorMetadata { private static final Logger log = 
LoggerFactory.getLogger(KeyedConnectorMetadata.class); @@ -102,9 +99,11 @@ public class KeyedConnectorMetadata implements ConnectorMetadata { private TypeManager typeManager; - private ConcurrentHashMap concurrentHashMap = new ConcurrentHashMap<>(); + private ConcurrentHashMap concurrentHashMap = + new ConcurrentHashMap<>(); - private final Map tableStatisticsCache = new ConcurrentHashMap<>(); + private final Map tableStatisticsCache = + new ConcurrentHashMap<>(); public KeyedConnectorMetadata(ArcticCatalog arcticCatalog, TypeManager typeManager) { this.arcticCatalog = arcticCatalog; @@ -113,7 +112,9 @@ public KeyedConnectorMetadata(ArcticCatalog arcticCatalog, TypeManager typeManag @Override public List listSchemaNames(ConnectorSession session) { - return arcticCatalog.listDatabases().stream().map(s -> s.toLowerCase(Locale.ROOT)).collect(Collectors.toList()); + return arcticCatalog.listDatabases().stream() + .map(s -> s.toLowerCase(Locale.ROOT)) + .collect(Collectors.toList()); } @Override @@ -126,37 +127,41 @@ public ConnectorTableHandle getTableHandle(ConnectorSession session, SchemaTable TableIdentifier tableIdentifier = arcticTable.id(); Map tableProperties = arcticTable.properties(); String nameMappingJson = tableProperties.get(TableProperties.DEFAULT_NAME_MAPPING); - IcebergTableHandle icebergTableHandle = new IcebergTableHandle( - tableName.getSchemaName(), - tableIdentifier.getTableName(), - TableType.DATA, - Optional.empty(), - SchemaParser.toJson(arcticTable.schema()), - Optional.of(arcticTable.spec()).map(PartitionSpecParser::toJson), - 2, - TupleDomain.all(), - TupleDomain.all(), - ImmutableSet.of(), - Optional.ofNullable(nameMappingJson), - arcticTable.location(), - tableProperties, - NO_RETRIES, - ImmutableList.of(), - false, - Optional.empty()); - - return new KeyedTableHandle(icebergTableHandle, ObjectSerializerUtil.write(arcticTable.primaryKeySpec())); + IcebergTableHandle icebergTableHandle = + new IcebergTableHandle( + tableName.getSchemaName(), + tableIdentifier.getTableName(), + TableType.DATA, + Optional.empty(), + SchemaParser.toJson(arcticTable.schema()), + Optional.of(arcticTable.spec()).map(PartitionSpecParser::toJson), + 2, + TupleDomain.all(), + TupleDomain.all(), + ImmutableSet.of(), + Optional.ofNullable(nameMappingJson), + arcticTable.location(), + tableProperties, + NO_RETRIES, + ImmutableList.of(), + false, + Optional.empty()); + + return new KeyedTableHandle( + icebergTableHandle, ObjectSerializerUtil.write(arcticTable.primaryKeySpec())); } @Override - public ConnectorTableMetadata getTableMetadata(ConnectorSession session, ConnectorTableHandle table) { + public ConnectorTableMetadata getTableMetadata( + ConnectorSession session, ConnectorTableHandle table) { KeyedTableHandle keyedTableHandle = (KeyedTableHandle) table; IcebergTableHandle icebergTableHandle = keyedTableHandle.getIcebergTableHandle(); SchemaTableName schemaTableName = new SchemaTableName(icebergTableHandle.getSchemaName(), icebergTableHandle.getTableName()); - ArcticTable arcticTable = getArcticTable(new SchemaTableName( - icebergTableHandle.getSchemaName(), - icebergTableHandle.getTableName())); + ArcticTable arcticTable = + getArcticTable( + new SchemaTableName( + icebergTableHandle.getSchemaName(), icebergTableHandle.getTableName())); if (arcticTable == null) { throw new TableNotFoundException(schemaTableName); } @@ -165,14 +170,16 @@ public ConnectorTableMetadata getTableMetadata(ConnectorSession session, Connect } @Override - public Map getColumnHandles(ConnectorSession session, 
ConnectorTableHandle tableHandle) { + public Map getColumnHandles( + ConnectorSession session, ConnectorTableHandle tableHandle) { KeyedTableHandle keyedTableHandle = (KeyedTableHandle) tableHandle; IcebergTableHandle icebergTableHandle = keyedTableHandle.getIcebergTableHandle(); SchemaTableName schemaTableName = new SchemaTableName(icebergTableHandle.getSchemaName(), icebergTableHandle.getTableName()); - ArcticTable arcticTable = getArcticTable(new SchemaTableName( - icebergTableHandle.getSchemaName(), - icebergTableHandle.getTableName())); + ArcticTable arcticTable = + getArcticTable( + new SchemaTableName( + icebergTableHandle.getSchemaName(), icebergTableHandle.getTableName())); if (arcticTable == null) { throw new TableNotFoundException(schemaTableName); } @@ -187,14 +194,9 @@ public Map getColumnHandles(ConnectorSession session, Conn @Override public ColumnMetadata getColumnMetadata( - ConnectorSession session, - ConnectorTableHandle tableHandle, - ColumnHandle columnHandle) { + ConnectorSession session, ConnectorTableHandle tableHandle, ColumnHandle columnHandle) { IcebergColumnHandle column = (IcebergColumnHandle) columnHandle; - return ColumnMetadata.builder() - .setName(column.getName()) - .setType(column.getType()) - .build(); + return ColumnMetadata.builder().setName(column.getName()).setType(column.getType()).build(); } private List getColumnMetadata(ArcticTable arcticTable) { @@ -209,18 +211,18 @@ private List getColumnMetadata(ArcticTable arcticTable) { .setType(toTrinoType(column.type(), typeManager)) .setNullable(column.isOptional()) .setExtraInfo(Optional.of(column.fieldId() + "")) - .build() - ); + .build()); } return columnsMetadata.build(); } @Override public Map> listTableColumns( - ConnectorSession session, - SchemaTablePrefix prefix) { - List schemaTableNames = !prefix.getTable().isPresent() ? - listTables(session, prefix.getSchema()) : Lists.newArrayList(prefix.toSchemaTableName()); + ConnectorSession session, SchemaTablePrefix prefix) { + List schemaTableNames = + !prefix.getTable().isPresent() + ? 
listTables(session, prefix.getSchema()) + : Lists.newArrayList(prefix.toSchemaTableName()); ImmutableMap.Builder> columns = ImmutableMap.builder(); for (SchemaTableName schemaTableName : schemaTableNames) { try { @@ -238,8 +240,7 @@ public Map> listTableColumns( @Override public List listTables(ConnectorSession session, Optional schemaName) { - return listNamespaces(session, schemaName) - .stream() + return listNamespaces(session, schemaName).stream() .flatMap(s -> arcticCatalog.listTables(s).stream()) .map(s -> new SchemaTableName(s.getDatabase(), s.getTableName())) .collect(Collectors.toList()); @@ -247,61 +248,67 @@ public List listTables(ConnectorSession session, Optional> applyFilter( - ConnectorSession session, - ConnectorTableHandle handle, - Constraint constraint) { + ConnectorSession session, ConnectorTableHandle handle, Constraint constraint) { KeyedTableHandle table = (KeyedTableHandle) handle; IcebergTableHandle icebergTableHandle = table.getIcebergTableHandle(); - ArcticTable arcticTable = getArcticTable(new SchemaTableName( - icebergTableHandle.getSchemaName(), - icebergTableHandle.getTableName())); + ArcticTable arcticTable = + getArcticTable( + new SchemaTableName( + icebergTableHandle.getSchemaName(), icebergTableHandle.getTableName())); Set partitionSourceIds = identityPartitionColumnsInAllSpecs(arcticTable); BiPredicate isIdentityPartition = (column, domain) -> partitionSourceIds.contains(column.getId()); - TupleDomain newEnforcedConstraint = constraint.getSummary() - .transformKeys(IcebergColumnHandle.class::cast) - .filter(isIdentityPartition) - .intersect(icebergTableHandle.getEnforcedPredicate()); - - TupleDomain remainingConstraint = constraint.getSummary() - .transformKeys(IcebergColumnHandle.class::cast) - .filter(isIdentityPartition.negate()); - - TupleDomain newUnenforcedConstraint = remainingConstraint - // Only applies to the unenforced constraint because structural types cannot be partition keys - .filter((columnHandle, predicate) -> !isStructuralType(columnHandle.getType())) - .intersect(icebergTableHandle.getUnenforcedPredicate()); - - if (newEnforcedConstraint.equals(icebergTableHandle.getEnforcedPredicate()) && - newUnenforcedConstraint.equals(icebergTableHandle.getUnenforcedPredicate())) { + TupleDomain newEnforcedConstraint = + constraint + .getSummary() + .transformKeys(IcebergColumnHandle.class::cast) + .filter(isIdentityPartition) + .intersect(icebergTableHandle.getEnforcedPredicate()); + + TupleDomain remainingConstraint = + constraint + .getSummary() + .transformKeys(IcebergColumnHandle.class::cast) + .filter(isIdentityPartition.negate()); + + TupleDomain newUnenforcedConstraint = + remainingConstraint + // Only applies to the unenforced constraint because structural types cannot be + // partition keys + .filter((columnHandle, predicate) -> !isStructuralType(columnHandle.getType())) + .intersect(icebergTableHandle.getUnenforcedPredicate()); + + if (newEnforcedConstraint.equals(icebergTableHandle.getEnforcedPredicate()) + && newUnenforcedConstraint.equals(icebergTableHandle.getUnenforcedPredicate())) { return Optional.empty(); } - IcebergTableHandle newIcebergTableHandle = new IcebergTableHandle( - icebergTableHandle.getSchemaName(), - icebergTableHandle.getTableName(), - icebergTableHandle.getTableType(), - icebergTableHandle.getSnapshotId(), - icebergTableHandle.getTableSchemaJson(), - icebergTableHandle.getPartitionSpecJson(), - 2, - newUnenforcedConstraint, - newEnforcedConstraint, - icebergTableHandle.getProjectedColumns(), - 
icebergTableHandle.getNameMappingJson(), - icebergTableHandle.getTableLocation(), - icebergTableHandle.getStorageProperties(), - icebergTableHandle.getRetryMode(), - icebergTableHandle.getUpdatedColumns(), - icebergTableHandle.isRecordScannedFiles(), - icebergTableHandle.getMaxScannedFileSize() - ); - return Optional.of(new ConstraintApplicationResult<>( - new KeyedTableHandle(newIcebergTableHandle, table.getPrimaryKeySpecBytes()), - remainingConstraint.transformKeys(ColumnHandle.class::cast), - false)); + IcebergTableHandle newIcebergTableHandle = + new IcebergTableHandle( + icebergTableHandle.getSchemaName(), + icebergTableHandle.getTableName(), + icebergTableHandle.getTableType(), + icebergTableHandle.getSnapshotId(), + icebergTableHandle.getTableSchemaJson(), + icebergTableHandle.getPartitionSpecJson(), + 2, + newUnenforcedConstraint, + newEnforcedConstraint, + icebergTableHandle.getProjectedColumns(), + icebergTableHandle.getNameMappingJson(), + icebergTableHandle.getTableLocation(), + icebergTableHandle.getStorageProperties(), + icebergTableHandle.getRetryMode(), + icebergTableHandle.getUpdatedColumns(), + icebergTableHandle.isRecordScannedFiles(), + icebergTableHandle.getMaxScannedFileSize()); + return Optional.of( + new ConstraintApplicationResult<>( + new KeyedTableHandle(newIcebergTableHandle, table.getPrimaryKeySpecBytes()), + remainingConstraint.transformKeys(ColumnHandle.class::cast), + false)); } @Override @@ -310,60 +317,73 @@ public Optional> applyProjecti ConnectorTableHandle handle, List projections, Map assignments) { - // Create projected column representations for supported sub expressions. Simple column references and chain of + // Create projected column representations for supported sub expressions. Simple column + // references and chain of // dereferences on a variable are supported right now. 
- Set projectedExpressions = projections.stream() - .flatMap(expression -> extractSupportedProjectedColumns(expression).stream()) - .collect(toImmutableSet()); - - Map columnProjections = - projectedExpressions.stream() - .collect(toImmutableMap(Function.identity(), HiveApplyProjectionUtil::createProjectedColumnRepresentation)); + Set projectedExpressions = + projections.stream() + .flatMap(expression -> extractSupportedProjectedColumns(expression).stream()) + .collect(toImmutableSet()); + + Map + columnProjections = + projectedExpressions.stream() + .collect( + toImmutableMap( + Function.identity(), + HiveApplyProjectionUtil::createProjectedColumnRepresentation)); KeyedTableHandle keyedTableHandle = (KeyedTableHandle) handle; IcebergTableHandle icebergTableHandle = keyedTableHandle.getIcebergTableHandle(); // all references are simple variables - if (columnProjections.values() - .stream() + if (columnProjections.values().stream() .allMatch(HiveApplyProjectionUtil.ProjectedColumnRepresentation::isVariable)) { - Set projectedColumns = assignments.values().stream() - .map(IcebergColumnHandle.class::cast) - .collect(toImmutableSet()); + Set projectedColumns = + assignments.values().stream() + .map(IcebergColumnHandle.class::cast) + .collect(toImmutableSet()); if (icebergTableHandle.getProjectedColumns().equals(projectedColumns)) { return Optional.empty(); } - List assignmentsList = assignments.entrySet().stream() - .map(assignment -> new Assignment( - assignment.getKey(), - assignment.getValue(), - ((IcebergColumnHandle) assignment.getValue()).getType())) - .collect(toImmutableList()); - - return Optional.of(new ProjectionApplicationResult<>( - keyedTableHandle.withProjectedColumns(projectedColumns), - projections, - assignmentsList, - false)); + List assignmentsList = + assignments.entrySet().stream() + .map( + assignment -> + new Assignment( + assignment.getKey(), + assignment.getValue(), + ((IcebergColumnHandle) assignment.getValue()).getType())) + .collect(toImmutableList()); + + return Optional.of( + new ProjectionApplicationResult<>( + keyedTableHandle.withProjectedColumns(projectedColumns), + projections, + assignmentsList, + false)); } Map newAssignments = new HashMap<>(); - ImmutableMap.Builder newVariablesBuilder = ImmutableMap.builder(); + ImmutableMap.Builder newVariablesBuilder = + ImmutableMap.builder(); ImmutableSet.Builder projectedColumnsBuilder = ImmutableSet.builder(); - for (Map.Entry entry : - columnProjections.entrySet()) { + for (Map.Entry + entry : columnProjections.entrySet()) { ConnectorExpression expression = entry.getKey(); HiveApplyProjectionUtil.ProjectedColumnRepresentation projectedColumn = entry.getValue(); IcebergColumnHandle baseColumnHandle = (IcebergColumnHandle) assignments.get(projectedColumn.getVariable().getName()); IcebergColumnHandle projectedColumnHandle = - createProjectedColumnHandle(baseColumnHandle, projectedColumn.getDereferenceIndices(), expression.getType()); + createProjectedColumnHandle( + baseColumnHandle, projectedColumn.getDereferenceIndices(), expression.getType()); String projectedColumnName = projectedColumnHandle.getQualifiedName(); Variable projectedColumnVariable = new Variable(projectedColumnName, expression.getType()); - Assignment newAssignment = new Assignment(projectedColumnName, projectedColumnHandle, expression.getType()); + Assignment newAssignment = + new Assignment(projectedColumnName, projectedColumnHandle, expression.getType()); newAssignments.putIfAbsent(projectedColumnName, newAssignment); 
newVariablesBuilder.put(expression, projectedColumnVariable); @@ -372,20 +392,24 @@ public Optional> applyProjecti // Modify projections to refer to new variables Map newVariables = newVariablesBuilder.buildOrThrow(); - List newProjections = projections.stream() - .map(expression -> replaceWithNewVariables(expression, newVariables)) - .collect(toImmutableList()); - - List outputAssignments = newAssignments.values().stream().collect(toImmutableList()); - return Optional.of(new ProjectionApplicationResult<>( - keyedTableHandle.withProjectedColumns(projectedColumnsBuilder.build()), - newProjections, - outputAssignments, - false)); + List newProjections = + projections.stream() + .map(expression -> replaceWithNewVariables(expression, newVariables)) + .collect(toImmutableList()); + + List outputAssignments = + newAssignments.values().stream().collect(toImmutableList()); + return Optional.of( + new ProjectionApplicationResult<>( + keyedTableHandle.withProjectedColumns(projectedColumnsBuilder.build()), + newProjections, + outputAssignments, + false)); } @Override - public TableStatistics getTableStatistics(ConnectorSession session, ConnectorTableHandle tableHandle) { + public TableStatistics getTableStatistics( + ConnectorSession session, ConnectorTableHandle tableHandle) { if (!isArcticStatisticsEnabled(session)) { return TableStatistics.empty(); } @@ -396,7 +420,8 @@ public TableStatistics getTableStatistics(ConnectorSession session, ConnectorTab // If this changes, the caching logic may here may need to be revised. checkArgument(originalHandle.getUpdatedColumns().isEmpty(), "Unexpected updated columns"); checkArgument(!originalHandle.isRecordScannedFiles(), "Unexpected scanned files recording set"); - checkArgument(originalHandle.getMaxScannedFileSize().isEmpty(), "Unexpected max scanned file size set"); + checkArgument( + originalHandle.getMaxScannedFileSize().isEmpty(), "Unexpected max scanned file size set"); return tableStatisticsCache.computeIfAbsent( new IcebergTableHandle( @@ -418,30 +443,49 @@ public TableStatistics getTableStatistics(ConnectorSession session, ConnectorTab originalHandle.isRecordScannedFiles(), originalHandle.getMaxScannedFileSize()), handle -> { - ArcticTable arcticTable = getArcticTable(new SchemaTableName( - originalHandle.getSchemaName(), originalHandle.getTableName())); - TableStatistics baseTableStatistics = TableStatisticsReader.getTableStatistics( - typeManager, - session, - withSnapshotId(handle, arcticTable.asKeyedTable().baseTable().currentSnapshot().snapshotId()), - arcticTable.asKeyedTable().baseTable()); - TableStatistics changeTableStatistics = TableStatisticsReader.getTableStatistics( - typeManager, - session, - withSnapshotId(handle, arcticTable.asKeyedTable().changeTable().currentSnapshot().snapshotId()), - arcticTable.asKeyedTable().changeTable()); + ArcticTable arcticTable = + getArcticTable( + new SchemaTableName( + originalHandle.getSchemaName(), originalHandle.getTableName())); + TableStatistics baseTableStatistics = + TableStatisticsReader.getTableStatistics( + typeManager, + session, + withSnapshotId( + handle, + arcticTable.asKeyedTable().baseTable().currentSnapshot().snapshotId()), + arcticTable.asKeyedTable().baseTable()); + TableStatistics changeTableStatistics = + TableStatisticsReader.getTableStatistics( + typeManager, + session, + withSnapshotId( + handle, + arcticTable.asKeyedTable().changeTable().currentSnapshot().snapshotId()), + arcticTable.asKeyedTable().changeTable()); return computeBothTablesStatistics(baseTableStatistics, 
changeTableStatistics); }); } private static IcebergTableHandle withSnapshotId(IcebergTableHandle handle, long snapshotId) { return new IcebergTableHandle( - handle.getSchemaName(), handle.getTableName(), handle.getTableType(), + handle.getSchemaName(), + handle.getTableName(), + handle.getTableType(), Optional.of(snapshotId), - handle.getTableSchemaJson(), handle.getPartitionSpecJson(), handle.getFormatVersion(), - handle.getUnenforcedPredicate(), handle.getEnforcedPredicate(), handle.getProjectedColumns(), - handle.getNameMappingJson(), handle.getTableLocation(), handle.getStorageProperties(), handle.getRetryMode(), - handle.getUpdatedColumns(), handle.isRecordScannedFiles(), handle.getMaxScannedFileSize()); + handle.getTableSchemaJson(), + handle.getPartitionSpecJson(), + handle.getFormatVersion(), + handle.getUnenforcedPredicate(), + handle.getEnforcedPredicate(), + handle.getProjectedColumns(), + handle.getNameMappingJson(), + handle.getTableLocation(), + handle.getStorageProperties(), + handle.getRetryMode(), + handle.getUpdatedColumns(), + handle.isRecordScannedFiles(), + handle.getMaxScannedFileSize()); } private static TableStatistics computeBothTablesStatistics( @@ -449,66 +493,77 @@ private static TableStatistics computeBothTablesStatistics( double baseRowCount = baseTableStatistics.getRowCount().getValue(); double changeRowCount = changeTableStatistics.getRowCount().getValue(); Estimate rowCount = Estimate.of(baseRowCount + changeRowCount); - Map baseColumnStatistics = baseTableStatistics.getColumnStatistics(); - Map changeColumnStatistics = changeTableStatistics.getColumnStatistics(); + Map baseColumnStatistics = + baseTableStatistics.getColumnStatistics(); + Map changeColumnStatistics = + changeTableStatistics.getColumnStatistics(); Map newColumnStatistics = new HashMap<>(); - changeColumnStatistics.forEach((columnHandle, statisticsOfChangeColumn) -> { - ColumnStatistics statisticsOfBaseColumn = baseColumnStatistics.get(columnHandle); - ColumnStatistics.Builder columnBuilder = new ColumnStatistics.Builder(); - - Estimate baseDataSize = statisticsOfBaseColumn.getDataSize(); - Estimate changeDataSize = statisticsOfChangeColumn.getDataSize(); - if (!baseDataSize.isUnknown() || !changeDataSize.isUnknown()) { - double value = Stream.of(baseDataSize, changeDataSize) - .mapToDouble(Estimate::getValue) - .average() - .getAsDouble(); - columnBuilder.setDataSize(Double.isNaN(value) ? Estimate.unknown() : Estimate.of(value)); - } - - Optional baseRange = statisticsOfBaseColumn.getRange(); - Optional changeRange = statisticsOfChangeColumn.getRange(); - if (baseRange.isPresent() && changeRange.isPresent()) { - columnBuilder.setRange(DoubleRange.union(baseRange.get(), changeRange.get())); - } else { - columnBuilder.setRange(baseRange.isPresent() ? baseRange : changeRange); - } - - Estimate baseNullsFraction = statisticsOfBaseColumn.getNullsFraction(); - Estimate changeNullsFraction = statisticsOfChangeColumn.getNullsFraction(); - if (!baseNullsFraction.isUnknown() && !changeNullsFraction.isUnknown()) { - columnBuilder.setNullsFraction(Estimate.of( - ((baseNullsFraction.getValue() * baseRowCount) + - (statisticsOfChangeColumn.getNullsFraction().getValue() * changeRowCount)) / - (baseRowCount + changeRowCount))); - } else { - columnBuilder.setNullsFraction(baseNullsFraction.isUnknown() ? 
changeNullsFraction : baseNullsFraction); - } - - Estimate baseDistinctValue = statisticsOfBaseColumn.getDistinctValuesCount(); - Estimate changeDistinctValue = statisticsOfChangeColumn.getDistinctValuesCount(); - if (!baseDistinctValue.isUnknown() || !changeDistinctValue.isUnknown()) { - double value = Stream.of(baseDistinctValue, changeDistinctValue) - .mapToDouble(Estimate::getValue) - .map(dataSize -> Double.isNaN(dataSize) ? 0 : dataSize) - .sum(); - columnBuilder.setDistinctValuesCount(Estimate.of(value)); - } - - ColumnStatistics columnStatistics = columnBuilder.build(); - newColumnStatistics.put(columnHandle, columnStatistics); - }); + changeColumnStatistics.forEach( + (columnHandle, statisticsOfChangeColumn) -> { + ColumnStatistics statisticsOfBaseColumn = baseColumnStatistics.get(columnHandle); + ColumnStatistics.Builder columnBuilder = new ColumnStatistics.Builder(); + + Estimate baseDataSize = statisticsOfBaseColumn.getDataSize(); + Estimate changeDataSize = statisticsOfChangeColumn.getDataSize(); + if (!baseDataSize.isUnknown() || !changeDataSize.isUnknown()) { + double value = + Stream.of(baseDataSize, changeDataSize) + .mapToDouble(Estimate::getValue) + .average() + .getAsDouble(); + columnBuilder.setDataSize( + Double.isNaN(value) ? Estimate.unknown() : Estimate.of(value)); + } + + Optional baseRange = statisticsOfBaseColumn.getRange(); + Optional changeRange = statisticsOfChangeColumn.getRange(); + if (baseRange.isPresent() && changeRange.isPresent()) { + columnBuilder.setRange(DoubleRange.union(baseRange.get(), changeRange.get())); + } else { + columnBuilder.setRange(baseRange.isPresent() ? baseRange : changeRange); + } + + Estimate baseNullsFraction = statisticsOfBaseColumn.getNullsFraction(); + Estimate changeNullsFraction = statisticsOfChangeColumn.getNullsFraction(); + if (!baseNullsFraction.isUnknown() && !changeNullsFraction.isUnknown()) { + columnBuilder.setNullsFraction( + Estimate.of( + ((baseNullsFraction.getValue() * baseRowCount) + + (statisticsOfChangeColumn.getNullsFraction().getValue() + * changeRowCount)) + / (baseRowCount + changeRowCount))); + } else { + columnBuilder.setNullsFraction( + baseNullsFraction.isUnknown() ? changeNullsFraction : baseNullsFraction); + } + + Estimate baseDistinctValue = statisticsOfBaseColumn.getDistinctValuesCount(); + Estimate changeDistinctValue = statisticsOfChangeColumn.getDistinctValuesCount(); + if (!baseDistinctValue.isUnknown() || !changeDistinctValue.isUnknown()) { + double value = + Stream.of(baseDistinctValue, changeDistinctValue) + .mapToDouble(Estimate::getValue) + .map(dataSize -> Double.isNaN(dataSize) ? 
0 : dataSize) + .sum(); + columnBuilder.setDistinctValuesCount(Estimate.of(value)); + } + + ColumnStatistics columnStatistics = columnBuilder.build(); + newColumnStatistics.put(columnHandle, columnStatistics); + }); return new TableStatistics(rowCount, newColumnStatistics); } private static Set identityPartitionColumnsInAllSpecs(ArcticTable table) { // Extract identity partition column source ids common to ALL specs - return table.spec().partitionType().fields() - .stream().map(s -> s.fieldId()).collect(Collectors.toUnmodifiableSet()); + return table.spec().partitionType().fields().stream() + .map(s -> s.fieldId()) + .collect(Collectors.toUnmodifiableSet()); } private static IcebergColumnHandle createProjectedColumnHandle( - IcebergColumnHandle column, List indices, + IcebergColumnHandle column, + List indices, io.trino.spi.type.Type projectedColumnType) { if (indices.isEmpty()) { return column; @@ -534,9 +589,12 @@ private static IcebergColumnHandle createProjectedColumnHandle( public ArcticTable getArcticTable(SchemaTableName schemaTableName) { concurrentHashMap.computeIfAbsent( schemaTableName, - ignore -> arcticCatalog.loadTable(TableIdentifier.of(arcticCatalog.name(), - schemaTableName.getSchemaName(), schemaTableName.getTableName())) - ); + ignore -> + arcticCatalog.loadTable( + TableIdentifier.of( + arcticCatalog.name(), + schemaTableName.getSchemaName(), + schemaTableName.getTableName()))); return concurrentHashMap.get(schemaTableName); } diff --git a/trino/src/main/java/com/netease/arctic/trino/keyed/KeyedConnectorPageSource.java b/trino/src/main/java/com/netease/arctic/trino/keyed/KeyedConnectorPageSource.java index ba7474fc42..fbf0589b45 100644 --- a/trino/src/main/java/com/netease/arctic/trino/keyed/KeyedConnectorPageSource.java +++ b/trino/src/main/java/com/netease/arctic/trino/keyed/KeyedConnectorPageSource.java @@ -18,6 +18,12 @@ package com.netease.arctic.trino.keyed; +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Throwables.throwIfInstanceOf; +import static com.netease.arctic.ArcticErrorCode.ARCTIC_BAD_DATA; +import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_BAD_DATA; +import static java.util.Objects.requireNonNull; + import com.google.common.collect.ImmutableList; import com.netease.arctic.data.DataFileType; import com.netease.arctic.data.PrimaryKeyedFile; @@ -52,15 +58,7 @@ import java.util.TimeZone; import java.util.stream.Collectors; -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Throwables.throwIfInstanceOf; -import static com.netease.arctic.ArcticErrorCode.ARCTIC_BAD_DATA; -import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_BAD_DATA; -import static java.util.Objects.requireNonNull; - -/** - * ConnectorPageSource for Keyed Table - */ +/** ConnectorPageSource for Keyed Table */ public class KeyedConnectorPageSource implements ConnectorPageSource { private IcebergPageSourceProvider icebergPageSourceProvider; @@ -106,7 +104,8 @@ public KeyedConnectorPageSource( this.typeManager = typeManager; this.arcticDeleteFilter = arcticDeleteFilter; - this.requireColumnsDummy = requiredColumns.stream().map(ColumnHandle.class::cast).collect(Collectors.toList()); + this.requireColumnsDummy = + requiredColumns.stream().map(ColumnHandle.class::cast).collect(Collectors.toList()); this.expectedColumnIndexes = new int[expectedColumns.size()]; for (int i = 0; i < expectedColumns.size(); i++) { checkArgument( @@ -115,9 +114,8 @@ public 
KeyedConnectorPageSource( expectedColumnIndexes[i] = i; } - this.requireColumnTypes = requiredColumns.stream() - .map(IcebergColumnHandle::getType) - .toArray(Type[]::new); + this.requireColumnTypes = + requiredColumns.stream().map(IcebergColumnHandle::getType).toArray(Type[]::new); this.dataTasksIt = split.getKeyedTableScanTask().dataTasks().iterator(); } @@ -170,18 +168,20 @@ public Page getNextPage() { int positionCount = page.getPositionCount(); int[] positionsToKeep = new int[positionCount]; try (CloseableIterable filteredRows = - arcticDeleteFilter.filter(CloseableIterable.withNoopClose(TrinoRow.fromPage( - requireColumnTypes, - page, - positionCount)))) { + arcticDeleteFilter.filter( + CloseableIterable.withNoopClose( + TrinoRow.fromPage(requireColumnTypes, page, positionCount)))) { int positionsToKeepCount = 0; for (TrinoRow rowToKeep : filteredRows) { positionsToKeep[positionsToKeepCount] = rowToKeep.getPosition(); positionsToKeepCount++; } - page = page.getPositions(positionsToKeep, 0, positionsToKeepCount).getColumns(expectedColumnIndexes); + page = + page.getPositions(positionsToKeep, 0, positionsToKeepCount) + .getColumns(expectedColumnIndexes); } catch (IOException e) { - throw new TrinoException(ICEBERG_BAD_DATA, "Failed to filter rows during merge-on-read operation", e); + throw new TrinoException( + ICEBERG_BAD_DATA, "Failed to filter rows during merge-on-read operation", e); } } @@ -233,8 +233,10 @@ private Page getPage() throws IOException { if (page == null) { current.close(); if (dataTasksIt.hasNext()) { - completedPositions += current.getCompletedPositions().isPresent() ? - current.getCompletedPositions().getAsLong() : 0L; + completedPositions += + current.getCompletedPositions().isPresent() + ? current.getCompletedPositions().getAsLong() + : 0L; completedBytes += current.getCompletedBytes(); readTimeNanos += current.getReadTimeNanos(); current = open(dataTasksIt.next()); @@ -271,16 +273,16 @@ private ConnectorPageSource open(ArcticFileScanTask arcticFileScanTask) { ImmutableList.of(), split.getPartitionSpecJson(), split.getPartitionDataJson(), - arcticFileScanTask.deletes().stream().map(TrinoDeleteFile::copyOf).collect(Collectors.toList()), + arcticFileScanTask.deletes().stream() + .map(TrinoDeleteFile::copyOf) + .collect(Collectors.toList()), null, - null - ), + null), table.getIcebergTableHandle(), requireColumnsDummy, dynamicFilter, idToConstant, false, - DateTimeZone.forID(TimeZone.getDefault().getID()) - ); + DateTimeZone.forID(TimeZone.getDefault().getID())); } } diff --git a/trino/src/main/java/com/netease/arctic/trino/keyed/KeyedConnectorSplit.java b/trino/src/main/java/com/netease/arctic/trino/keyed/KeyedConnectorSplit.java index 7595436998..3cf229b6ca 100644 --- a/trino/src/main/java/com/netease/arctic/trino/keyed/KeyedConnectorSplit.java +++ b/trino/src/main/java/com/netease/arctic/trino/keyed/KeyedConnectorSplit.java @@ -1,4 +1,3 @@ - /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -30,9 +29,7 @@ import java.util.Collections; import java.util.List; -/** - * ConnectorSplit for Keyed Table - */ +/** ConnectorSplit for Keyed Table */ public class KeyedConnectorSplit implements ConnectorSplit { private byte[] keyedTableScanTaskBytes; diff --git a/trino/src/main/java/com/netease/arctic/trino/keyed/KeyedConnectorSplitManager.java b/trino/src/main/java/com/netease/arctic/trino/keyed/KeyedConnectorSplitManager.java index 3f16239968..c1c9c20c19 100644 --- a/trino/src/main/java/com/netease/arctic/trino/keyed/KeyedConnectorSplitManager.java +++ b/trino/src/main/java/com/netease/arctic/trino/keyed/KeyedConnectorSplitManager.java @@ -1,4 +1,3 @@ - /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -19,6 +18,8 @@ package com.netease.arctic.trino.keyed; +import static io.trino.plugin.iceberg.ExpressionConverter.toIcebergExpression; + import com.netease.arctic.scan.ArcticFileScanTask; import com.netease.arctic.scan.CombinedScanTask; import com.netease.arctic.scan.KeyedTableScan; @@ -47,15 +48,12 @@ import org.slf4j.LoggerFactory; import javax.inject.Inject; + import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; -import static io.trino.plugin.iceberg.ExpressionConverter.toIcebergExpression; - -/** - * ConnectorSplitManager for Keyed Table - */ +/** ConnectorSplitManager for Keyed Table */ public class KeyedConnectorSplitManager implements ConnectorSplitManager { public static final int ARCTIC_DOMAIN_COMPACTION_THRESHOLD = 1000; @@ -78,28 +76,36 @@ public ConnectorSplitSource getSplits( Constraint constraint) { KeyedTableHandle keyedTableHandle = (KeyedTableHandle) handle; IcebergTableHandle icebergTableHandle = keyedTableHandle.getIcebergTableHandle(); - KeyedTable arcticTable = (arcticTransactionManager.get(transaction)) - .getArcticTable(new SchemaTableName( - icebergTableHandle.getSchemaName(), - icebergTableHandle.getTableName())).asKeyedTable(); + KeyedTable arcticTable = + (arcticTransactionManager.get(transaction)) + .getArcticTable( + new SchemaTableName( + icebergTableHandle.getSchemaName(), icebergTableHandle.getTableName())) + .asKeyedTable(); if (arcticTable == null) { - throw new TableNotFoundException(new SchemaTableName( - icebergTableHandle.getSchemaName(), - icebergTableHandle.getTableName())); + throw new TableNotFoundException( + new SchemaTableName( + icebergTableHandle.getSchemaName(), icebergTableHandle.getTableName())); } - KeyedTableScan tableScan = arcticTable.newScan() - .filter(toIcebergExpression( - icebergTableHandle.getEnforcedPredicate().intersect(icebergTableHandle.getUnenforcedPredicate()))); + KeyedTableScan tableScan = + arcticTable + .newScan() + .filter( + toIcebergExpression( + icebergTableHandle + .getEnforcedPredicate() + .intersect(icebergTableHandle.getUnenforcedPredicate()))); if (ArcticSessionProperties.enableSplitTaskByDeleteRatio(session)) { - tableScan.enableSplitTaskByDeleteRatio(ArcticSessionProperties.splitTaskByDeleteRatio(session)); + tableScan.enableSplitTaskByDeleteRatio( + ArcticSessionProperties.splitTaskByDeleteRatio(session)); } ClassLoader pluginClassloader = arcticTable.getClass().getClassLoader(); try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(pluginClassloader)) { - //优化 + // 优化 CloseableIterable combinedScanTasks = MetricUtil.duration(() -> tableScan.planTasks(), "plan tasks"); @@ -110,17 +116,19 @@ public ConnectorSplitSource getSplits( } } - List 
keyedConnectorSplits = fileScanTaskList.stream().map( - s -> { - ArcticFileScanTask arcticFileScanTask = s.dataTasks().get(0); - KeyedConnectorSplit keyedConnectorSplit = new KeyedConnectorSplit( - ObjectSerializerUtil.write(s), - PartitionSpecParser.toJson(arcticFileScanTask.spec()), - PartitionData.toJson(arcticFileScanTask.file().partition()) - ); - return keyedConnectorSplit; - } - ).collect(Collectors.toList()); + List keyedConnectorSplits = + fileScanTaskList.stream() + .map( + s -> { + ArcticFileScanTask arcticFileScanTask = s.dataTasks().get(0); + KeyedConnectorSplit keyedConnectorSplit = + new KeyedConnectorSplit( + ObjectSerializerUtil.write(s), + PartitionSpecParser.toJson(arcticFileScanTask.spec()), + PartitionData.toJson(arcticFileScanTask.file().partition())); + return keyedConnectorSplit; + }) + .collect(Collectors.toList()); return new FixedSplitSource(keyedConnectorSplits); } diff --git a/trino/src/main/java/com/netease/arctic/trino/keyed/KeyedDeleteFilter.java b/trino/src/main/java/com/netease/arctic/trino/keyed/KeyedDeleteFilter.java index 2fbc509426..cd758c558b 100644 --- a/trino/src/main/java/com/netease/arctic/trino/keyed/KeyedDeleteFilter.java +++ b/trino/src/main/java/com/netease/arctic/trino/keyed/KeyedDeleteFilter.java @@ -18,6 +18,9 @@ package com.netease.arctic.trino.keyed; +import static com.google.common.collect.ImmutableList.toImmutableList; +import static io.trino.plugin.iceberg.TypeConverter.toIcebergType; + import com.netease.arctic.hive.io.reader.AdaptHiveArcticDeleteFilter; import com.netease.arctic.scan.KeyedTableScanTask; import com.netease.arctic.table.PrimaryKeySpec; @@ -32,12 +35,7 @@ import java.util.List; import java.util.Optional; -import static com.google.common.collect.ImmutableList.toImmutableList; -import static io.trino.plugin.iceberg.TypeConverter.toIcebergType; - -/** - * KeyedDeleteFilter is used to do MOR for Keyed Table - */ +/** KeyedDeleteFilter is used to do MOR for Keyed Table */ public class KeyedDeleteFilter extends AdaptHiveArcticDeleteFilter { private FileIO fileIO; @@ -48,7 +46,11 @@ protected KeyedDeleteFilter( List requestedSchema, PrimaryKeySpec primaryKeySpec, FileIO fileIO) { - super(keyedTableScanTask, tableSchema, filterSchema(tableSchema, requestedSchema), primaryKeySpec); + super( + keyedTableScanTask, + tableSchema, + filterSchema(tableSchema, requestedSchema), + primaryKeySpec); this.fileIO = fileIO; } @@ -62,13 +64,13 @@ protected InputFile getInputFile(String location) { return fileIO.newInputFile(location); } - private static Schema filterSchema(Schema tableSchema, List requestedColumns) { + private static Schema filterSchema( + Schema tableSchema, List requestedColumns) { return new Schema(filterFieldList(tableSchema.columns(), requestedColumns)); } private static List filterFieldList( - List fields, - List requestedSchemas) { + List fields, List requestedSchemas) { return requestedSchemas.stream() .map(id -> filterField(id, fields)) .filter(Optional::isPresent) @@ -78,18 +80,22 @@ private static List filterFieldList( private static Optional filterField( IcebergColumnHandle requestedSchema, List fields) { - for (Types.NestedField nestedField: fields) { + for (Types.NestedField nestedField : fields) { if (nestedField.fieldId() == requestedSchema.getId()) { return Optional.of(nestedField); } if (nestedField.type().isStructType()) { - Optional optional = filterField(requestedSchema, nestedField.type().asStructType().fields()); + Optional optional = + filterField(requestedSchema, 
nestedField.type().asStructType().fields()); if (optional.isPresent()) { return optional; } } } - return Optional.of(Types.NestedField.optional(requestedSchema.getId(), requestedSchema.getName(), - toIcebergType(requestedSchema.getType()))); + return Optional.of( + Types.NestedField.optional( + requestedSchema.getId(), + requestedSchema.getName(), + toIcebergType(requestedSchema.getType()))); } } diff --git a/trino/src/main/java/com/netease/arctic/trino/keyed/KeyedPageSourceProvider.java b/trino/src/main/java/com/netease/arctic/trino/keyed/KeyedPageSourceProvider.java index 753e2b13b9..bb1b5789b0 100644 --- a/trino/src/main/java/com/netease/arctic/trino/keyed/KeyedPageSourceProvider.java +++ b/trino/src/main/java/com/netease/arctic/trino/keyed/KeyedPageSourceProvider.java @@ -44,9 +44,7 @@ import java.util.List; import java.util.stream.Collectors; -/** - * ConnectorPageSourceProvider for Keyed Table - */ +/** ConnectorPageSourceProvider for Keyed Table */ public class KeyedPageSourceProvider implements ConnectorPageSourceProvider { private final IcebergPageSourceProvider icebergPageSourceProvider; @@ -73,32 +71,38 @@ public ConnectorPageSource createPageSource( DynamicFilter dynamicFilter) { KeyedConnectorSplit keyedConnectorSplit = (KeyedConnectorSplit) split; KeyedTableHandle keyedTableHandle = (KeyedTableHandle) table; - List icebergColumnHandles = columns.stream().map(IcebergColumnHandle.class::cast) - .collect(Collectors.toList()); + List icebergColumnHandles = + columns.stream().map(IcebergColumnHandle.class::cast).collect(Collectors.toList()); KeyedTableScanTask keyedTableScanTask = keyedConnectorSplit.getKeyedTableScanTask(); - List equDeleteFiles = keyedTableScanTask.arcticEquityDeletes().stream() - .map(ArcticFileScanTask::file).collect(Collectors.toList()); - Schema tableSchema = SchemaParser.fromJson(keyedTableHandle.getIcebergTableHandle().getTableSchemaJson()); - List deleteFilterRequiredSchema = IcebergUtil.getColumns(new KeyedDeleteFilter( - keyedTableScanTask, - tableSchema, - ImmutableList.of(), - keyedTableHandle.getPrimaryKeySpec(), - fileSystemFactory.create(session).toFileIo() - ).requiredSchema(), typeManager); + List equDeleteFiles = + keyedTableScanTask.arcticEquityDeletes().stream() + .map(ArcticFileScanTask::file) + .collect(Collectors.toList()); + Schema tableSchema = + SchemaParser.fromJson(keyedTableHandle.getIcebergTableHandle().getTableSchemaJson()); + List deleteFilterRequiredSchema = + IcebergUtil.getColumns( + new KeyedDeleteFilter( + keyedTableScanTask, + tableSchema, + ImmutableList.of(), + keyedTableHandle.getPrimaryKeySpec(), + fileSystemFactory.create(session).toFileIo()) + .requiredSchema(), + typeManager); ImmutableList.Builder requiredColumnsBuilder = ImmutableList.builder(); requiredColumnsBuilder.addAll(icebergColumnHandles); deleteFilterRequiredSchema.stream() .filter(column -> !columns.contains(column)) .forEach(requiredColumnsBuilder::add); List requiredColumns = requiredColumnsBuilder.build(); - AdaptHiveArcticDeleteFilter arcticDeleteFilter = new KeyedDeleteFilter( - keyedTableScanTask, - tableSchema, - requiredColumns, - keyedTableHandle.getPrimaryKeySpec(), - fileSystemFactory.create(session).toFileIo() - ); + AdaptHiveArcticDeleteFilter arcticDeleteFilter = + new KeyedDeleteFilter( + keyedTableScanTask, + tableSchema, + requiredColumns, + keyedTableHandle.getPrimaryKeySpec(), + fileSystemFactory.create(session).toFileIo()); return new KeyedConnectorPageSource( icebergColumnHandles, @@ -110,7 +114,6 @@ public ConnectorPageSource 
createPageSource( keyedTableHandle, dynamicFilter, typeManager, - arcticDeleteFilter - ); + arcticDeleteFilter); } } diff --git a/trino/src/main/java/com/netease/arctic/trino/keyed/KeyedTableHandle.java b/trino/src/main/java/com/netease/arctic/trino/keyed/KeyedTableHandle.java index 7f1b31d06e..8d914ba5cb 100644 --- a/trino/src/main/java/com/netease/arctic/trino/keyed/KeyedTableHandle.java +++ b/trino/src/main/java/com/netease/arctic/trino/keyed/KeyedTableHandle.java @@ -28,9 +28,7 @@ import java.util.Set; -/** - * ConnectorTableHandle for Keyed Table, beside contain primary beside IcebergTableHandle - */ +/** ConnectorTableHandle for Keyed Table, beside contain primary beside IcebergTableHandle */ public class KeyedTableHandle implements ConnectorTableHandle { private IcebergTableHandle icebergTableHandle; @@ -65,7 +63,8 @@ public PrimaryKeySpec getPrimaryKeySpec() { } public KeyedTableHandle withProjectedColumns(Set projectedColumns) { - IcebergTableHandle newIcebergTableHandle = icebergTableHandle.withProjectedColumns(projectedColumns); + IcebergTableHandle newIcebergTableHandle = + icebergTableHandle.withProjectedColumns(projectedColumns); return new KeyedTableHandle(newIcebergTableHandle, primaryKeySpecBytes); } } diff --git a/trino/src/main/java/com/netease/arctic/trino/unkeyed/AdaptHiveIcebergTableHandle.java b/trino/src/main/java/com/netease/arctic/trino/unkeyed/AdaptHiveIcebergTableHandle.java index e0252fd300..a206a00645 100644 --- a/trino/src/main/java/com/netease/arctic/trino/unkeyed/AdaptHiveIcebergTableHandle.java +++ b/trino/src/main/java/com/netease/arctic/trino/unkeyed/AdaptHiveIcebergTableHandle.java @@ -18,6 +18,8 @@ package com.netease.arctic.trino.unkeyed; +import static java.util.Objects.requireNonNull; + import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.collect.ImmutableList; @@ -35,8 +37,6 @@ import java.util.Optional; import java.util.Set; -import static java.util.Objects.requireNonNull; - public class AdaptHiveIcebergTableHandle extends IcebergTableHandle { private final String schemaName; @@ -44,7 +44,8 @@ public class AdaptHiveIcebergTableHandle extends IcebergTableHandle { private final TableType tableType; private final Optional snapshotId; private final String tableSchemaJson; - // Empty means the partitioning spec is not known (can be the case for certain time travel queries). + // Empty means the partitioning spec is not known (can be the case for certain time travel + // queries). 
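The changes to these handle classes are shape-only and repeat across the whole module: one-sentence Javadoc blocks are collapsed onto a single line, static imports are hoisted above the regular imports, statements that would overflow 100 columns are broken after '=' (or before each '.') and continued on a deeper-indented line, and the stray Chinese marker comments elsewhere in these hunks (// 优化, roughly "optimize"; // 不会使用, "will not be used") only gain a space after the slashes. A minimal sketch of the target style, assuming a google-java-format-style ruleset; the class below is invented for illustration and is not part of the patch:

    import static java.util.Objects.requireNonNull;

    import com.google.common.collect.ImmutableSet;
    import java.util.Set;

    /** One-sentence Javadoc collapses onto a single line. */
    public class ExampleHandle {
      private final Set<String> projectedColumns;

      public ExampleHandle(Set<String> projectedColumns) {
        // A right-hand side that no longer fits within the 100-column limit is wrapped
        // after '=' and continued with extra indentation, as in the surrounding hunks.
        this.projectedColumns =
            ImmutableSet.copyOf(requireNonNull(projectedColumns, "projectedColumns is null"));
      }

      public Set<String> projectedColumns() {
        return projectedColumns;
      }
    }
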
private final Optional partitionSpecJson; private final int formatVersion; private final String tableLocation; @@ -54,7 +55,8 @@ public class AdaptHiveIcebergTableHandle extends IcebergTableHandle { // UPDATE only private final List updatedColumns; - // Filter used during split generation and table scan, but not required to be strictly enforced by Iceberg Connector + // Filter used during split generation and table scan, but not required to be strictly enforced by + // Iceberg Connector private final TupleDomain unenforcedPredicate; // Filter guaranteed to be enforced by Iceberg connector @@ -149,18 +151,21 @@ public AdaptHiveIcebergTableHandle( this.formatVersion = formatVersion; this.unenforcedPredicate = requireNonNull(unenforcedPredicate, "unenforcedPredicate is null"); this.enforcedPredicate = requireNonNull(enforcedPredicate, "enforcedPredicate is null"); - this.projectedColumns = ImmutableSet.copyOf(requireNonNull(projectedColumns, "projectedColumns is null")); + this.projectedColumns = + ImmutableSet.copyOf(requireNonNull(projectedColumns, "projectedColumns is null")); this.nameMappingJson = requireNonNull(nameMappingJson, "nameMappingJson is null"); this.tableLocation = requireNonNull(tableLocation, "tableLocation is null"); - this.storageProperties = ImmutableMap.copyOf(requireNonNull(storageProperties, "storageProperties is null")); + this.storageProperties = + ImmutableMap.copyOf(requireNonNull(storageProperties, "storageProperties is null")); this.retryMode = requireNonNull(retryMode, "retryMode is null"); - this.updatedColumns = ImmutableList.copyOf(requireNonNull(updatedColumns, "updatedColumns is null")); + this.updatedColumns = + ImmutableList.copyOf(requireNonNull(updatedColumns, "updatedColumns is null")); this.recordScannedFiles = recordScannedFiles; this.maxScannedFileSize = requireNonNull(maxScannedFileSize, "maxScannedFileSize is null"); } - - public AdaptHiveIcebergTableHandle withProjectedColumns(Set projectedColumns) { + public AdaptHiveIcebergTableHandle withProjectedColumns( + Set projectedColumns) { return new AdaptHiveIcebergTableHandle( schemaName, tableName, @@ -202,7 +207,8 @@ public AdaptHiveIcebergTableHandle withRetryMode(RetryMode retryMode) { Optional.empty()); } - public AdaptHiveIcebergTableHandle forOptimize(boolean recordScannedFiles, DataSize maxScannedFileSize) { + public AdaptHiveIcebergTableHandle forOptimize( + boolean recordScannedFiles, DataSize maxScannedFileSize) { return new AdaptHiveIcebergTableHandle( schemaName, tableName, diff --git a/trino/src/main/java/com/netease/arctic/trino/unkeyed/ArcticTrinoCatalog.java b/trino/src/main/java/com/netease/arctic/trino/unkeyed/ArcticTrinoCatalog.java index 4c63a960cc..7f1088ec11 100644 --- a/trino/src/main/java/com/netease/arctic/trino/unkeyed/ArcticTrinoCatalog.java +++ b/trino/src/main/java/com/netease/arctic/trino/unkeyed/ArcticTrinoCatalog.java @@ -18,6 +18,10 @@ package com.netease.arctic.trino.unkeyed; +import static io.trino.plugin.hive.util.HiveUtil.isHiveSystemSchema; +import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; +import static java.util.Locale.ENGLISH; + import com.google.common.collect.ImmutableList; import com.netease.arctic.catalog.ArcticCatalog; import com.netease.arctic.table.ArcticTable; @@ -43,13 +47,7 @@ import java.util.Optional; import java.util.stream.Collectors; -import static io.trino.plugin.hive.util.HiveUtil.isHiveSystemSchema; -import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; -import static java.util.Locale.ENGLISH; - -/** - * A TrinoCatalog for 
Arctic, this is in order to reuse iceberg code - */ +/** A TrinoCatalog for Arctic, this is in order to reuse iceberg code */ public class ArcticTrinoCatalog implements TrinoCatalog { private ArcticCatalog arcticCatalog; @@ -61,7 +59,8 @@ public ArcticTrinoCatalog(ArcticCatalog arcticCatalog) { @Override public boolean namespaceExists(ConnectorSession session, String namespace) { if (!namespace.equals(namespace.toLowerCase(ENGLISH))) { - // Currently, Trino schemas are always lowercase, so this one cannot exist (https://github.com/trinodb/trino/issues/17) + // Currently, Trino schemas are always lowercase, so this one cannot exist + // (https://github.com/trinodb/trino/issues/17) return false; } if (HiveUtil.isHiveSystemSchema(namespace)) { @@ -96,7 +95,8 @@ public Map loadNamespaceMetadata(ConnectorSession session, Strin } @Override - public Optional getNamespacePrincipal(ConnectorSession session, String namespace) { + public Optional getNamespacePrincipal( + ConnectorSession session, String namespace) { return Optional.empty(); } @@ -110,7 +110,8 @@ public void createNamespace( } @Override - public void setNamespacePrincipal(ConnectorSession session, String namespace, TrinoPrincipal principal) { + public void setNamespacePrincipal( + ConnectorSession session, String namespace, TrinoPrincipal principal) { throw new TrinoException(NOT_SUPPORTED, "Unsupported drop db"); } @@ -121,8 +122,7 @@ public void renameNamespace(ConnectorSession session, String source, String targ @Override public List listTables(ConnectorSession session, Optional namespace) { - return listNamespaces(session, namespace) - .stream() + return listNamespaces(session, namespace).stream() .flatMap(s -> arcticCatalog.listTables(s).stream()) .map(s -> new SchemaTableName(s.getDatabase(), s.getTableName())) .collect(Collectors.toList()); @@ -130,12 +130,17 @@ public List listTables(ConnectorSession session, Optional properties) { - return arcticCatalog.newTableBuilder(getTableIdentifier(schemaTableName), schema) - .withPartitionSpec(partitionSpec) - .withProperties(properties).createTransaction(); + ConnectorSession session, + SchemaTableName schemaTableName, + Schema schema, + PartitionSpec partitionSpec, + String location, + Map properties) { + return arcticCatalog + .newTableBuilder(getTableIdentifier(schemaTableName), schema) + .withPartitionSpec(partitionSpec) + .withProperties(properties) + .createTransaction(); } @Override @@ -149,8 +154,10 @@ public void registerTable( @Override public void dropTable(ConnectorSession session, SchemaTableName schemaTableName) { - arcticCatalog.dropTable(TableIdentifier.of(arcticCatalog.name(), - schemaTableName.getSchemaName(), schemaTableName.getTableName()), true); + arcticCatalog.dropTable( + TableIdentifier.of( + arcticCatalog.name(), schemaTableName.getSchemaName(), schemaTableName.getTableName()), + true); } @Override @@ -168,12 +175,14 @@ public Table loadTable(ConnectorSession session, SchemaTableName schemaTableName } @Override - public void updateTableComment(ConnectorSession session, SchemaTableName schemaTableName, Optional comment) { + public void updateTableComment( + ConnectorSession session, SchemaTableName schemaTableName, Optional comment) { throw new TrinoException(NOT_SUPPORTED, "UnSupport update table comment"); } @Override - public void updateViewComment(ConnectorSession session, SchemaTableName schemaViewName, Optional comment) { + public void updateViewComment( + ConnectorSession session, SchemaTableName schemaViewName, Optional comment) { throw new 
TrinoException(NOT_SUPPORTED, "UnSupport update table comment"); } @@ -181,18 +190,20 @@ public void updateViewComment(ConnectorSession session, SchemaTableName schemaVi public void updateViewColumnComment( ConnectorSession session, SchemaTableName schemaViewName, - String columnName, Optional comment) { + String columnName, + Optional comment) { throw new TrinoException(NOT_SUPPORTED, "UnSupport update table comment"); } @Override public String defaultTableLocation(ConnectorSession session, SchemaTableName schemaTableName) { - //不会使用 + // 不会使用 return null; } @Override - public void setTablePrincipal(ConnectorSession session, SchemaTableName schemaTableName, TrinoPrincipal principal) { + public void setTablePrincipal( + ConnectorSession session, SchemaTableName schemaTableName, TrinoPrincipal principal) { throw new TrinoException(NOT_SUPPORTED, "UnSupport set table principal"); } @@ -211,7 +222,8 @@ public void renameView(ConnectorSession session, SchemaTableName source, SchemaT } @Override - public void setViewPrincipal(ConnectorSession session, SchemaTableName schemaViewName, TrinoPrincipal principal) { + public void setViewPrincipal( + ConnectorSession session, SchemaTableName schemaViewName, TrinoPrincipal principal) { throw new TrinoException(NOT_SUPPORTED, "Unsupported view"); } @@ -226,17 +238,20 @@ public List listViews(ConnectorSession session, Optional getViews(ConnectorSession session, Optional namespace) { + public Map getViews( + ConnectorSession session, Optional namespace) { throw new TrinoException(NOT_SUPPORTED, "Unsupported view"); } @Override - public Optional getView(ConnectorSession session, SchemaTableName viewIdentifier) { + public Optional getView( + ConnectorSession session, SchemaTableName viewIdentifier) { throw new TrinoException(NOT_SUPPORTED, "Unsupported view"); } @Override - public List listMaterializedViews(ConnectorSession session, Optional namespace) { + public List listMaterializedViews( + ConnectorSession session, Optional namespace) { throw new TrinoException(NOT_SUPPORTED, "Unsupported view"); } @@ -257,13 +272,13 @@ public void dropMaterializedView(ConnectorSession session, SchemaTableName schem @Override public Optional getMaterializedView( - ConnectorSession session, - SchemaTableName schemaViewName) { + ConnectorSession session, SchemaTableName schemaViewName) { throw new TrinoException(NOT_SUPPORTED, "Unsupported view"); } @Override - public void renameMaterializedView(ConnectorSession session, SchemaTableName source, SchemaTableName target) { + public void renameMaterializedView( + ConnectorSession session, SchemaTableName source, SchemaTableName target) { throw new TrinoException(NOT_SUPPORTED, "Unsupported view"); } @@ -277,12 +292,13 @@ public void updateColumnComment( } @Override - public Optional redirectTable(ConnectorSession session, SchemaTableName tableName) { + public Optional redirectTable( + ConnectorSession session, SchemaTableName tableName) { return Optional.empty(); } private TableIdentifier getTableIdentifier(SchemaTableName schemaTableName) { - return TableIdentifier.of(arcticCatalog.name(), - schemaTableName.getSchemaName(), schemaTableName.getTableName()); + return TableIdentifier.of( + arcticCatalog.name(), schemaTableName.getSchemaName(), schemaTableName.getTableName()); } } diff --git a/trino/src/main/java/com/netease/arctic/trino/unkeyed/ArcticTrinoCatalogFactory.java b/trino/src/main/java/com/netease/arctic/trino/unkeyed/ArcticTrinoCatalogFactory.java index 963c762244..c75cf8d3e6 100644 --- 
a/trino/src/main/java/com/netease/arctic/trino/unkeyed/ArcticTrinoCatalogFactory.java +++ b/trino/src/main/java/com/netease/arctic/trino/unkeyed/ArcticTrinoCatalogFactory.java @@ -25,9 +25,7 @@ import javax.inject.Inject; -/** - * Factory to generate TrinoCatalog - */ +/** Factory to generate TrinoCatalog */ public class ArcticTrinoCatalogFactory implements TrinoCatalogFactory { private ArcticCatalogFactory arcticCatalogFactory; diff --git a/trino/src/main/java/com/netease/arctic/trino/unkeyed/IcebergMetadata.java b/trino/src/main/java/com/netease/arctic/trino/unkeyed/IcebergMetadata.java index a213dd5b9c..74d330e125 100644 --- a/trino/src/main/java/com/netease/arctic/trino/unkeyed/IcebergMetadata.java +++ b/trino/src/main/java/com/netease/arctic/trino/unkeyed/IcebergMetadata.java @@ -1,4 +1,3 @@ - /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -19,6 +18,90 @@ package com.netease.arctic.trino.unkeyed; +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkState; +import static com.google.common.base.Verify.verify; +import static com.google.common.base.Verify.verifyNotNull; +import static com.google.common.collect.ImmutableList.toImmutableList; +import static com.google.common.collect.ImmutableMap.toImmutableMap; +import static com.google.common.collect.ImmutableSet.toImmutableSet; +import static com.google.common.collect.Maps.transformValues; +import static io.trino.plugin.base.util.Procedures.checkProcedureArgument; +import static io.trino.plugin.hive.HiveApplyProjectionUtil.extractSupportedProjectedColumns; +import static io.trino.plugin.hive.HiveApplyProjectionUtil.replaceWithNewVariables; +import static io.trino.plugin.hive.util.HiveUtil.isStructuralType; +import static io.trino.plugin.iceberg.ExpressionConverter.toIcebergExpression; +import static io.trino.plugin.iceberg.IcebergAnalyzeProperties.getColumnNames; +import static io.trino.plugin.iceberg.IcebergColumnHandle.TRINO_MERGE_FILE_RECORD_COUNT; +import static io.trino.plugin.iceberg.IcebergColumnHandle.TRINO_MERGE_PARTITION_DATA; +import static io.trino.plugin.iceberg.IcebergColumnHandle.TRINO_MERGE_PARTITION_SPEC_ID; +import static io.trino.plugin.iceberg.IcebergColumnHandle.TRINO_MERGE_ROW_ID; +import static io.trino.plugin.iceberg.IcebergColumnHandle.TRINO_ROW_ID_NAME; +import static io.trino.plugin.iceberg.IcebergColumnHandle.fileModifiedTimeColumnHandle; +import static io.trino.plugin.iceberg.IcebergColumnHandle.fileModifiedTimeColumnMetadata; +import static io.trino.plugin.iceberg.IcebergColumnHandle.pathColumnHandle; +import static io.trino.plugin.iceberg.IcebergColumnHandle.pathColumnMetadata; +import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_COMMIT_ERROR; +import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_FILESYSTEM_ERROR; +import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_INVALID_METADATA; +import static io.trino.plugin.iceberg.IcebergMetadataColumn.FILE_MODIFIED_TIME; +import static io.trino.plugin.iceberg.IcebergMetadataColumn.FILE_PATH; +import static io.trino.plugin.iceberg.IcebergMetadataColumn.isMetadataColumnId; +import static io.trino.plugin.iceberg.IcebergSessionProperties.getExpireSnapshotMinRetention; +import static io.trino.plugin.iceberg.IcebergSessionProperties.getRemoveOrphanFilesMinRetention; +import static io.trino.plugin.iceberg.IcebergSessionProperties.isExtendedStatisticsEnabled; +import static 
io.trino.plugin.iceberg.IcebergSessionProperties.isProjectionPushdownEnabled; +import static io.trino.plugin.iceberg.IcebergSessionProperties.isStatisticsEnabled; +import static io.trino.plugin.iceberg.IcebergTableProperties.FILE_FORMAT_PROPERTY; +import static io.trino.plugin.iceberg.IcebergTableProperties.FORMAT_VERSION_PROPERTY; +import static io.trino.plugin.iceberg.IcebergTableProperties.PARTITIONING_PROPERTY; +import static io.trino.plugin.iceberg.IcebergTableProperties.getPartitioning; +import static io.trino.plugin.iceberg.IcebergUtil.commit; +import static io.trino.plugin.iceberg.IcebergUtil.deserializePartitionValue; +import static io.trino.plugin.iceberg.IcebergUtil.fileName; +import static io.trino.plugin.iceberg.IcebergUtil.getColumnHandle; +import static io.trino.plugin.iceberg.IcebergUtil.getColumns; +import static io.trino.plugin.iceberg.IcebergUtil.getFileFormat; +import static io.trino.plugin.iceberg.IcebergUtil.getPartitionKeys; +import static io.trino.plugin.iceberg.IcebergUtil.getTableComment; +import static io.trino.plugin.iceberg.IcebergUtil.newCreateTableTransaction; +import static io.trino.plugin.iceberg.IcebergUtil.schemaFromMetadata; +import static io.trino.plugin.iceberg.PartitionFields.parsePartitionFields; +import static io.trino.plugin.iceberg.PartitionFields.toPartitionFields; +import static io.trino.plugin.iceberg.TableStatisticsReader.TRINO_STATS_COLUMN_ID_PATTERN; +import static io.trino.plugin.iceberg.TableStatisticsReader.TRINO_STATS_PREFIX; +import static io.trino.plugin.iceberg.TableType.DATA; +import static io.trino.plugin.iceberg.TypeConverter.toIcebergType; +import static io.trino.plugin.iceberg.TypeConverter.toTrinoType; +import static io.trino.plugin.iceberg.catalog.hms.TrinoHiveCatalog.DEPENDS_ON_TABLES; +import static io.trino.plugin.iceberg.procedure.IcebergTableProcedureId.DROP_EXTENDED_STATS; +import static io.trino.plugin.iceberg.procedure.IcebergTableProcedureId.EXPIRE_SNAPSHOTS; +import static io.trino.plugin.iceberg.procedure.IcebergTableProcedureId.OPTIMIZE; +import static io.trino.plugin.iceberg.procedure.IcebergTableProcedureId.REMOVE_ORPHAN_FILES; +import static io.trino.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY; +import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; +import static io.trino.spi.connector.MaterializedViewFreshness.Freshness.FRESH; +import static io.trino.spi.connector.MaterializedViewFreshness.Freshness.STALE; +import static io.trino.spi.connector.MaterializedViewFreshness.Freshness.UNKNOWN; +import static io.trino.spi.connector.RetryMode.NO_RETRIES; +import static java.lang.String.format; +import static java.util.Locale.ENGLISH; +import static java.util.Objects.requireNonNull; +import static java.util.function.Function.identity; +import static java.util.stream.Collectors.groupingBy; +import static java.util.stream.Collectors.joining; +import static org.apache.iceberg.FileContent.POSITION_DELETES; +import static org.apache.iceberg.ReachableFileUtil.metadataFileLocations; +import static org.apache.iceberg.ReachableFileUtil.versionHintLocation; +import static org.apache.iceberg.SnapshotSummary.DELETED_RECORDS_PROP; +import static org.apache.iceberg.SnapshotSummary.REMOVED_EQ_DELETES_PROP; +import static org.apache.iceberg.SnapshotSummary.REMOVED_POS_DELETES_PROP; +import static org.apache.iceberg.TableProperties.DELETE_ISOLATION_LEVEL; +import static org.apache.iceberg.TableProperties.DELETE_ISOLATION_LEVEL_DEFAULT; +import static org.apache.iceberg.TableProperties.FORMAT_VERSION; +import static 
org.apache.iceberg.TableProperties.WRITE_LOCATION_PROVIDER_IMPL; +import static org.apache.iceberg.types.TypeUtil.indexParents; + import com.google.common.base.Splitter; import com.google.common.base.Suppliers; import com.google.common.base.VerifyException; @@ -190,96 +273,11 @@ import java.util.regex.Pattern; import java.util.stream.Stream; -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Preconditions.checkState; -import static com.google.common.base.Verify.verify; -import static com.google.common.base.Verify.verifyNotNull; -import static com.google.common.collect.ImmutableList.toImmutableList; -import static com.google.common.collect.ImmutableMap.toImmutableMap; -import static com.google.common.collect.ImmutableSet.toImmutableSet; -import static com.google.common.collect.Maps.transformValues; -import static io.trino.plugin.base.util.Procedures.checkProcedureArgument; -import static io.trino.plugin.hive.HiveApplyProjectionUtil.extractSupportedProjectedColumns; -import static io.trino.plugin.hive.HiveApplyProjectionUtil.replaceWithNewVariables; -import static io.trino.plugin.hive.util.HiveUtil.isStructuralType; -import static io.trino.plugin.iceberg.ExpressionConverter.toIcebergExpression; -import static io.trino.plugin.iceberg.IcebergAnalyzeProperties.getColumnNames; -import static io.trino.plugin.iceberg.IcebergColumnHandle.TRINO_MERGE_FILE_RECORD_COUNT; -import static io.trino.plugin.iceberg.IcebergColumnHandle.TRINO_MERGE_PARTITION_DATA; -import static io.trino.plugin.iceberg.IcebergColumnHandle.TRINO_MERGE_PARTITION_SPEC_ID; -import static io.trino.plugin.iceberg.IcebergColumnHandle.TRINO_MERGE_ROW_ID; -import static io.trino.plugin.iceberg.IcebergColumnHandle.TRINO_ROW_ID_NAME; -import static io.trino.plugin.iceberg.IcebergColumnHandle.fileModifiedTimeColumnHandle; -import static io.trino.plugin.iceberg.IcebergColumnHandle.fileModifiedTimeColumnMetadata; -import static io.trino.plugin.iceberg.IcebergColumnHandle.pathColumnHandle; -import static io.trino.plugin.iceberg.IcebergColumnHandle.pathColumnMetadata; -import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_COMMIT_ERROR; -import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_FILESYSTEM_ERROR; -import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_INVALID_METADATA; -import static io.trino.plugin.iceberg.IcebergMetadataColumn.FILE_MODIFIED_TIME; -import static io.trino.plugin.iceberg.IcebergMetadataColumn.FILE_PATH; -import static io.trino.plugin.iceberg.IcebergMetadataColumn.isMetadataColumnId; -import static io.trino.plugin.iceberg.IcebergSessionProperties.getExpireSnapshotMinRetention; -import static io.trino.plugin.iceberg.IcebergSessionProperties.getRemoveOrphanFilesMinRetention; -import static io.trino.plugin.iceberg.IcebergSessionProperties.isExtendedStatisticsEnabled; -import static io.trino.plugin.iceberg.IcebergSessionProperties.isProjectionPushdownEnabled; -import static io.trino.plugin.iceberg.IcebergSessionProperties.isStatisticsEnabled; -import static io.trino.plugin.iceberg.IcebergTableProperties.FILE_FORMAT_PROPERTY; -import static io.trino.plugin.iceberg.IcebergTableProperties.FORMAT_VERSION_PROPERTY; -import static io.trino.plugin.iceberg.IcebergTableProperties.PARTITIONING_PROPERTY; -import static io.trino.plugin.iceberg.IcebergTableProperties.getPartitioning; -import static io.trino.plugin.iceberg.IcebergUtil.commit; -import static io.trino.plugin.iceberg.IcebergUtil.deserializePartitionValue; -import static 
io.trino.plugin.iceberg.IcebergUtil.fileName; -import static io.trino.plugin.iceberg.IcebergUtil.getColumnHandle; -import static io.trino.plugin.iceberg.IcebergUtil.getColumns; -import static io.trino.plugin.iceberg.IcebergUtil.getFileFormat; -import static io.trino.plugin.iceberg.IcebergUtil.getPartitionKeys; -import static io.trino.plugin.iceberg.IcebergUtil.getTableComment; -import static io.trino.plugin.iceberg.IcebergUtil.newCreateTableTransaction; -import static io.trino.plugin.iceberg.IcebergUtil.schemaFromMetadata; -import static io.trino.plugin.iceberg.PartitionFields.parsePartitionFields; -import static io.trino.plugin.iceberg.PartitionFields.toPartitionFields; -import static io.trino.plugin.iceberg.TableStatisticsReader.TRINO_STATS_COLUMN_ID_PATTERN; -import static io.trino.plugin.iceberg.TableStatisticsReader.TRINO_STATS_PREFIX; -import static io.trino.plugin.iceberg.TableType.DATA; -import static io.trino.plugin.iceberg.TypeConverter.toIcebergType; -import static io.trino.plugin.iceberg.TypeConverter.toTrinoType; -import static io.trino.plugin.iceberg.catalog.hms.TrinoHiveCatalog.DEPENDS_ON_TABLES; -import static io.trino.plugin.iceberg.procedure.IcebergTableProcedureId.DROP_EXTENDED_STATS; -import static io.trino.plugin.iceberg.procedure.IcebergTableProcedureId.EXPIRE_SNAPSHOTS; -import static io.trino.plugin.iceberg.procedure.IcebergTableProcedureId.OPTIMIZE; -import static io.trino.plugin.iceberg.procedure.IcebergTableProcedureId.REMOVE_ORPHAN_FILES; -import static io.trino.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY; -import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; -import static io.trino.spi.connector.MaterializedViewFreshness.Freshness.FRESH; -import static io.trino.spi.connector.MaterializedViewFreshness.Freshness.STALE; -import static io.trino.spi.connector.MaterializedViewFreshness.Freshness.UNKNOWN; -import static io.trino.spi.connector.RetryMode.NO_RETRIES; -import static java.lang.String.format; -import static java.util.Locale.ENGLISH; -import static java.util.Objects.requireNonNull; -import static java.util.function.Function.identity; -import static java.util.stream.Collectors.groupingBy; -import static java.util.stream.Collectors.joining; -import static org.apache.iceberg.FileContent.POSITION_DELETES; -import static org.apache.iceberg.ReachableFileUtil.metadataFileLocations; -import static org.apache.iceberg.ReachableFileUtil.versionHintLocation; -import static org.apache.iceberg.SnapshotSummary.DELETED_RECORDS_PROP; -import static org.apache.iceberg.SnapshotSummary.REMOVED_EQ_DELETES_PROP; -import static org.apache.iceberg.SnapshotSummary.REMOVED_POS_DELETES_PROP; -import static org.apache.iceberg.TableProperties.DELETE_ISOLATION_LEVEL; -import static org.apache.iceberg.TableProperties.DELETE_ISOLATION_LEVEL_DEFAULT; -import static org.apache.iceberg.TableProperties.FORMAT_VERSION; -import static org.apache.iceberg.TableProperties.WRITE_LOCATION_PROVIDER_IMPL; -import static org.apache.iceberg.types.TypeUtil.indexParents; - /** - * Iceberg original metadata has some problems for arctic, such as iceberg version, table type. - * So copy from IcebergMetadata and made some change + * Iceberg original metadata has some problems for arctic, such as iceberg version, table type. 
So + * copy from IcebergMetadata and made some change */ -public class IcebergMetadata - implements ConnectorMetadata { +public class IcebergMetadata implements ConnectorMetadata { private static final Logger log = Logger.get(io.trino.plugin.iceberg.IcebergMetadata.class); private static final Pattern PATH_PATTERN = Pattern.compile("(.*)/[^/]+"); private static final int OPTIMIZE_MAX_SUPPORTED_TABLE_VERSION = 2; @@ -304,7 +302,8 @@ public class IcebergMetadata private final TrinoFileSystemFactory fileSystemFactory; private final TableStatisticsWriter tableStatisticsWriter; - private final Map tableStatisticsCache = new ConcurrentHashMap<>(); + private final Map tableStatisticsCache = + new ConcurrentHashMap<>(); private Transaction transaction; @@ -318,7 +317,8 @@ public IcebergMetadata( this.commitTaskCodec = requireNonNull(commitTaskCodec, "commitTaskCodec is null"); this.catalog = requireNonNull(catalog, "catalog is null"); this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null"); - this.tableStatisticsWriter = requireNonNull(tableStatisticsWriter, "tableStatisticsWriter is null"); + this.tableStatisticsWriter = + requireNonNull(tableStatisticsWriter, "tableStatisticsWriter is null"); } @Override @@ -332,12 +332,14 @@ public List listSchemaNames(ConnectorSession session) { } @Override - public Map getSchemaProperties(ConnectorSession session, CatalogSchemaName schemaName) { + public Map getSchemaProperties( + ConnectorSession session, CatalogSchemaName schemaName) { return catalog.loadNamespaceMetadata(session, schemaName.getSchemaName()); } @Override - public Optional getSchemaOwner(ConnectorSession session, CatalogSchemaName schemaName) { + public Optional getSchemaOwner( + ConnectorSession session, CatalogSchemaName schemaName) { return catalog.getNamespacePrincipal(session, schemaName.getSchemaName()); } @@ -345,13 +347,16 @@ public Optional getSchemaOwner(ConnectorSession session, Catalog public IcebergTableHandle getTableHandle(ConnectorSession session, SchemaTableName tableName) { IcebergTableName name = IcebergTableName.from(tableName.getTableName()); if (name.getTableType() != DATA) { - // Pretend the table does not exist to produce better error message in case of table redirects to Hive + // Pretend the table does not exist to produce better error message in case of table redirects + // to Hive return null; } Table table; try { - table = catalog.loadTable(session, new SchemaTableName(tableName.getSchemaName(), name.getTableName())); + table = + catalog.loadTable( + session, new SchemaTableName(tableName.getSchemaName(), name.getTableName())); } catch (TableNotFoundException e) { return null; } @@ -402,10 +407,13 @@ public IcebergTableHandle getTableHandle(ConnectorSession session, SchemaTableNa @Override public Optional getSystemTable(ConnectorSession session, SchemaTableName tableName) { return getRawSystemTable(session, tableName) - .map(systemTable -> new ClassLoaderSafeSystemTable(systemTable, getClass().getClassLoader())); + .map( + systemTable -> + new ClassLoaderSafeSystemTable(systemTable, getClass().getClassLoader())); } - private Optional getRawSystemTable(ConnectorSession session, SchemaTableName tableName) { + private Optional getRawSystemTable( + ConnectorSession session, SchemaTableName tableName) { IcebergTableName name = IcebergTableName.from(tableName.getTableName()); if (name.getTableType() == DATA) { return Optional.empty(); @@ -414,7 +422,9 @@ private Optional getRawSystemTable(ConnectorSession session, Schema // load the base 
table for the system table Table table; try { - table = catalog.loadTable(session, new SchemaTableName(tableName.getSchemaName(), name.getTableName())); + table = + catalog.loadTable( + session, new SchemaTableName(tableName.getSchemaName(), name.getTableName())); } catch (TableNotFoundException e) { return Optional.empty(); } catch (UnknownTableTypeException e) { @@ -422,7 +432,8 @@ private Optional getRawSystemTable(ConnectorSession session, Schema return Optional.empty(); } - SchemaTableName systemTableName = new SchemaTableName(tableName.getSchemaName(), name.getTableNameWithType()); + SchemaTableName systemTableName = + new SchemaTableName(tableName.getSchemaName(), name.getTableNameWithType()); switch (name.getTableType()) { case DATA: // Handled above. @@ -432,11 +443,13 @@ private Optional getRawSystemTable(ConnectorSession session, Schema case SNAPSHOTS: return Optional.of(new SnapshotsTable(systemTableName, typeManager, table)); case PARTITIONS: - return Optional.of(new PartitionTable(systemTableName, typeManager, table, getCurrentSnapshotId(table))); + return Optional.of( + new PartitionTable(systemTableName, typeManager, table, getCurrentSnapshotId(table))); case MANIFESTS: return Optional.of(new ManifestsTable(systemTableName, table, getCurrentSnapshotId(table))); case FILES: - return Optional.of(new FilesTable(systemTableName, typeManager, table, getCurrentSnapshotId(table))); + return Optional.of( + new FilesTable(systemTableName, typeManager, table, getCurrentSnapshotId(table))); case PROPERTIES: return Optional.of(new PropertiesTable(systemTableName, table)); } @@ -444,7 +457,8 @@ private Optional getRawSystemTable(ConnectorSession session, Schema } @Override - public ConnectorTableProperties getTableProperties(ConnectorSession session, ConnectorTableHandle tableHandle) { + public ConnectorTableProperties getTableProperties( + ConnectorSession session, ConnectorTableHandle tableHandle) { IcebergTableHandle table = (IcebergTableHandle) tableHandle; if (table.getSnapshotId().isEmpty()) { @@ -460,7 +474,8 @@ public ConnectorTableProperties getTableProperties(ConnectorSession session, Con Table icebergTable = catalog.loadTable(session, table.getSchemaTableName()); - // Extract identity partition fields that are present in all partition specs, for creating the discrete predicates. + // Extract identity partition fields that are present in all partition specs, for creating the + // discrete predicates. 
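Just below, getTableProperties keeps the expensive tableScan.planFiles() call behind a memoized supplier and a lazy Iterable, so the files are planned at most once and only if the engine actually consumes the discrete predicates; the reformatting merely re-indents that lambda. The underlying Guava idiom, reduced to a self-contained sketch in which loadFiles() is a stand-in for the real planning call, not Trino code:

    import com.google.common.base.Supplier;
    import com.google.common.base.Suppliers;
    import com.google.common.collect.ImmutableList;
    import java.util.List;

    public class LazyPlanning {
      // Suppliers.memoize runs the delegate at most once, on the first get(), and caches the result.
      private final Supplier<List<String>> lazyFiles = Suppliers.memoize(LazyPlanning::loadFiles);

      private static List<String> loadFiles() {
        // Stand-in for tableScan.planFiles(): pretend this does expensive I/O.
        return ImmutableList.of("file-a.parquet", "file-b.parquet");
      }

      public int fileCount() {
        return lazyFiles.get().size(); // planning happens here, lazily
      }
    }
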
Set partitionSourceIds = identityPartitionColumnsInAllSpecs(icebergTable); TupleDomain enforcedPredicate = table.getEnforcedPredicate(); @@ -468,54 +483,67 @@ public ConnectorTableProperties getTableProperties(ConnectorSession session, Con DiscretePredicates discretePredicates = null; if (!partitionSourceIds.isEmpty()) { // Extract identity partition columns - Map columns = getColumns(icebergTable.schema(), typeManager).stream() - .filter(column -> partitionSourceIds.contains(column.getId())) - .collect(toImmutableMap(IcebergColumnHandle::getId, identity())); - - Supplier> lazyFiles = Suppliers.memoize(() -> { - TableScan tableScan = icebergTable.newScan() - .useSnapshot(table.getSnapshotId().get()) - .filter(toIcebergExpression(enforcedPredicate)); - - try (CloseableIterable iterator = tableScan.planFiles()) { - return ImmutableList.copyOf(iterator); - } catch (IOException e) { - throw new UncheckedIOException(e); - } - }); + Map columns = + getColumns(icebergTable.schema(), typeManager).stream() + .filter(column -> partitionSourceIds.contains(column.getId())) + .collect(toImmutableMap(IcebergColumnHandle::getId, identity())); + + Supplier> lazyFiles = + Suppliers.memoize( + () -> { + TableScan tableScan = + icebergTable + .newScan() + .useSnapshot(table.getSnapshotId().get()) + .filter(toIcebergExpression(enforcedPredicate)); + + try (CloseableIterable iterator = tableScan.planFiles()) { + return ImmutableList.copyOf(iterator); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }); Iterable files = () -> lazyFiles.get().iterator(); - Iterable> discreteTupleDomain = Iterables.transform(files, fileScan -> { - // Extract partition values in the data file - Map> partitionColumnValueStrings = getPartitionKeys(fileScan); - Map partitionValues = partitionSourceIds.stream() - .filter(partitionColumnValueStrings::containsKey) - .collect(toImmutableMap( - columns::get, - columnId -> { - IcebergColumnHandle column = columns.get(columnId); - Object prestoValue = deserializePartitionValue( - column.getType(), - partitionColumnValueStrings.get(columnId).orElse(null), - column.getName()); - - return NullableValue.of(column.getType(), prestoValue); - })); - - return TupleDomain.fromFixedValues(partitionValues); - }); - - discretePredicates = new DiscretePredicates( - columns.values().stream() - .map(ColumnHandle.class::cast) - .collect(toImmutableList()), - discreteTupleDomain); + Iterable> discreteTupleDomain = + Iterables.transform( + files, + fileScan -> { + // Extract partition values in the data file + Map> partitionColumnValueStrings = + getPartitionKeys(fileScan); + Map partitionValues = + partitionSourceIds.stream() + .filter(partitionColumnValueStrings::containsKey) + .collect( + toImmutableMap( + columns::get, + columnId -> { + IcebergColumnHandle column = columns.get(columnId); + Object prestoValue = + deserializePartitionValue( + column.getType(), + partitionColumnValueStrings.get(columnId).orElse(null), + column.getName()); + + return NullableValue.of(column.getType(), prestoValue); + })); + + return TupleDomain.fromFixedValues(partitionValues); + }); + + discretePredicates = + new DiscretePredicates( + columns.values().stream().map(ColumnHandle.class::cast).collect(toImmutableList()), + discreteTupleDomain); } return new ConnectorTableProperties( - // Using the predicate here directly avoids eagerly loading all partition values. Logically, this - // still keeps predicate and discretePredicates evaluation the same on every row of the table. 
This + // Using the predicate here directly avoids eagerly loading all partition values. Logically, + // this + // still keeps predicate and discretePredicates evaluation the same on every row of the + // table. This // can be further optimized by intersecting with partition values at the cost of iterating // over all tableScan.planFiles() and caching partition values in table handle. enforcedPredicate.transformKeys(ColumnHandle.class::cast), @@ -527,10 +555,12 @@ public ConnectorTableProperties getTableProperties(ConnectorSession session, Con } @Override - public ConnectorTableMetadata getTableMetadata(ConnectorSession session, ConnectorTableHandle table) { + public ConnectorTableMetadata getTableMetadata( + ConnectorSession session, ConnectorTableHandle table) { IcebergTableHandle tableHandle = (IcebergTableHandle) table; Table icebergTable = catalog.loadTable(session, tableHandle.getSchemaTableName()); - List columns = getColumnMetadatas(SchemaParser.fromJson(tableHandle.getTableSchemaJson())); + List columns = + getColumnMetadatas(SchemaParser.fromJson(tableHandle.getTableSchemaJson())); ImmutableMap.Builder properties = ImmutableMap.builder(); properties.put(FILE_FORMAT_PROPERTY, getFileFormat(icebergTable)); if (!icebergTable.spec().fields().isEmpty()) { @@ -545,7 +575,10 @@ public ConnectorTableMetadata getTableMetadata(ConnectorSession session, Connect properties.put(FORMAT_VERSION_PROPERTY, formatVersion); return new ConnectorTableMetadata( - tableHandle.getSchemaTableName(), columns, properties.buildOrThrow(), getTableComment(icebergTable)); + tableHandle.getSchemaTableName(), + columns, + properties.buildOrThrow(), + getTableComment(icebergTable)); } @Override @@ -554,7 +587,8 @@ public List listTables(ConnectorSession session, Optional getColumnHandles(ConnectorSession session, ConnectorTableHandle tableHandle) { + public Map getColumnHandles( + ConnectorSession session, ConnectorTableHandle tableHandle) { IcebergTableHandle table = (IcebergTableHandle) tableHandle; ImmutableMap.Builder columnHandles = ImmutableMap.builder(); for (IcebergColumnHandle columnHandle : @@ -568,9 +602,7 @@ public Map getColumnHandles(ConnectorSession session, Conn @Override public ColumnMetadata getColumnMetadata( - ConnectorSession session, - ConnectorTableHandle tableHandle, - ColumnHandle columnHandle) { + ConnectorSession session, ConnectorTableHandle tableHandle, ColumnHandle columnHandle) { IcebergColumnHandle column = (IcebergColumnHandle) columnHandle; return ColumnMetadata.builder() .setName(column.getName()) @@ -581,14 +613,14 @@ public ColumnMetadata getColumnMetadata( @Override public Map> listTableColumns( - ConnectorSession session, - SchemaTablePrefix prefix) { + ConnectorSession session, SchemaTablePrefix prefix) { throw new UnsupportedOperationException( "The deprecated listTableColumns is not supported because streamTableColumns is implemented instead"); } @Override - public Iterator streamTableColumns(ConnectorSession session, SchemaTablePrefix prefix) { + public Iterator streamTableColumns( + ConnectorSession session, SchemaTablePrefix prefix) { requireNonNull(prefix, "prefix is null"); List schemaTableNames; if (prefix.getTable().isEmpty()) { @@ -597,32 +629,33 @@ public Iterator streamTableColumns(ConnectorSession sessio schemaTableNames = ImmutableList.of(prefix.toSchemaTableName()); } return schemaTableNames.stream() - .flatMap(tableName -> { - try { - if (redirectTable(session, tableName).isPresent()) { - return Stream.of(TableColumnsMetadata.forRedirectedTable(tableName)); 
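streamTableColumns (the hunk continues just below) wraps each per-table metadata load in a try/catch inside the flatMap, so a table that disappears during the listing, or one with an unsupported type, is logged and dropped from the stream instead of failing the whole listing. The same pattern, stripped down to a self-contained sketch with a made-up loader function in place of catalog.loadTable:

    import java.util.List;
    import java.util.function.Function;
    import java.util.stream.Collectors;
    import java.util.stream.Stream;

    public class TolerantListing {
      /** Describes every table it can; tables whose load fails are skipped, not fatal. */
      static List<String> describeAll(List<String> tableNames, Function<String, String> loader) {
        return tableNames.stream()
            .flatMap(
                name -> {
                  try {
                    return Stream.of(loader.apply(name));
                  } catch (RuntimeException e) {
                    // e.g. the table was dropped while we were listing; skip it
                    return Stream.empty();
                  }
                })
            .collect(Collectors.toList());
      }
    }
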
- } - - Table icebergTable = catalog.loadTable(session, tableName); - List columns = getColumnMetadatas(icebergTable.schema()); - return Stream.of(TableColumnsMetadata.forTable(tableName, columns)); - } catch (TableNotFoundException e) { - // Table disappeared during listing operation - return Stream.empty(); - } catch (UnknownTableTypeException e) { - // Skip unsupported table type in case that the table redirects are not enabled - return Stream.empty(); - } catch (RuntimeException e) { - // Table can be being removed and this may cause all sorts of exceptions. - // Log, because we're catching broadly. - log.warn( - e, - "Failed to access metadata of table %s during streaming table columns for %s", - tableName, - prefix); - return Stream.empty(); - } - }) + .flatMap( + tableName -> { + try { + if (redirectTable(session, tableName).isPresent()) { + return Stream.of(TableColumnsMetadata.forRedirectedTable(tableName)); + } + + Table icebergTable = catalog.loadTable(session, tableName); + List columns = getColumnMetadatas(icebergTable.schema()); + return Stream.of(TableColumnsMetadata.forTable(tableName, columns)); + } catch (TableNotFoundException e) { + // Table disappeared during listing operation + return Stream.empty(); + } catch (UnknownTableTypeException e) { + // Skip unsupported table type in case that the table redirects are not enabled + return Stream.empty(); + } catch (RuntimeException e) { + // Table can be being removed and this may cause all sorts of exceptions. + // Log, because we're catching broadly. + log.warn( + e, + "Failed to access metadata of table %s during streaming table columns for %s", + tableName, + prefix); + return Stream.empty(); + } + }) .iterator(); } @@ -646,12 +679,14 @@ public void renameSchema(ConnectorSession session, String source, String target) } @Override - public void setSchemaAuthorization(ConnectorSession session, String schemaName, TrinoPrincipal principal) { + public void setSchemaAuthorization( + ConnectorSession session, String schemaName, TrinoPrincipal principal) { catalog.setNamespacePrincipal(session, schemaName, principal); } @Override - public void createTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, boolean ignoreExisting) { + public void createTable( + ConnectorSession session, ConnectorTableMetadata tableMetadata, boolean ignoreExisting) { Optional layout = getNewTableLayout(session, tableMetadata); finishCreateTable( session, @@ -661,16 +696,18 @@ public void createTable(ConnectorSession session, ConnectorTableMetadata tableMe } @Override - public void setTableComment(ConnectorSession session, ConnectorTableHandle tableHandle, Optional comment) { - catalog.updateTableComment(session, ((IcebergTableHandle) tableHandle).getSchemaTableName(), comment); + public void setTableComment( + ConnectorSession session, ConnectorTableHandle tableHandle, Optional comment) { + catalog.updateTableComment( + session, ((IcebergTableHandle) tableHandle).getSchemaTableName(), comment); } @Override public Optional getNewTableLayout( - ConnectorSession session, - ConnectorTableMetadata tableMetadata) { + ConnectorSession session, ConnectorTableMetadata tableMetadata) { Schema schema = schemaFromMetadata(tableMetadata.getColumns()); - PartitionSpec partitionSpec = parsePartitionFields(schema, getPartitioning(tableMetadata.getProperties())); + PartitionSpec partitionSpec = + parsePartitionFields(schema, getPartitioning(tableMetadata.getProperties())); return getWriteLayout(schema, partitionSpec, false); } @@ -690,14 +727,19 @@ 
public ConnectorOutputTableHandle beginCreateTable( TrinoFileSystem fileSystem = fileSystemFactory.create(session); try { if (fileSystem.listFiles(location).hasNext()) { - throw new TrinoException(ICEBERG_FILESYSTEM_ERROR, format("" + - "Cannot create a table on a non-empty location:" + - " %s, set 'iceberg.unique-table-location=true' in your Iceberg catalog properties " + - "to use unique table locations for every table.", location)); + throw new TrinoException( + ICEBERG_FILESYSTEM_ERROR, + format( + "" + + "Cannot create a table on a non-empty location:" + + " %s, set 'iceberg.unique-table-location=true' in your Iceberg catalog properties " + + "to use unique table locations for every table.", + location)); } return newWritableTableHandle(tableMetadata.getTable(), transaction.table(), retryMode); } catch (IOException e) { - throw new TrinoException(ICEBERG_FILESYSTEM_ERROR, "Failed checking new table's location: " + location, e); + throw new TrinoException( + ICEBERG_FILESYSTEM_ERROR, "Failed checking new table's location: " + location, e); } } @@ -716,14 +758,13 @@ public Optional finishCreateTable( return Optional.empty(); } - return finishInsert(session, (IcebergWritableTableHandle) tableHandle, fragments, computedStatistics); + return finishInsert( + session, (IcebergWritableTableHandle) tableHandle, fragments, computedStatistics); } @Override public ConnectorMergeTableHandle beginMerge( - ConnectorSession session, - ConnectorTableHandle tableHandle, - RetryMode retryMode) { + ConnectorSession session, ConnectorTableHandle tableHandle, RetryMode retryMode) { IcebergTableHandle table = (IcebergTableHandle) tableHandle; verifyTableVersionForUpdate(table); @@ -752,7 +793,8 @@ public void finishMerge( private static void verifyTableVersionForUpdate(IcebergTableHandle table) { if (table.getFormatVersion() < 2) { - throw new TrinoException(NOT_SUPPORTED, "Iceberg table updates require at least format version 2"); + throw new TrinoException( + NOT_SUPPORTED, "Iceberg table updates require at least format version 2"); } } @@ -763,9 +805,10 @@ private void finishWrite( boolean runUpdateValidations) { Table icebergTable = transaction.table(); - List commitTasks = fragments.stream() - .map(slice -> commitTaskCodec.fromJson(slice.getBytes())) - .collect(toImmutableList()); + List commitTasks = + fragments.stream() + .map(slice -> commitTaskCodec.fromJson(slice.getBytes())) + .collect(toImmutableList()); if (commitTasks.isEmpty()) { // Avoid recording "empty" write operation @@ -775,25 +818,34 @@ private void finishWrite( Schema schema = SchemaParser.fromJson(table.getTableSchemaJson()); - Map> deletesByFilePath = commitTasks.stream() - .filter(task -> task.getContent() == POSITION_DELETES) - .collect(groupingBy(task -> task.getReferencedDataFile().orElseThrow())); - Map> fullyDeletedFiles = deletesByFilePath - .entrySet().stream() - .filter(entry -> fileIsFullyDeleted(entry.getValue())) - .collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue)); - - if (!deletesByFilePath.keySet().equals(fullyDeletedFiles.keySet()) || - commitTasks.stream().anyMatch(task -> task.getContent() == FileContent.DATA)) { + Map> deletesByFilePath = + commitTasks.stream() + .filter(task -> task.getContent() == POSITION_DELETES) + .collect(groupingBy(task -> task.getReferencedDataFile().orElseThrow())); + Map> fullyDeletedFiles = + deletesByFilePath.entrySet().stream() + .filter(entry -> fileIsFullyDeleted(entry.getValue())) + .collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue)); + + if 
(!deletesByFilePath.keySet().equals(fullyDeletedFiles.keySet()) + || commitTasks.stream().anyMatch(task -> task.getContent() == FileContent.DATA)) { RowDelta rowDelta = transaction.newRowDelta(); - table.getSnapshotId().map(icebergTable::snapshot).ifPresent(s -> rowDelta.validateFromSnapshot(s.snapshotId())); - TupleDomain dataColumnPredicate = table.getEnforcedPredicate().filter((column, domain) -> - !isMetadataColumnId(column.getId())); + table + .getSnapshotId() + .map(icebergTable::snapshot) + .ifPresent(s -> rowDelta.validateFromSnapshot(s.snapshotId())); + TupleDomain dataColumnPredicate = + table + .getEnforcedPredicate() + .filter((column, domain) -> !isMetadataColumnId(column.getId())); if (!dataColumnPredicate.isAll()) { rowDelta.conflictDetectionFilter(toIcebergExpression(dataColumnPredicate)); } - IsolationLevel isolationLevel = IsolationLevel.fromName( - icebergTable.properties().getOrDefault(DELETE_ISOLATION_LEVEL, DELETE_ISOLATION_LEVEL_DEFAULT)); + IsolationLevel isolationLevel = + IsolationLevel.fromName( + icebergTable + .properties() + .getOrDefault(DELETE_ISOLATION_LEVEL, DELETE_ISOLATION_LEVEL_DEFAULT)); if (isolationLevel == IsolationLevel.SERIALIZABLE) { rowDelta.validateNoConflictingDataFiles(); } @@ -807,27 +859,33 @@ private void finishWrite( ImmutableSet.Builder writtenFiles = ImmutableSet.builder(); ImmutableSet.Builder referencedDataFiles = ImmutableSet.builder(); for (CommitTaskData task : commitTasks) { - PartitionSpec partitionSpec = PartitionSpecParser.fromJson(schema, task.getPartitionSpecJson()); - Type[] partitionColumnTypes = partitionSpec.fields().stream() - .map(field -> field.transform().getResultType(schema.findType(field.sourceId()))) - .toArray(Type[]::new); + PartitionSpec partitionSpec = + PartitionSpecParser.fromJson(schema, task.getPartitionSpecJson()); + Type[] partitionColumnTypes = + partitionSpec.fields().stream() + .map(field -> field.transform().getResultType(schema.findType(field.sourceId()))) + .toArray(Type[]::new); switch (task.getContent()) { case POSITION_DELETES: if (fullyDeletedFiles.containsKey(task.getReferencedDataFile().orElseThrow())) { continue; } - FileMetadata.Builder deleteBuilder = FileMetadata.deleteFileBuilder(partitionSpec) - .withPath(task.getPath()) - .withFormat(task.getFileFormat().toIceberg()) - .ofPositionDeletes() - .withFileSizeInBytes(task.getFileSizeInBytes()) - .withMetrics(task.getMetrics().metrics()); + FileMetadata.Builder deleteBuilder = + FileMetadata.deleteFileBuilder(partitionSpec) + .withPath(task.getPath()) + .withFormat(task.getFileFormat().toIceberg()) + .ofPositionDeletes() + .withFileSizeInBytes(task.getFileSizeInBytes()) + .withMetrics(task.getMetrics().metrics()); if (!partitionSpec.fields().isEmpty()) { - String partitionDataJson = task.getPartitionDataJson() - .orElseThrow(() -> new VerifyException("No partition data for partitioned table")); - deleteBuilder.withPartition(PartitionData.fromJson(partitionDataJson, partitionColumnTypes)); + String partitionDataJson = + task.getPartitionDataJson() + .orElseThrow( + () -> new VerifyException("No partition data for partitioned table")); + deleteBuilder.withPartition( + PartitionData.fromJson(partitionDataJson, partitionColumnTypes)); } rowDelta.addDeletes(deleteBuilder.build()); @@ -835,22 +893,27 @@ private void finishWrite( task.getReferencedDataFile().ifPresent(referencedDataFiles::add); break; case DATA: - DataFiles.Builder builder = DataFiles.builder(partitionSpec) - .withPath(task.getPath()) - .withFormat(task.getFileFormat().toIceberg()) 
- .withFileSizeInBytes(task.getFileSizeInBytes()) - .withMetrics(task.getMetrics().metrics()); + DataFiles.Builder builder = + DataFiles.builder(partitionSpec) + .withPath(task.getPath()) + .withFormat(task.getFileFormat().toIceberg()) + .withFileSizeInBytes(task.getFileSizeInBytes()) + .withMetrics(task.getMetrics().metrics()); if (!icebergTable.spec().fields().isEmpty()) { - String partitionDataJson = task.getPartitionDataJson() - .orElseThrow(() -> new VerifyException("No partition data for partitioned table")); - builder.withPartition(PartitionData.fromJson(partitionDataJson, partitionColumnTypes)); + String partitionDataJson = + task.getPartitionDataJson() + .orElseThrow( + () -> new VerifyException("No partition data for partitioned table")); + builder.withPartition( + PartitionData.fromJson(partitionDataJson, partitionColumnTypes)); } rowDelta.addRows(builder.build()); writtenFiles.add(task.getPath()); break; default: - throw new UnsupportedOperationException("Unsupported task content: " + task.getContent()); + throw new UnsupportedOperationException( + "Unsupported task content: " + task.getContent()); } } @@ -865,17 +928,19 @@ private void finishWrite( } catch (ValidationException e) { throw new TrinoException( ICEBERG_COMMIT_ERROR, - "Failed to commit Iceberg update to table: " + table.getSchemaTableName(), e); + "Failed to commit Iceberg update to table: " + table.getSchemaTableName(), + e); } } if (!fullyDeletedFiles.isEmpty()) { try { TrinoFileSystem fileSystem = fileSystemFactory.create(session); - fileSystem.deleteFiles(fullyDeletedFiles.values().stream() - .flatMap(Collection::stream) - .map(CommitTaskData::getPath) - .collect(toImmutableSet())); + fileSystem.deleteFiles( + fullyDeletedFiles.values().stream() + .flatMap(Collection::stream) + .map(CommitTaskData::getPath) + .collect(toImmutableSet())); } catch (IOException e) { log.warn(e, "Failed to clean up uncommitted position delete files"); } @@ -891,7 +956,8 @@ private void finishWrite( } catch (ValidationException e) { throw new TrinoException( ICEBERG_COMMIT_ERROR, - "Failed to commit Iceberg update to table: " + table.getSchemaTableName(), e); + "Failed to commit Iceberg update to table: " + table.getSchemaTableName(), + e); } transaction = null; } @@ -900,57 +966,66 @@ private static boolean fileIsFullyDeleted(List positionDeletes) checkArgument(!positionDeletes.isEmpty(), "Cannot call fileIsFullyDeletes with an empty list"); String referencedDataFile = positionDeletes.get(0).getReferencedDataFile().orElseThrow(); long fileRecordCount = positionDeletes.get(0).getFileRecordCount().orElseThrow(); - checkArgument(positionDeletes.stream().allMatch(positionDelete -> - positionDelete.getReferencedDataFile().orElseThrow().equals(referencedDataFile) && - positionDelete.getFileRecordCount().orElseThrow() == fileRecordCount), + checkArgument( + positionDeletes.stream() + .allMatch( + positionDelete -> + positionDelete.getReferencedDataFile().orElseThrow().equals(referencedDataFile) + && positionDelete.getFileRecordCount().orElseThrow() == fileRecordCount), "All position deletes must be for the same file and have the same fileRecordCount"); - long deletedRowCount = positionDeletes.stream() - .map(CommitTaskData::getDeletedRowCount) - .mapToLong(Optional::orElseThrow) - .sum(); + long deletedRowCount = + positionDeletes.stream() + .map(CommitTaskData::getDeletedRowCount) + .mapToLong(Optional::orElseThrow) + .sum(); checkState( - deletedRowCount <= fileRecordCount, - "Found more deleted rows than exist in the file"); + 
deletedRowCount <= fileRecordCount, "Found more deleted rows than exist in the file"); return fileRecordCount == deletedRowCount; } @Override - public Optional getInsertLayout(ConnectorSession session, ConnectorTableHandle tableHandle) { + public Optional getInsertLayout( + ConnectorSession session, ConnectorTableHandle tableHandle) { IcebergTableHandle table = (IcebergTableHandle) tableHandle; Schema schema = SchemaParser.fromJson(table.getTableSchemaJson()); - PartitionSpec partitionSpec = PartitionSpecParser.fromJson( - schema, - table.getPartitionSpecJson().orElseThrow(() -> - new VerifyException("Partition spec missing in the table handle"))); + PartitionSpec partitionSpec = + PartitionSpecParser.fromJson( + schema, + table + .getPartitionSpecJson() + .orElseThrow( + () -> new VerifyException("Partition spec missing in the table handle"))); return getWriteLayout(schema, partitionSpec, false); } private Optional getWriteLayout( - Schema tableSchema, - PartitionSpec partitionSpec, - boolean forceRepartitioning) { + Schema tableSchema, PartitionSpec partitionSpec, boolean forceRepartitioning) { if (partitionSpec.isUnpartitioned()) { return Optional.empty(); } validateNotPartitionedByNestedField(tableSchema, partitionSpec); - Map columnById = getColumns(tableSchema, typeManager).stream() - .collect(toImmutableMap(IcebergColumnHandle::getId, identity())); - - List partitioningColumns = partitionSpec.fields().stream() - .sorted(Comparator.comparing(PartitionField::sourceId)) - .map(field -> requireNonNull( - columnById.get(field.sourceId()), - () -> "Cannot find source column for partitioning field " + field)) - .distinct() - .collect(toImmutableList()); - List partitioningColumnNames = partitioningColumns.stream() - .map(IcebergColumnHandle::getName) - .collect(toImmutableList()); - - if (!forceRepartitioning && - partitionSpec.fields().stream().allMatch(field -> field.transform().isIdentity())) { - // Do not set partitioningHandle, to let engine determine whether to repartition data or not, on stat-based basis. + Map columnById = + getColumns(tableSchema, typeManager).stream() + .collect(toImmutableMap(IcebergColumnHandle::getId, identity())); + + List partitioningColumns = + partitionSpec.fields().stream() + .sorted(Comparator.comparing(PartitionField::sourceId)) + .map( + field -> + requireNonNull( + columnById.get(field.sourceId()), + () -> "Cannot find source column for partitioning field " + field)) + .distinct() + .collect(toImmutableList()); + List partitioningColumnNames = + partitioningColumns.stream().map(IcebergColumnHandle::getName).collect(toImmutableList()); + + if (!forceRepartitioning + && partitionSpec.fields().stream().allMatch(field -> field.transform().isIdentity())) { + // Do not set partitioningHandle, to let engine determine whether to repartition data or not, + // on stat-based basis. 
return Optional.of(new ConnectorTableLayout(partitioningColumnNames)); } IcebergPartitioningHandle partitioningHandle = @@ -975,7 +1050,8 @@ public ConnectorInsertTableHandle beginInsert( return newWritableTableHandle(table.getSchemaTableName(), icebergTable, retryMode); } - private IcebergWritableTableHandle newWritableTableHandle(SchemaTableName name, Table table, RetryMode retryMode) { + private IcebergWritableTableHandle newWritableTableHandle( + SchemaTableName name, Table table, RetryMode retryMode) { return new IcebergWritableTableHandle( name, SchemaParser.toJson(table.schema()), @@ -994,9 +1070,10 @@ public Optional finishInsert( ConnectorInsertTableHandle insertHandle, Collection fragments, Collection computedStatistics) { - List commitTasks = fragments.stream() - .map(slice -> commitTaskCodec.fromJson(slice.getBytes())) - .collect(toImmutableList()); + List commitTasks = + fragments.stream() + .map(slice -> commitTaskCodec.fromJson(slice.getBytes())) + .collect(toImmutableList()); if (commitTasks.isEmpty()) { transaction = null; @@ -1005,23 +1082,29 @@ public Optional finishInsert( IcebergWritableTableHandle table = (IcebergWritableTableHandle) insertHandle; Table icebergTable = transaction.table(); - Type[] partitionColumnTypes = icebergTable.spec().fields().stream() - .map(field -> field.transform().getResultType( - icebergTable.schema().findType(field.sourceId()))) - .toArray(Type[]::new); + Type[] partitionColumnTypes = + icebergTable.spec().fields().stream() + .map( + field -> + field + .transform() + .getResultType(icebergTable.schema().findType(field.sourceId()))) + .toArray(Type[]::new); AppendFiles appendFiles = transaction.newAppend(); ImmutableSet.Builder writtenFiles = ImmutableSet.builder(); for (CommitTaskData task : commitTasks) { - DataFiles.Builder builder = DataFiles.builder(icebergTable.spec()) - .withPath(task.getPath()) - .withFileSizeInBytes(task.getFileSizeInBytes()) - .withFormat(table.getFileFormat().toIceberg()) - .withMetrics(task.getMetrics().metrics()); + DataFiles.Builder builder = + DataFiles.builder(icebergTable.spec()) + .withPath(task.getPath()) + .withFileSizeInBytes(task.getFileSizeInBytes()) + .withFormat(table.getFileFormat().toIceberg()) + .withMetrics(task.getMetrics().metrics()); if (!icebergTable.spec().fields().isEmpty()) { - String partitionDataJson = task.getPartitionDataJson() - .orElseThrow(() -> new VerifyException("No partition data for partitioned table")); + String partitionDataJson = + task.getPartitionDataJson() + .orElseThrow(() -> new VerifyException("No partition data for partitioned table")); builder.withPartition(PartitionData.fromJson(partitionDataJson, partitionColumnTypes)); } @@ -1038,9 +1121,9 @@ public Optional finishInsert( transaction.commitTransaction(); transaction = null; - return Optional.of(new HiveWrittenPartitions(commitTasks.stream() - .map(CommitTaskData::getPath) - .collect(toImmutableList()))); + return Optional.of( + new HiveWrittenPartitions( + commitTasks.stream().map(CommitTaskData::getPath).collect(toImmutableList()))); } private void cleanExtraOutputFiles(ConnectorSession session, Set writtenFiles) { @@ -1053,9 +1136,7 @@ private void cleanExtraOutputFiles(ConnectorSession session, Set written } private static void cleanExtraOutputFiles( - TrinoFileSystem fileSystem, - String queryId, String location, - Set fileNamesToKeep) { + TrinoFileSystem fileSystem, String queryId, String location, Set fileNamesToKeep) { checkArgument(!queryId.contains("-"), "query ID should not contain hyphens: %s", 
queryId); Deque filesToDelete = new ArrayDeque<>(); @@ -1075,7 +1156,8 @@ private static void cleanExtraOutputFiles( return; } - log.info("Found %s files to delete and %s to retain in location %s for query %s", + log.info( + "Found %s files to delete and %s to retain in location %s for query %s", filesToDelete.size(), fileNamesToKeep.size(), location, queryId); ImmutableList.Builder deletedFilesBuilder = ImmutableList.builder(); Iterator filesToDeleteIterator = filesToDelete.iterator(); @@ -1100,24 +1182,24 @@ private static void cleanExtraOutputFiles( List deletedFiles = deletedFilesBuilder.build(); if (!deletedFiles.isEmpty()) { - log.info("Deleted failed attempt files %s from %s for query %s", deletedFiles, location, queryId); + log.info( + "Deleted failed attempt files %s from %s for query %s", + deletedFiles, location, queryId); } } catch (IOException e) { - throw new TrinoException(ICEBERG_FILESYSTEM_ERROR, - format("Could not clean up extraneous output files; remaining files: %s", filesToDelete), e); + throw new TrinoException( + ICEBERG_FILESYSTEM_ERROR, + format("Could not clean up extraneous output files; remaining files: %s", filesToDelete), + e); } } private static Set getOutputFilesLocations(Set writtenFiles) { - return writtenFiles.stream() - .map(IcebergMetadata::getLocation) - .collect(toImmutableSet()); + return writtenFiles.stream().map(IcebergMetadata::getLocation).collect(toImmutableSet()); } private static Set getOutputFilesFileNames(Set writtenFiles) { - return writtenFiles.stream() - .map(IcebergUtil::fileName) - .collect(toImmutableSet()); + return writtenFiles.stream().map(IcebergUtil::fileName).collect(toImmutableSet()); } private static String getLocation(String path) { @@ -1134,17 +1216,18 @@ public Optional getTableHandleForExecute( Map executeProperties, RetryMode retryMode) { IcebergTableHandle tableHandle = (IcebergTableHandle) connectorTableHandle; - checkArgument(tableHandle.getTableType() == DATA, + checkArgument( + tableHandle.getTableType() == DATA, "Cannot execute table procedure %s on non-DATA table: %s", procedureName, tableHandle.getTableType()); Table icebergTable = catalog.loadTable(session, tableHandle.getSchemaTableName()); - if (tableHandle.getSnapshotId().isPresent() && - (tableHandle.getSnapshotId().get() != icebergTable.currentSnapshot().snapshotId())) { - throw new TrinoException(NOT_SUPPORTED, - "Cannot execute table procedure %s on old snapshot %s".formatted( - procedureName, tableHandle.getSnapshotId().get()) - ); + if (tableHandle.getSnapshotId().isPresent() + && (tableHandle.getSnapshotId().get() != icebergTable.currentSnapshot().snapshotId())) { + throw new TrinoException( + NOT_SUPPORTED, + "Cannot execute table procedure %s on old snapshot %s" + .formatted(procedureName, tableHandle.getSnapshotId().get())); } IcebergTableProcedureId procedureId; @@ -1157,43 +1240,46 @@ public Optional getTableHandleForExecute( return switch (procedureId) { case OPTIMIZE -> getTableHandleForOptimize(tableHandle, executeProperties, retryMode); case DROP_EXTENDED_STATS -> getTableHandleForDropExtendedStats(session, tableHandle); - case EXPIRE_SNAPSHOTS -> getTableHandleForExpireSnapshots(session, tableHandle, executeProperties); - case REMOVE_ORPHAN_FILES -> getTableHandleForRemoveOrphanFiles(session, tableHandle, executeProperties); + case EXPIRE_SNAPSHOTS -> getTableHandleForExpireSnapshots( + session, tableHandle, executeProperties); + case REMOVE_ORPHAN_FILES -> getTableHandleForRemoveOrphanFiles( + session, tableHandle, executeProperties); }; } 
private Optional getTableHandleForOptimize( - IcebergTableHandle tableHandle, - Map executeProperties, - RetryMode retryMode) { + IcebergTableHandle tableHandle, Map executeProperties, RetryMode retryMode) { DataSize maxScannedFileSize = (DataSize) executeProperties.get("file_size_threshold"); - return Optional.of(new IcebergTableExecuteHandle( - tableHandle.getSchemaTableName(), - OPTIMIZE, - new IcebergOptimizeHandle( - tableHandle.getSnapshotId(), - tableHandle.getTableSchemaJson(), - tableHandle.getPartitionSpecJson().orElseThrow(() -> - new VerifyException("Partition spec missing in the table handle")), - getColumns(SchemaParser.fromJson(tableHandle.getTableSchemaJson()), typeManager), - getFileFormat(tableHandle.getStorageProperties()), - tableHandle.getStorageProperties(), - maxScannedFileSize, - retryMode != NO_RETRIES), - tableHandle.getTableLocation())); + return Optional.of( + new IcebergTableExecuteHandle( + tableHandle.getSchemaTableName(), + OPTIMIZE, + new IcebergOptimizeHandle( + tableHandle.getSnapshotId(), + tableHandle.getTableSchemaJson(), + tableHandle + .getPartitionSpecJson() + .orElseThrow( + () -> new VerifyException("Partition spec missing in the table handle")), + getColumns(SchemaParser.fromJson(tableHandle.getTableSchemaJson()), typeManager), + getFileFormat(tableHandle.getStorageProperties()), + tableHandle.getStorageProperties(), + maxScannedFileSize, + retryMode != NO_RETRIES), + tableHandle.getTableLocation())); } private Optional getTableHandleForDropExtendedStats( - ConnectorSession session, - IcebergTableHandle tableHandle) { + ConnectorSession session, IcebergTableHandle tableHandle) { Table icebergTable = catalog.loadTable(session, tableHandle.getSchemaTableName()); - return Optional.of(new IcebergTableExecuteHandle( - tableHandle.getSchemaTableName(), - DROP_EXTENDED_STATS, - new IcebergDropExtendedStatsHandle(), - icebergTable.location())); + return Optional.of( + new IcebergTableExecuteHandle( + tableHandle.getSchemaTableName(), + DROP_EXTENDED_STATS, + new IcebergDropExtendedStatsHandle(), + icebergTable.location())); } private Optional getTableHandleForExpireSnapshots( @@ -1203,11 +1289,12 @@ private Optional getTableHandleForExpireSnapshots( Duration retentionThreshold = (Duration) executeProperties.get(RETENTION_THRESHOLD); Table icebergTable = catalog.loadTable(session, tableHandle.getSchemaTableName()); - return Optional.of(new IcebergTableExecuteHandle( - tableHandle.getSchemaTableName(), - EXPIRE_SNAPSHOTS, - new IcebergExpireSnapshotsHandle(retentionThreshold), - icebergTable.location())); + return Optional.of( + new IcebergTableExecuteHandle( + tableHandle.getSchemaTableName(), + EXPIRE_SNAPSHOTS, + new IcebergExpireSnapshotsHandle(retentionThreshold), + icebergTable.location())); } private Optional getTableHandleForRemoveOrphanFiles( @@ -1217,17 +1304,17 @@ private Optional getTableHandleForRemoveOrphanFiles Duration retentionThreshold = (Duration) executeProperties.get(RETENTION_THRESHOLD); Table icebergTable = catalog.loadTable(session, tableHandle.getSchemaTableName()); - return Optional.of(new IcebergTableExecuteHandle( - tableHandle.getSchemaTableName(), - REMOVE_ORPHAN_FILES, - new IcebergRemoveOrphanFilesHandle(retentionThreshold), - icebergTable.location())); + return Optional.of( + new IcebergTableExecuteHandle( + tableHandle.getSchemaTableName(), + REMOVE_ORPHAN_FILES, + new IcebergRemoveOrphanFilesHandle(retentionThreshold), + icebergTable.location())); } @Override public Optional getLayoutForTableExecute( - ConnectorSession 
session, - ConnectorTableExecuteHandle tableExecuteHandle) { + ConnectorSession session, ConnectorTableExecuteHandle tableExecuteHandle) { IcebergTableExecuteHandle executeHandle = (IcebergTableExecuteHandle) tableExecuteHandle; switch (executeHandle.getProcedureId()) { case OPTIMIZE: @@ -1237,23 +1324,25 @@ public Optional getLayoutForTableExecute( case REMOVE_ORPHAN_FILES: // handled via executeTableExecute } - throw new IllegalArgumentException("Unknown procedure '" + executeHandle.getProcedureId() + "'"); + throw new IllegalArgumentException( + "Unknown procedure '" + executeHandle.getProcedureId() + "'"); } private Optional getLayoutForOptimize( - ConnectorSession session, - IcebergTableExecuteHandle executeHandle) { + ConnectorSession session, IcebergTableExecuteHandle executeHandle) { Table icebergTable = catalog.loadTable(session, executeHandle.getSchemaTableName()); - // from performance perspective it is better to have lower number of bigger files than other way around + // from performance perspective it is better to have lower number of bigger files than other way + // around // thus we force repartitioning for optimize to achieve this return getWriteLayout(icebergTable.schema(), icebergTable.spec(), true); } @Override - public BeginTableExecuteResult beginTableExecute( - ConnectorSession session, - ConnectorTableExecuteHandle tableExecuteHandle, - ConnectorTableHandle updatedSourceTableHandle) { + public BeginTableExecuteResult + beginTableExecute( + ConnectorSession session, + ConnectorTableExecuteHandle tableExecuteHandle, + ConnectorTableHandle updatedSourceTableHandle) { IcebergTableExecuteHandle executeHandle = (IcebergTableExecuteHandle) tableExecuteHandle; IcebergTableHandle table = (IcebergTableHandle) updatedSourceTableHandle; switch (executeHandle.getProcedureId()) { @@ -1264,34 +1353,36 @@ public BeginTableExecuteResult beginOptimize( - ConnectorSession session, - IcebergTableExecuteHandle executeHandle, - IcebergTableHandle table) { - IcebergOptimizeHandle optimizeHandle = (IcebergOptimizeHandle) executeHandle.getProcedureHandle(); + ConnectorSession session, IcebergTableExecuteHandle executeHandle, IcebergTableHandle table) { + IcebergOptimizeHandle optimizeHandle = + (IcebergOptimizeHandle) executeHandle.getProcedureHandle(); Table icebergTable = catalog.loadTable(session, table.getSchemaTableName()); validateNotModifyingOldSnapshot(table, icebergTable); validateNotPartitionedByNestedField(icebergTable.schema(), icebergTable.spec()); - int tableFormatVersion = ((HasTableOperations) icebergTable).operations().current().formatVersion(); + int tableFormatVersion = + ((HasTableOperations) icebergTable).operations().current().formatVersion(); if (tableFormatVersion > OPTIMIZE_MAX_SUPPORTED_TABLE_VERSION) { - throw new TrinoException(NOT_SUPPORTED, format( - "%s is not supported for Iceberg table format version > %d. Table %s format version is %s.", - OPTIMIZE.name(), - OPTIMIZE_MAX_SUPPORTED_TABLE_VERSION, - table.getSchemaTableName(), - tableFormatVersion)); + throw new TrinoException( + NOT_SUPPORTED, + format( + "%s is not supported for Iceberg table format version > %d. 
Table %s format version is %s.", + OPTIMIZE.name(), + OPTIMIZE_MAX_SUPPORTED_TABLE_VERSION, + table.getSchemaTableName(), + tableFormatVersion)); } beginTransaction(icebergTable); return new BeginTableExecuteResult<>( - executeHandle, - table.forOptimize(true, optimizeHandle.getMaxScannedFileSize())); + executeHandle, table.forOptimize(true, optimizeHandle.getMaxScannedFileSize())); } @Override @@ -1310,7 +1401,8 @@ public void finishTableExecute( case REMOVE_ORPHAN_FILES: // handled via executeTableExecute } - throw new IllegalArgumentException("Unknown procedure '" + executeHandle.getProcedureId() + "'"); + throw new IllegalArgumentException( + "Unknown procedure '" + executeHandle.getProcedureId() + "'"); } private void finishOptimize( @@ -1318,48 +1410,59 @@ private void finishOptimize( IcebergTableExecuteHandle executeHandle, Collection fragments, List splitSourceInfo) { - IcebergOptimizeHandle optimizeHandle = (IcebergOptimizeHandle) executeHandle.getProcedureHandle(); + IcebergOptimizeHandle optimizeHandle = + (IcebergOptimizeHandle) executeHandle.getProcedureHandle(); Table icebergTable = transaction.table(); // files to be deleted ImmutableSet.Builder scannedDataFilesBuilder = ImmutableSet.builder(); ImmutableSet.Builder scannedDeleteFilesBuilder = ImmutableSet.builder(); - splitSourceInfo.stream().map(DataFileWithDeleteFiles.class::cast).forEach(dataFileWithDeleteFiles -> { - scannedDataFilesBuilder.add(dataFileWithDeleteFiles.getDataFile()); - scannedDeleteFilesBuilder.addAll(dataFileWithDeleteFiles.getDeleteFiles()); - }); + splitSourceInfo.stream() + .map(DataFileWithDeleteFiles.class::cast) + .forEach( + dataFileWithDeleteFiles -> { + scannedDataFilesBuilder.add(dataFileWithDeleteFiles.getDataFile()); + scannedDeleteFilesBuilder.addAll(dataFileWithDeleteFiles.getDeleteFiles()); + }); Set scannedDataFiles = scannedDataFilesBuilder.build(); Set fullyAppliedDeleteFiles = scannedDeleteFilesBuilder.build(); - List commitTasks = fragments.stream() - .map(slice -> commitTaskCodec.fromJson(slice.getBytes())) - .collect(toImmutableList()); - - Type[] partitionColumnTypes = icebergTable.spec().fields().stream() - .map(field -> field.transform().getResultType( - icebergTable.schema().findType(field.sourceId()))) - .toArray(Type[]::new); + List commitTasks = + fragments.stream() + .map(slice -> commitTaskCodec.fromJson(slice.getBytes())) + .collect(toImmutableList()); + + Type[] partitionColumnTypes = + icebergTable.spec().fields().stream() + .map( + field -> + field + .transform() + .getResultType(icebergTable.schema().findType(field.sourceId()))) + .toArray(Type[]::new); Set newFiles = new HashSet<>(); for (CommitTaskData task : commitTasks) { - DataFiles.Builder builder = DataFiles.builder(icebergTable.spec()) - .withPath(task.getPath()) - .withFileSizeInBytes(task.getFileSizeInBytes()) - .withFormat(optimizeHandle.getFileFormat().toIceberg()) - .withMetrics(task.getMetrics().metrics()); + DataFiles.Builder builder = + DataFiles.builder(icebergTable.spec()) + .withPath(task.getPath()) + .withFileSizeInBytes(task.getFileSizeInBytes()) + .withFormat(optimizeHandle.getFileFormat().toIceberg()) + .withMetrics(task.getMetrics().metrics()); if (!icebergTable.spec().fields().isEmpty()) { - String partitionDataJson = task.getPartitionDataJson() - .orElseThrow(() -> new VerifyException("No partition data for partitioned table")); + String partitionDataJson = + task.getPartitionDataJson() + .orElseThrow(() -> new VerifyException("No partition data for partitioned table")); 
builder.withPartition(PartitionData.fromJson(partitionDataJson, partitionColumnTypes)); } newFiles.add(builder.build()); } - if (optimizeHandle.getSnapshotId().isEmpty() || - scannedDataFiles.isEmpty() && fullyAppliedDeleteFiles.isEmpty() && newFiles.isEmpty()) { + if (optimizeHandle.getSnapshotId().isEmpty() + || scannedDataFiles.isEmpty() && fullyAppliedDeleteFiles.isEmpty() && newFiles.isEmpty()) { // Either the table is empty, or the table scan turned out to be empty, nothing to commit transaction = null; return; @@ -1369,16 +1472,16 @@ private void finishOptimize( if (optimizeHandle.isRetriesEnabled()) { cleanExtraOutputFiles( session, - newFiles.stream() - .map(dataFile -> dataFile.path().toString()) - .collect(toImmutableSet())); + newFiles.stream().map(dataFile -> dataFile.path().toString()).collect(toImmutableSet())); } RewriteFiles rewriteFiles = transaction.newRewrite(); - rewriteFiles.rewriteFiles(scannedDataFiles, fullyAppliedDeleteFiles, newFiles, ImmutableSet.of()); + rewriteFiles.rewriteFiles( + scannedDataFiles, fullyAppliedDeleteFiles, newFiles, ImmutableSet.of()); // Table.snapshot method returns null if there is no matching snapshot - Snapshot snapshot = requireNonNull( - icebergTable.snapshot(optimizeHandle.getSnapshotId().get()), "snapshot is null"); + Snapshot snapshot = + requireNonNull( + icebergTable.snapshot(optimizeHandle.getSnapshotId().get()), "snapshot is null"); rewriteFiles.validateFromSnapshot(snapshot.snapshotId()); commit(rewriteFiles, session); transaction.commitTransaction(); @@ -1386,7 +1489,8 @@ private void finishOptimize( } @Override - public void executeTableExecute(ConnectorSession session, ConnectorTableExecuteHandle tableExecuteHandle) { + public void executeTableExecute( + ConnectorSession session, ConnectorTableExecuteHandle tableExecuteHandle) { IcebergTableExecuteHandle executeHandle = (IcebergTableExecuteHandle) tableExecuteHandle; switch (executeHandle.getProcedureId()) { case DROP_EXTENDED_STATS: @@ -1399,13 +1503,17 @@ public void executeTableExecute(ConnectorSession session, ConnectorTableExecuteH executeRemoveOrphanFiles(session, executeHandle); return; default: - throw new IllegalArgumentException("Unknown procedure '" + executeHandle.getProcedureId() + "'"); + throw new IllegalArgumentException( + "Unknown procedure '" + executeHandle.getProcedureId() + "'"); } } - private void executeDropExtendedStats(ConnectorSession session, IcebergTableExecuteHandle executeHandle) { - checkArgument(executeHandle.getProcedureHandle() instanceof IcebergDropExtendedStatsHandle, - "Unexpected procedure handle %s", executeHandle.getProcedureHandle()); + private void executeDropExtendedStats( + ConnectorSession session, IcebergTableExecuteHandle executeHandle) { + checkArgument( + executeHandle.getProcedureHandle() instanceof IcebergDropExtendedStatsHandle, + "Unexpected procedure handle %s", + executeHandle.getProcedureHandle()); Table icebergTable = catalog.loadTable(session, executeHandle.getSchemaTableName()); beginTransaction(icebergTable); @@ -1425,12 +1533,14 @@ private void executeDropExtendedStats(ConnectorSession session, IcebergTableExec transaction = null; } - private void executeExpireSnapshots(ConnectorSession session, IcebergTableExecuteHandle executeHandle) { + private void executeExpireSnapshots( + ConnectorSession session, IcebergTableExecuteHandle executeHandle) { IcebergExpireSnapshotsHandle expireSnapshotsHandle = (IcebergExpireSnapshotsHandle) executeHandle.getProcedureHandle(); Table table = catalog.loadTable(session, 
executeHandle.getSchemaTableName()); - Duration retention = requireNonNull(expireSnapshotsHandle.getRetentionThreshold(), "retention is null"); + Duration retention = + requireNonNull(expireSnapshotsHandle.getRetentionThreshold(), "retention is null"); validateTableExecuteParameters( table, executeHandle.getSchemaTableName(), @@ -1444,26 +1554,30 @@ private void executeExpireSnapshots(ConnectorSession session, IcebergTableExecut TrinoFileSystem fileSystem = fileSystemFactory.create(session); List pathsToDelete = new ArrayList<>(); // deleteFunction is not accessed from multiple threads unless .executeDeleteWith() is used - Consumer deleteFunction = path -> { - pathsToDelete.add(path); - if (pathsToDelete.size() == DELETE_BATCH_SIZE) { - try { - fileSystem.deleteFiles(pathsToDelete); - pathsToDelete.clear(); - } catch (IOException e) { - throw new TrinoException(ICEBERG_FILESYSTEM_ERROR, "Failed to delete files during snapshot expiration", e); - } - } - }; + Consumer deleteFunction = + path -> { + pathsToDelete.add(path); + if (pathsToDelete.size() == DELETE_BATCH_SIZE) { + try { + fileSystem.deleteFiles(pathsToDelete); + pathsToDelete.clear(); + } catch (IOException e) { + throw new TrinoException( + ICEBERG_FILESYSTEM_ERROR, "Failed to delete files during snapshot expiration", e); + } + } + }; - table.expireSnapshots() + table + .expireSnapshots() .expireOlderThan(expireTimestampMillis) .deleteWith(deleteFunction) .commit(); try { fileSystem.deleteFiles(pathsToDelete); } catch (IOException e) { - throw new TrinoException(ICEBERG_FILESYSTEM_ERROR, "Failed to delete files during snapshot expiration", e); + throw new TrinoException( + ICEBERG_FILESYSTEM_ERROR, "Failed to delete files during snapshot expiration", e); } } @@ -1478,41 +1592,49 @@ private static void validateTableExecuteParameters( int tableFormatVersion = ((HasTableOperations) table).operations().current().formatVersion(); if (tableFormatVersion > CLEANING_UP_PROCEDURES_MAX_SUPPORTED_TABLE_VERSION) { // It is not known if future version won't bring any new kind of metadata or data files - // because of the way procedures are implemented it is safer to fail here than to potentially remove + // because of the way procedures are implemented it is safer to fail here than to potentially + // remove // files that should stay there - throw new TrinoException(NOT_SUPPORTED, format( - "%s is not supported for Iceberg table format version > %d. " + - "Table %s format version is %s.", - procedureName, - CLEANING_UP_PROCEDURES_MAX_SUPPORTED_TABLE_VERSION, - schemaTableName, - tableFormatVersion)); + throw new TrinoException( + NOT_SUPPORTED, + format( + "%s is not supported for Iceberg table format version > %d. " + + "Table %s format version is %s.", + procedureName, + CLEANING_UP_PROCEDURES_MAX_SUPPORTED_TABLE_VERSION, + schemaTableName, + tableFormatVersion)); } Map properties = table.properties(); if (properties.containsKey(WRITE_LOCATION_PROVIDER_IMPL)) { throw new TrinoException( NOT_SUPPORTED, - "Table " + schemaTableName + " specifies " + properties.get(WRITE_LOCATION_PROVIDER_IMPL) + - " as a location provider. Writing to Iceberg tables with custom location provider is not supported."); + "Table " + + schemaTableName + + " specifies " + + properties.get(WRITE_LOCATION_PROVIDER_IMPL) + + " as a location provider. 
Writing to Iceberg tables with custom location provider is not supported."); } Duration retention = requireNonNull(retentionThreshold, "retention is null"); checkProcedureArgument( retention.compareTo(minRetention) >= 0, - "Retention specified (%s) is shorter than the minimum retention configured in the system (%s). " + - "Minimum retention can be changed with %s configuration property or iceberg.%s session property", + "Retention specified (%s) is shorter than the minimum retention configured in the system (%s). " + + "Minimum retention can be changed with %s configuration property or iceberg.%s session property", retention, minRetention, minRetentionParameterName, sessionMinRetentionParameterName); } - public void executeRemoveOrphanFiles(ConnectorSession session, IcebergTableExecuteHandle executeHandle) { + public void executeRemoveOrphanFiles( + ConnectorSession session, IcebergTableExecuteHandle executeHandle) { IcebergRemoveOrphanFilesHandle removeOrphanFilesHandle = (IcebergRemoveOrphanFilesHandle) executeHandle.getProcedureHandle(); Table table = catalog.loadTable(session, executeHandle.getSchemaTableName()); - Duration retention = requireNonNull(removeOrphanFilesHandle.getRetentionThreshold(), "retention is null"); + Duration retention = + requireNonNull(removeOrphanFilesHandle.getRetentionThreshold(), "retention is null"); validateTableExecuteParameters( table, executeHandle.getSchemaTableName(), @@ -1540,7 +1662,8 @@ private void removeOrphanFiles( // Similarly to issues like https://github.com/trinodb/trino/issues/13759, // equivalent paths may have different String representations due to things like double slashes. // Using file names may result in retaining files which could be removed. - // However, in practice Iceberg metadata and data files have UUIDs in their names which makes this unlikely. + // However, in practice Iceberg metadata and data files have UUIDs in their names which makes + // this unlikely. 
ImmutableSet.Builder validMetadataFileNames = ImmutableSet.builder(); ImmutableSet.Builder validDataFileNames = ImmutableSet.builder(); @@ -1556,14 +1679,16 @@ private void removeOrphanFiles( } validMetadataFileNames.add(fileName(manifest.path())); - try (ManifestReader> manifestReader = readerForManifest(table, manifest)) { + try (ManifestReader> manifestReader = + readerForManifest(table, manifest)) { for (ContentFile contentFile : manifestReader) { validDataFileNames.add(fileName(contentFile.path().toString())); } } catch (IOException e) { throw new TrinoException( ICEBERG_FILESYSTEM_ERROR, - "Unable to list manifest file content from " + manifest.path(), e); + "Unable to list manifest file content from " + manifest.path(), + e); } } } @@ -1576,10 +1701,16 @@ private void removeOrphanFiles( scanAndDeleteInvalidFiles( table, session, schemaTableName, expireTimestamp, validDataFileNames.build(), "data"); scanAndDeleteInvalidFiles( - table, session, schemaTableName, expireTimestamp, validMetadataFileNames.build(), "metadata"); + table, + session, + schemaTableName, + expireTimestamp, + validMetadataFileNames.build(), + "metadata"); } - private static ManifestReader> readerForManifest(Table table, ManifestFile manifest) { + private static ManifestReader> readerForManifest( + Table table, ManifestFile manifest) { return switch (manifest.content()) { case DATA -> ManifestFiles.read(manifest, table.io()); case DELETES -> ManifestFiles.readDeleteManifest(manifest, table.io(), table.specs()); @@ -1590,7 +1721,8 @@ private void scanAndDeleteInvalidFiles( Table table, ConnectorSession session, SchemaTableName schemaTableName, - long expireTimestamp, Set validFiles, + long expireTimestamp, + Set validFiles, String subfolder) { try { List filesToDelete = new ArrayList<>(); @@ -1598,37 +1730,52 @@ private void scanAndDeleteInvalidFiles( FileIterator allFiles = fileSystem.listFiles(table.location() + "/" + subfolder); while (allFiles.hasNext()) { FileEntry entry = allFiles.next(); - if (entry.lastModified() < expireTimestamp && !validFiles.contains(fileName(entry.path()))) { + if (entry.lastModified() < expireTimestamp + && !validFiles.contains(fileName(entry.path()))) { filesToDelete.add(entry.path()); if (filesToDelete.size() >= DELETE_BATCH_SIZE) { - log.debug("Deleting files while removing orphan files for table %s [%s]", schemaTableName, filesToDelete); + log.debug( + "Deleting files while removing orphan files for table %s [%s]", + schemaTableName, filesToDelete); fileSystem.deleteFiles(filesToDelete); filesToDelete.clear(); } } else { - log.debug("%s file retained while removing orphan files %s", entry.path(), schemaTableName.getTableName()); + log.debug( + "%s file retained while removing orphan files %s", + entry.path(), schemaTableName.getTableName()); } } if (!filesToDelete.isEmpty()) { - log.debug("Deleting files while removing orphan files for table %s %s", schemaTableName, filesToDelete); + log.debug( + "Deleting files while removing orphan files for table %s %s", + schemaTableName, filesToDelete); fileSystem.deleteFiles(filesToDelete); } } catch (IOException e) { - throw new TrinoException(ICEBERG_FILESYSTEM_ERROR, "Failed accessing data for table: " + schemaTableName, e); + throw new TrinoException( + ICEBERG_FILESYSTEM_ERROR, "Failed accessing data for table: " + schemaTableName, e); } } @Override public Optional getInfo(ConnectorTableHandle tableHandle) { IcebergTableHandle icebergTableHandle = (IcebergTableHandle) tableHandle; - Optional partitioned = 
icebergTableHandle.getPartitionSpecJson() - .map(partitionSpecJson -> PartitionSpecParser.fromJson( - SchemaParser.fromJson(icebergTableHandle.getTableSchemaJson()), partitionSpecJson).isPartitioned()); + Optional partitioned = + icebergTableHandle + .getPartitionSpecJson() + .map( + partitionSpecJson -> + PartitionSpecParser.fromJson( + SchemaParser.fromJson(icebergTableHandle.getTableSchemaJson()), + partitionSpecJson) + .isPartitioned()); - return Optional.of(new IcebergInputInfo( - icebergTableHandle.getSnapshotId(), - partitioned, - getFileFormat(icebergTableHandle.getStorageProperties()).name())); + return Optional.of( + new IcebergInputInfo( + icebergTableHandle.getSnapshotId(), + partitioned, + getFileFormat(icebergTableHandle.getStorageProperties()).name())); } @Override @@ -1637,7 +1784,8 @@ public void dropTable(ConnectorSession session, ConnectorTableHandle tableHandle } @Override - public void renameTable(ConnectorSession session, ConnectorTableHandle tableHandle, SchemaTableName newTable) { + public void renameTable( + ConnectorSession session, ConnectorTableHandle tableHandle, SchemaTableName newTable) { catalog.renameTable(session, ((IcebergTableHandle) tableHandle).getSchemaTableName(), newTable); } @@ -1658,10 +1806,12 @@ public void setTableProperties( switch (trinoPropertyName) { case FILE_FORMAT_PROPERTY: - updateProperties.defaultFormat(((IcebergFileFormat) propertyValue.orElseThrow()).toIceberg()); + updateProperties.defaultFormat( + ((IcebergFileFormat) propertyValue.orElseThrow()).toIceberg()); break; case FORMAT_VERSION_PROPERTY: - // UpdateProperties#commit will trigger any necessary metadata updates required for the new spec version + // UpdateProperties#commit will trigger any necessary metadata updates required for the + // new spec version updateProperty( updateProperties, FORMAT_VERSION, @@ -1670,7 +1820,8 @@ public void setTableProperties( break; default: // TODO: Support updating partitioning https://github.com/trinodb/trino/issues/12174 - throw new TrinoException(NOT_SUPPORTED, "Updating the " + trinoPropertyName + " property is not supported"); + throw new TrinoException( + NOT_SUPPORTED, "Updating the " + trinoPropertyName + " property is not supported"); } } @@ -1695,17 +1846,22 @@ private static void updateProperty( } @Override - public void addColumn(ConnectorSession session, ConnectorTableHandle tableHandle, ColumnMetadata column) { - Table icebergTable = catalog.loadTable(session, ((IcebergTableHandle) tableHandle).getSchemaTableName()); - icebergTable.updateSchema() + public void addColumn( + ConnectorSession session, ConnectorTableHandle tableHandle, ColumnMetadata column) { + Table icebergTable = + catalog.loadTable(session, ((IcebergTableHandle) tableHandle).getSchemaTableName()); + icebergTable + .updateSchema() .addColumn(column.getName(), toIcebergType(column.getType()), column.getComment()) .commit(); } @Override - public void dropColumn(ConnectorSession session, ConnectorTableHandle tableHandle, ColumnHandle column) { + public void dropColumn( + ConnectorSession session, ConnectorTableHandle tableHandle, ColumnHandle column) { IcebergColumnHandle handle = (IcebergColumnHandle) column; - Table icebergTable = catalog.loadTable(session, ((IcebergTableHandle) tableHandle).getSchemaTableName()); + Table icebergTable = + catalog.loadTable(session, ((IcebergTableHandle) tableHandle).getSchemaTableName()); icebergTable.updateSchema().deleteColumn(handle.getName()).commit(); } @@ -1716,22 +1872,25 @@ public void renameColumn( ColumnHandle source, 
String target) { IcebergColumnHandle columnHandle = (IcebergColumnHandle) source; - Table icebergTable = catalog.loadTable(session, ((IcebergTableHandle) tableHandle).getSchemaTableName()); + Table icebergTable = + catalog.loadTable(session, ((IcebergTableHandle) tableHandle).getSchemaTableName()); icebergTable.updateSchema().renameColumn(columnHandle.getName(), target).commit(); } private List getColumnMetadatas(Schema schema) { ImmutableList.Builder columns = ImmutableList.builder(); - List schemaColumns = schema.columns().stream() - .map(column -> - ColumnMetadata.builder() - .setName(column.name()) - .setType(toTrinoType(column.type(), typeManager)) - .setNullable(column.isOptional()) - .setComment(Optional.ofNullable(column.doc())) - .build()) - .collect(toImmutableList()); + List schemaColumns = + schema.columns().stream() + .map( + column -> + ColumnMetadata.builder() + .setName(column.name()) + .setType(toTrinoType(column.type(), typeManager)) + .setNullable(column.isOptional()) + .setComment(Optional.ofNullable(column.doc())) + .build()) + .collect(toImmutableList()); columns.addAll(schemaColumns); columns.add(pathColumnMetadata()); columns.add(fileModifiedTimeColumnMetadata()); @@ -1744,15 +1903,17 @@ public ConnectorAnalyzeMetadata getStatisticsCollectionMetadata( ConnectorTableHandle tableHandle, Map analyzeProperties) { if (!isExtendedStatisticsEnabled(session)) { - throw new TrinoException(NOT_SUPPORTED, - "Analyze is not enabled. You can enable analyze using %s config or %s catalog session property".formatted( - IcebergConfig.EXTENDED_STATISTICS_CONFIG, - IcebergSessionProperties.EXTENDED_STATISTICS_ENABLED)); + throw new TrinoException( + NOT_SUPPORTED, + "Analyze is not enabled. You can enable analyze using %s config or %s catalog session property" + .formatted( + IcebergConfig.EXTENDED_STATISTICS_CONFIG, + IcebergSessionProperties.EXTENDED_STATISTICS_ENABLED)); } IcebergTableHandle handle = (IcebergTableHandle) tableHandle; - checkArgument(handle.getTableType() == DATA, - "Cannot analyze non-DATA table: %s", handle.getTableType()); + checkArgument( + handle.getTableType() == DATA, "Cannot analyze non-DATA table: %s", handle.getTableType()); if (handle.getSnapshotId().isEmpty()) { // No snapshot, table is empty @@ -1760,33 +1921,44 @@ public ConnectorAnalyzeMetadata getStatisticsCollectionMetadata( } ConnectorTableMetadata tableMetadata = getTableMetadata(session, handle); - Set allScalarColumnNames = tableMetadata.getColumns().stream() - .filter(column -> !column.isHidden()) - .filter(column -> column.getType().getTypeParameters().isEmpty()) // is scalar type - .map(ColumnMetadata::getName) - .collect(toImmutableSet()); - - Set analyzeColumnNames = getColumnNames(analyzeProperties) - .map(columnNames -> { - // validate that proper column names are passed via `columns` analyze property - if (columnNames.isEmpty()) { - throw new TrinoException(INVALID_ANALYZE_PROPERTY, "Cannot specify empty list of columns for analysis"); - } - if (!allScalarColumnNames.containsAll(columnNames)) { - throw new TrinoException( - INVALID_ANALYZE_PROPERTY, - format("Invalid columns specified for analysis: %s", - Sets.difference(columnNames, allScalarColumnNames))); - } - return columnNames; - }) - .orElse(allScalarColumnNames); - - Set columnStatistics = tableMetadata.getColumns().stream() - .filter(column -> analyzeColumnNames.contains(column.getName())) - .map(column -> new ColumnStatisticMetadata(column.getName(), - NUMBER_OF_DISTINCT_VALUES_NAME, NUMBER_OF_DISTINCT_VALUES_FUNCTION)) - 
.collect(toImmutableSet()); + Set allScalarColumnNames = + tableMetadata.getColumns().stream() + .filter(column -> !column.isHidden()) + .filter(column -> column.getType().getTypeParameters().isEmpty()) // is scalar type + .map(ColumnMetadata::getName) + .collect(toImmutableSet()); + + Set analyzeColumnNames = + getColumnNames(analyzeProperties) + .map( + columnNames -> { + // validate that proper column names are passed via `columns` analyze property + if (columnNames.isEmpty()) { + throw new TrinoException( + INVALID_ANALYZE_PROPERTY, + "Cannot specify empty list of columns for analysis"); + } + if (!allScalarColumnNames.containsAll(columnNames)) { + throw new TrinoException( + INVALID_ANALYZE_PROPERTY, + format( + "Invalid columns specified for analysis: %s", + Sets.difference(columnNames, allScalarColumnNames))); + } + return columnNames; + }) + .orElse(allScalarColumnNames); + + Set columnStatistics = + tableMetadata.getColumns().stream() + .filter(column -> analyzeColumnNames.contains(column.getName())) + .map( + column -> + new ColumnStatisticMetadata( + column.getName(), + NUMBER_OF_DISTINCT_VALUES_NAME, + NUMBER_OF_DISTINCT_VALUES_FUNCTION)) + .collect(toImmutableSet()); return new ConnectorAnalyzeMetadata( tableHandle, @@ -1794,7 +1966,8 @@ public ConnectorAnalyzeMetadata getStatisticsCollectionMetadata( } @Override - public ConnectorTableHandle beginStatisticsCollection(ConnectorSession session, ConnectorTableHandle tableHandle) { + public ConnectorTableHandle beginStatisticsCollection( + ConnectorSession session, ConnectorTableHandle tableHandle) { IcebergTableHandle handle = (IcebergTableHandle) tableHandle; Table icebergTable = catalog.loadTable(session, handle.getSchemaTableName()); beginTransaction(icebergTable); @@ -1815,7 +1988,8 @@ public void finishStatisticsCollection( "Unexpected computed statistics that cannot be attached to a snapshot because none exists: %s", computedStatistics); - // TODO (https://github.com/trinodb/trino/issues/15397): remove support for Trino-specific statistics properties + // TODO (https://github.com/trinodb/trino/issues/15397): remove support for Trino-specific + // statistics properties // Drop all stats. 
Empty table needs none UpdateProperties updateProperties = transaction.updateProperties(); table.properties().keySet().stream() @@ -1829,39 +2003,49 @@ public void finishStatisticsCollection( } long snapshotId = handle.getSnapshotId().orElseThrow(); - Map columnNameToId = table.schema().columns().stream() - .collect(toImmutableMap(nestedField -> nestedField.name().toLowerCase(ENGLISH), Types.NestedField::fieldId)); + Map columnNameToId = + table.schema().columns().stream() + .collect( + toImmutableMap( + nestedField -> nestedField.name().toLowerCase(ENGLISH), + Types.NestedField::fieldId)); Set columnIds = ImmutableSet.copyOf(columnNameToId.values()); - // TODO (https://github.com/trinodb/trino/issues/15397): remove support for Trino-specific statistics properties + // TODO (https://github.com/trinodb/trino/issues/15397): remove support for Trino-specific + // statistics properties // Drop stats for obsolete columns UpdateProperties updateProperties = transaction.updateProperties(); table.properties().keySet().stream() - .filter(key -> { - if (!key.startsWith(TRINO_STATS_PREFIX)) { - return false; - } - Matcher matcher = TRINO_STATS_COLUMN_ID_PATTERN.matcher(key); - if (!matcher.matches()) { - return false; - } - return !columnIds.contains(Integer.parseInt(matcher.group("columnId"))); - }) + .filter( + key -> { + if (!key.startsWith(TRINO_STATS_PREFIX)) { + return false; + } + Matcher matcher = TRINO_STATS_COLUMN_ID_PATTERN.matcher(key); + if (!matcher.matches()) { + return false; + } + return !columnIds.contains(Integer.parseInt(matcher.group("columnId"))); + }) .forEach(updateProperties::remove); updateProperties.commit(); ImmutableMap.Builder ndvSketches = ImmutableMap.builder(); for (ComputedStatistics computedStatistic : computedStatistics) { - verify(computedStatistic.getGroupingColumns().isEmpty() && - computedStatistic.getGroupingValues().isEmpty(), "Unexpected grouping"); + verify( + computedStatistic.getGroupingColumns().isEmpty() + && computedStatistic.getGroupingValues().isEmpty(), + "Unexpected grouping"); verify(computedStatistic.getTableStatistics().isEmpty(), "Unexpected table statistics"); - for (Map.Entry entry : computedStatistic.getColumnStatistics().entrySet()) { + for (Map.Entry entry : + computedStatistic.getColumnStatistics().entrySet()) { ColumnStatisticMetadata statisticMetadata = entry.getKey(); if (statisticMetadata.getConnectorAggregationId().equals(NUMBER_OF_DISTINCT_VALUES_NAME)) { - Integer columnId = verifyNotNull( - columnNameToId.get(statisticMetadata.getColumnName()), - "Column not found in table: [%s]", - statisticMetadata.getColumnName()); + Integer columnId = + verifyNotNull( + columnNameToId.get(statisticMetadata.getColumnName()), + "Column not found in table: [%s]", + statisticMetadata.getColumnName()); CompactSketch sketch = DataSketchStateSerializer.deserialize(entry.getValue(), 0); ndvSketches.put(columnId, sketch); } else { @@ -1870,21 +2054,18 @@ public void finishStatisticsCollection( } } - StatisticsFile statisticsFile = tableStatisticsWriter.writeStatisticsFile( - session, - table, - snapshotId, - ndvSketches.buildOrThrow()); - transaction.updateStatistics() - .setStatistics(snapshotId, statisticsFile) - .commit(); + StatisticsFile statisticsFile = + tableStatisticsWriter.writeStatisticsFile( + session, table, snapshotId, ndvSketches.buildOrThrow()); + transaction.updateStatistics().setStatistics(snapshotId, statisticsFile).commit(); transaction.commitTransaction(); transaction = null; } @Override - public Optional 
applyDelete(ConnectorSession session, ConnectorTableHandle handle) { + public Optional applyDelete( + ConnectorSession session, ConnectorTableHandle handle) { IcebergTableHandle table = (IcebergTableHandle) handle; TupleDomain medataColumnPredicate = table.getEnforcedPredicate().filter((column, domain) -> isMetadataColumnId(column.getId())); @@ -1896,23 +2077,25 @@ public Optional applyDelete(ConnectorSession session, Conn @Override public Optional getUpdateLayout( - ConnectorSession session, - ConnectorTableHandle tableHandle) { + ConnectorSession session, ConnectorTableHandle tableHandle) { return Optional.of(IcebergUpdateHandle.INSTANCE); } - private static void validateNotModifyingOldSnapshot(IcebergTableHandle table, Table icebergTable) { - if (table.getSnapshotId().isPresent() && - (table.getSnapshotId().get() != icebergTable.currentSnapshot().snapshotId())) { + private static void validateNotModifyingOldSnapshot( + IcebergTableHandle table, Table icebergTable) { + if (table.getSnapshotId().isPresent() + && (table.getSnapshotId().get() != icebergTable.currentSnapshot().snapshotId())) { throw new TrinoException(NOT_SUPPORTED, "Modifying old snapshot is not supported in Iceberg"); } } - public static void validateNotPartitionedByNestedField(Schema schema, PartitionSpec partitionSpec) { + public static void validateNotPartitionedByNestedField( + Schema schema, PartitionSpec partitionSpec) { Map indexParents = indexParents(schema.asStruct()); for (PartitionField field : partitionSpec.fields()) { if (indexParents.containsKey(field.sourceId())) { - throw new TrinoException(NOT_SUPPORTED, "Partitioning by nested field is unsupported: " + field.name()); + throw new TrinoException( + NOT_SUPPORTED, "Partitioning by nested field is unsupported: " + field.name()); } } } @@ -1932,7 +2115,8 @@ public void renameView(ConnectorSession session, SchemaTableName source, SchemaT } @Override - public void setViewAuthorization(ConnectorSession session, SchemaTableName viewName, TrinoPrincipal principal) { + public void setViewAuthorization( + ConnectorSession session, SchemaTableName viewName, TrinoPrincipal principal) { catalog.setViewPrincipal(session, viewName, principal); } @@ -1947,24 +2131,35 @@ public List listViews(ConnectorSession session, Optional getViews(ConnectorSession session, Optional schemaName) { + public Map getViews( + ConnectorSession session, Optional schemaName) { return catalog.getViews(session, schemaName); } @Override - public Optional getView(ConnectorSession session, SchemaTableName viewName) { + public Optional getView( + ConnectorSession session, SchemaTableName viewName) { return catalog.getView(session, viewName); } @Override - public ColumnHandle getMergeRowIdColumnHandle(ConnectorSession session, ConnectorTableHandle tableHandle) { - StructType type = StructType.of(ImmutableList.builder() - .add(MetadataColumns.FILE_PATH) - .add(MetadataColumns.ROW_POSITION) - .add(NestedField.required(TRINO_MERGE_FILE_RECORD_COUNT, "file_record_count", LongType.get())) - .add(NestedField.required(TRINO_MERGE_PARTITION_SPEC_ID, "partition_spec_id", IntegerType.get())) - .add(NestedField.required(TRINO_MERGE_PARTITION_DATA, "partition_data", StringType.get())) - .build()); + public ColumnHandle getMergeRowIdColumnHandle( + ConnectorSession session, ConnectorTableHandle tableHandle) { + StructType type = + StructType.of( + ImmutableList.builder() + .add(MetadataColumns.FILE_PATH) + .add(MetadataColumns.ROW_POSITION) + .add( + NestedField.required( + TRINO_MERGE_FILE_RECORD_COUNT, 
"file_record_count", LongType.get())) + .add( + NestedField.required( + TRINO_MERGE_PARTITION_SPEC_ID, "partition_spec_id", IntegerType.get())) + .add( + NestedField.required( + TRINO_MERGE_PARTITION_DATA, "partition_data", StringType.get())) + .build()); NestedField field = NestedField.required(TRINO_MERGE_ROW_ID, TRINO_ROW_ID_NAME, type); return getColumnHandle(field, typeManager); @@ -1976,19 +2171,24 @@ public OptionalLong executeDelete(ConnectorSession session, ConnectorTableHandle Table icebergTable = catalog.loadTable(session, handle.getSchemaTableName()); - DeleteFiles deleteFiles = icebergTable.newDelete() - .deleteFromRowFilter(toIcebergExpression(handle.getEnforcedPredicate())); + DeleteFiles deleteFiles = + icebergTable + .newDelete() + .deleteFromRowFilter(toIcebergExpression(handle.getEnforcedPredicate())); commit(deleteFiles, session); Map summary = icebergTable.currentSnapshot().summary(); String deletedRowsStr = summary.get(DELETED_RECORDS_PROP); if (deletedRowsStr == null) { - // TODO Iceberg should guarantee this is always present (https://github.com/apache/iceberg/issues/4647) + // TODO Iceberg should guarantee this is always present + // (https://github.com/apache/iceberg/issues/4647) return OptionalLong.empty(); } long deletedRecords = Long.parseLong(deletedRowsStr); - long removedPositionDeletes = Long.parseLong(summary.getOrDefault(REMOVED_POS_DELETES_PROP, "0")); - long removedEqualityDeletes = Long.parseLong(summary.getOrDefault(REMOVED_EQ_DELETES_PROP, "0")); + long removedPositionDeletes = + Long.parseLong(summary.getOrDefault(REMOVED_POS_DELETES_PROP, "0")); + long removedEqualityDeletes = + Long.parseLong(summary.getOrDefault(REMOVED_EQ_DELETES_PROP, "0")); return OptionalLong.of(deletedRecords - removedPositionDeletes - removedEqualityDeletes); } @@ -1998,9 +2198,7 @@ public void rollback() { @Override public Optional> applyFilter( - ConnectorSession session, - ConnectorTableHandle handle, - Constraint constraint) { + ConnectorSession session, ConnectorTableHandle handle, Constraint constraint) { IcebergTableHandle table = (IcebergTableHandle) handle; Table icebergTable = catalog.loadTable(session, table.getSchemaTableName()); @@ -2008,80 +2206,91 @@ public Optional> applyFilter( BiPredicate isIdentityPartition = (column, domain) -> partitionSourceIds.contains(column.getId()); // Iceberg metadata columns can not be used in table scans - BiPredicate isMetadataColumn = (column, domain) -> isMetadataColumnId(column.getId()); - - TupleDomain newEnforcedConstraint = constraint.getSummary() - .transformKeys(IcebergColumnHandle.class::cast) - .filter(isIdentityPartition) - .intersect(table.getEnforcedPredicate()); - - TupleDomain remainingConstraint = constraint.getSummary() - .transformKeys(IcebergColumnHandle.class::cast) - .filter(isIdentityPartition.negate()) - .filter(isMetadataColumn.negate()); - - TupleDomain newUnenforcedConstraint = remainingConstraint - // TODO: Remove after completing https://github.com/trinodb/trino/issues/8759 - // Only applies to the unenforced constraint because structural types cannot be partition keys - .filter((columnHandle, predicate) -> !isStructuralType(columnHandle.getType())) - .intersect(table.getUnenforcedPredicate()); - - if (newEnforcedConstraint.equals(table.getEnforcedPredicate()) && - newUnenforcedConstraint.equals(table.getUnenforcedPredicate())) { + BiPredicate isMetadataColumn = + (column, domain) -> isMetadataColumnId(column.getId()); + + TupleDomain newEnforcedConstraint = + constraint + .getSummary() + 
.transformKeys(IcebergColumnHandle.class::cast) + .filter(isIdentityPartition) + .intersect(table.getEnforcedPredicate()); + + TupleDomain remainingConstraint = + constraint + .getSummary() + .transformKeys(IcebergColumnHandle.class::cast) + .filter(isIdentityPartition.negate()) + .filter(isMetadataColumn.negate()); + + TupleDomain newUnenforcedConstraint = + remainingConstraint + // TODO: Remove after completing https://github.com/trinodb/trino/issues/8759 + // Only applies to the unenforced constraint because structural types cannot be + // partition keys + .filter((columnHandle, predicate) -> !isStructuralType(columnHandle.getType())) + .intersect(table.getUnenforcedPredicate()); + + if (newEnforcedConstraint.equals(table.getEnforcedPredicate()) + && newUnenforcedConstraint.equals(table.getUnenforcedPredicate())) { return Optional.empty(); } if (table instanceof AdaptHiveIcebergTableHandle) { - return Optional.of(new ConstraintApplicationResult<>( - new AdaptHiveIcebergTableHandle( - table.getSchemaName(), - table.getTableName(), - table.getTableType(), - table.getSnapshotId(), - table.getTableSchemaJson(), - table.getPartitionSpecJson(), - table.getFormatVersion(), - newUnenforcedConstraint, - newEnforcedConstraint, - table.getProjectedColumns(), - table.getNameMappingJson(), - table.getTableLocation(), - table.getStorageProperties(), - table.getRetryMode(), - table.getUpdatedColumns(), - table.isRecordScannedFiles(), - table.getMaxScannedFileSize()), - remainingConstraint.transformKeys(ColumnHandle.class::cast), - false)); - } - - return Optional.of(new ConstraintApplicationResult<>( - new IcebergTableHandle( - table.getSchemaName(), - table.getTableName(), - table.getTableType(), - table.getSnapshotId(), - table.getTableSchemaJson(), - table.getPartitionSpecJson(), - table.getFormatVersion(), - newUnenforcedConstraint, - newEnforcedConstraint, - table.getProjectedColumns(), - table.getNameMappingJson(), - table.getTableLocation(), - table.getStorageProperties(), - table.getRetryMode(), - table.getUpdatedColumns(), - table.isRecordScannedFiles(), - table.getMaxScannedFileSize()), - remainingConstraint.transformKeys(ColumnHandle.class::cast), - false)); + return Optional.of( + new ConstraintApplicationResult<>( + new AdaptHiveIcebergTableHandle( + table.getSchemaName(), + table.getTableName(), + table.getTableType(), + table.getSnapshotId(), + table.getTableSchemaJson(), + table.getPartitionSpecJson(), + table.getFormatVersion(), + newUnenforcedConstraint, + newEnforcedConstraint, + table.getProjectedColumns(), + table.getNameMappingJson(), + table.getTableLocation(), + table.getStorageProperties(), + table.getRetryMode(), + table.getUpdatedColumns(), + table.isRecordScannedFiles(), + table.getMaxScannedFileSize()), + remainingConstraint.transformKeys(ColumnHandle.class::cast), + false)); + } + + return Optional.of( + new ConstraintApplicationResult<>( + new IcebergTableHandle( + table.getSchemaName(), + table.getTableName(), + table.getTableType(), + table.getSnapshotId(), + table.getTableSchemaJson(), + table.getPartitionSpecJson(), + table.getFormatVersion(), + newUnenforcedConstraint, + newEnforcedConstraint, + table.getProjectedColumns(), + table.getNameMappingJson(), + table.getTableLocation(), + table.getStorageProperties(), + table.getRetryMode(), + table.getUpdatedColumns(), + table.isRecordScannedFiles(), + table.getMaxScannedFileSize()), + remainingConstraint.transformKeys(ColumnHandle.class::cast), + false)); } private static Set identityPartitionColumnsInAllSpecs(Table 
table) { // Extract identity partition column source ids common to ALL specs return table.spec().fields().stream() .filter(field -> field.transform().isIdentity()) - .filter(field -> table.specs().values().stream().allMatch(spec -> spec.fields().contains(field))) + .filter( + field -> + table.specs().values().stream().allMatch(spec -> spec.fields().contains(field))) .map(PartitionField::sourceId) .collect(toImmutableSet()); } @@ -2096,55 +2305,69 @@ public Optional> applyProjecti return Optional.empty(); } - // Create projected column representations for supported sub expressions. Simple column references and chain of + // Create projected column representations for supported sub expressions. Simple column + // references and chain of // dereferences on a variable are supported right now. - Set projectedExpressions = projections.stream() - .flatMap(expression -> extractSupportedProjectedColumns(expression).stream()) - .collect(toImmutableSet()); + Set projectedExpressions = + projections.stream() + .flatMap(expression -> extractSupportedProjectedColumns(expression).stream()) + .collect(toImmutableSet()); - Map columnProjections = projectedExpressions.stream() - .collect(toImmutableMap(identity(), HiveApplyProjectionUtil::createProjectedColumnRepresentation)); + Map columnProjections = + projectedExpressions.stream() + .collect( + toImmutableMap( + identity(), HiveApplyProjectionUtil::createProjectedColumnRepresentation)); IcebergTableHandle icebergTableHandle = (IcebergTableHandle) handle; // all references are simple variables if (columnProjections.values().stream().allMatch(ProjectedColumnRepresentation::isVariable)) { - Set projectedColumns = assignments.values().stream() - .map(IcebergColumnHandle.class::cast) - .collect(toImmutableSet()); + Set projectedColumns = + assignments.values().stream() + .map(IcebergColumnHandle.class::cast) + .collect(toImmutableSet()); if (icebergTableHandle.getProjectedColumns().equals(projectedColumns)) { return Optional.empty(); } - List assignmentsList = assignments.entrySet().stream() - .map(assignment -> new Assignment( - assignment.getKey(), - assignment.getValue(), - ((IcebergColumnHandle) assignment.getValue()).getType())) - .collect(toImmutableList()); - - return Optional.of(new ProjectionApplicationResult<>( - icebergTableHandle.withProjectedColumns(projectedColumns), - projections, - assignmentsList, - false)); + List assignmentsList = + assignments.entrySet().stream() + .map( + assignment -> + new Assignment( + assignment.getKey(), + assignment.getValue(), + ((IcebergColumnHandle) assignment.getValue()).getType())) + .collect(toImmutableList()); + + return Optional.of( + new ProjectionApplicationResult<>( + icebergTableHandle.withProjectedColumns(projectedColumns), + projections, + assignmentsList, + false)); } Map newAssignments = new HashMap<>(); - ImmutableMap.Builder newVariablesBuilder = ImmutableMap.builder(); + ImmutableMap.Builder newVariablesBuilder = + ImmutableMap.builder(); ImmutableSet.Builder projectedColumnsBuilder = ImmutableSet.builder(); - for (Map.Entry entry : columnProjections.entrySet()) { + for (Map.Entry entry : + columnProjections.entrySet()) { ConnectorExpression expression = entry.getKey(); ProjectedColumnRepresentation projectedColumn = entry.getValue(); IcebergColumnHandle baseColumnHandle = (IcebergColumnHandle) assignments.get(projectedColumn.getVariable().getName()); IcebergColumnHandle projectedColumnHandle = - createProjectedColumnHandle(baseColumnHandle, projectedColumn.getDereferenceIndices(), 
expression.getType()); + createProjectedColumnHandle( + baseColumnHandle, projectedColumn.getDereferenceIndices(), expression.getType()); String projectedColumnName = projectedColumnHandle.getQualifiedName(); Variable projectedColumnVariable = new Variable(projectedColumnName, expression.getType()); - Assignment newAssignment = new Assignment(projectedColumnName, projectedColumnHandle, expression.getType()); + Assignment newAssignment = + new Assignment(projectedColumnName, projectedColumnHandle, expression.getType()); newAssignments.putIfAbsent(projectedColumnName, newAssignment); newVariablesBuilder.put(expression, projectedColumnVariable); @@ -2153,16 +2376,18 @@ public Optional> applyProjecti // Modify projections to refer to new variables Map newVariables = newVariablesBuilder.buildOrThrow(); - List newProjections = projections.stream() - .map(expression -> replaceWithNewVariables(expression, newVariables)) - .collect(toImmutableList()); + List newProjections = + projections.stream() + .map(expression -> replaceWithNewVariables(expression, newVariables)) + .collect(toImmutableList()); List outputAssignments = ImmutableList.copyOf(newAssignments.values()); - return Optional.of(new ProjectionApplicationResult<>( - icebergTableHandle.withProjectedColumns(projectedColumnsBuilder.build()), - newProjections, - outputAssignments, - false)); + return Optional.of( + new ProjectionApplicationResult<>( + icebergTableHandle.withProjectedColumns(projectedColumnsBuilder.build()), + newProjections, + outputAssignments, + false)); } private static IcebergColumnHandle createProjectedColumnHandle( @@ -2191,7 +2416,8 @@ private static IcebergColumnHandle createProjectedColumnHandle( } @Override - public TableStatistics getTableStatistics(ConnectorSession session, ConnectorTableHandle tableHandle) { + public TableStatistics getTableStatistics( + ConnectorSession session, ConnectorTableHandle tableHandle) { if (!isStatisticsEnabled(session)) { return TableStatistics.empty(); } @@ -2201,7 +2427,8 @@ public TableStatistics getTableStatistics(ConnectorSession session, ConnectorTab // If this changes, the caching logic may here may need to be revised. 
checkArgument(originalHandle.getUpdatedColumns().isEmpty(), "Unexpected updated columns"); checkArgument(!originalHandle.isRecordScannedFiles(), "Unexpected scanned files recording set"); - checkArgument(originalHandle.getMaxScannedFileSize().isEmpty(), "Unexpected max scanned file size set"); + checkArgument( + originalHandle.getMaxScannedFileSize().isEmpty(), "Unexpected max scanned file size set"); return tableStatisticsCache.computeIfAbsent( new IcebergTableHandle( @@ -2224,12 +2451,14 @@ public TableStatistics getTableStatistics(ConnectorSession session, ConnectorTab originalHandle.getMaxScannedFileSize()), handle -> { Table icebergTable = catalog.loadTable(session, handle.getSchemaTableName()); - return TableStatisticsReader.getTableStatistics(typeManager, session, handle, icebergTable); + return TableStatisticsReader.getTableStatistics( + typeManager, session, handle, icebergTable); }); } @Override - public void setTableAuthorization(ConnectorSession session, SchemaTableName tableName, TrinoPrincipal principal) { + public void setTableAuthorization( + ConnectorSession session, SchemaTableName tableName, TrinoPrincipal principal) { catalog.setTablePrincipal(session, tableName, principal); } @@ -2257,7 +2486,8 @@ public void dropMaterializedView(ConnectorSession session, SchemaTableName viewN } @Override - public boolean delegateMaterializedViewRefreshToConnector(ConnectorSession session, SchemaTableName viewName) { + public boolean delegateMaterializedViewRefreshToConnector( + ConnectorSession session, SchemaTableName viewName) { return false; } @@ -2286,31 +2516,36 @@ public Optional finishRefreshMaterializedView( Table icebergTable = transaction.table(); // delete before insert .. simulating overwrite - transaction.newDelete() - .deleteFromRowFilter(Expressions.alwaysTrue()) - .commit(); - - List commitTasks = fragments.stream() - .map(slice -> commitTaskCodec.fromJson(slice.getBytes())) - .collect(toImmutableList()); - - Type[] partitionColumnTypes = icebergTable.spec().fields().stream() - .map(field -> field.transform().getResultType( - icebergTable.schema().findType(field.sourceId()))) - .toArray(Type[]::new); + transaction.newDelete().deleteFromRowFilter(Expressions.alwaysTrue()).commit(); + + List commitTasks = + fragments.stream() + .map(slice -> commitTaskCodec.fromJson(slice.getBytes())) + .collect(toImmutableList()); + + Type[] partitionColumnTypes = + icebergTable.spec().fields().stream() + .map( + field -> + field + .transform() + .getResultType(icebergTable.schema().findType(field.sourceId()))) + .toArray(Type[]::new); AppendFiles appendFiles = transaction.newFastAppend(); ImmutableSet.Builder writtenFiles = ImmutableSet.builder(); for (CommitTaskData task : commitTasks) { - DataFiles.Builder builder = DataFiles.builder(icebergTable.spec()) - .withPath(task.getPath()) - .withFileSizeInBytes(task.getFileSizeInBytes()) - .withFormat(table.getFileFormat().toIceberg()) - .withMetrics(task.getMetrics().metrics()); + DataFiles.Builder builder = + DataFiles.builder(icebergTable.spec()) + .withPath(task.getPath()) + .withFileSizeInBytes(task.getFileSizeInBytes()) + .withFormat(table.getFileFormat().toIceberg()) + .withMetrics(task.getMetrics().metrics()); if (!icebergTable.spec().fields().isEmpty()) { - String partitionDataJson = task.getPartitionDataJson() - .orElseThrow(() -> new VerifyException("No partition data for partitioned table")); + String partitionDataJson = + task.getPartitionDataJson() + .orElseThrow(() -> new VerifyException("No partition data for partitioned 
table")); builder.withPartition(PartitionData.fromJson(partitionDataJson, partitionColumnTypes)); } @@ -2318,43 +2553,47 @@ public Optional finishRefreshMaterializedView( writtenFiles.add(task.getPath()); } - String dependencies = sourceTableHandles.stream() - .map(handle -> { - if (!(handle instanceof IcebergTableHandle icebergHandle)) { - return UNKNOWN_SNAPSHOT_TOKEN; - } - return icebergHandle.getSchemaTableName() + "=" + - icebergHandle.getSnapshotId().map(Object.class::cast).orElse(""); - }) - .distinct() - .collect(joining(",")); + String dependencies = + sourceTableHandles.stream() + .map( + handle -> { + if (!(handle instanceof IcebergTableHandle icebergHandle)) { + return UNKNOWN_SNAPSHOT_TOKEN; + } + return icebergHandle.getSchemaTableName() + + "=" + + icebergHandle.getSnapshotId().map(Object.class::cast).orElse(""); + }) + .distinct() + .collect(joining(",")); // try to leave as little garbage as possible behind if (table.getRetryMode() != NO_RETRIES) { cleanExtraOutputFiles(session, writtenFiles.build()); } - // Update the 'dependsOnTables' property that tracks tables on which the materialized view depends + // Update the 'dependsOnTables' property that tracks tables on which the materialized view + // depends // and the corresponding snapshot ids of the tables appendFiles.set(DEPENDS_ON_TABLES, dependencies); commit(appendFiles, session); transaction.commitTransaction(); transaction = null; - return Optional.of(new HiveWrittenPartitions(commitTasks.stream() - .map(CommitTaskData::getPath) - .collect(toImmutableList()))); + return Optional.of( + new HiveWrittenPartitions( + commitTasks.stream().map(CommitTaskData::getPath).collect(toImmutableList()))); } @Override - public List listMaterializedViews(ConnectorSession session, Optional schemaName) { + public List listMaterializedViews( + ConnectorSession session, Optional schemaName) { return catalog.listMaterializedViews(session, schemaName); } @Override public Map getMaterializedViews( - ConnectorSession session, - Optional schemaName) { + ConnectorSession session, Optional schemaName) { Map materializedViews = new HashMap<>(); for (SchemaTableName name : listMaterializedViews(session, schemaName)) { try { @@ -2370,21 +2609,23 @@ public Map getMaterialized @Override public Optional getMaterializedView( - ConnectorSession session, - SchemaTableName viewName) { + ConnectorSession session, SchemaTableName viewName) { return catalog.getMaterializedView(session, viewName); } @Override - public void renameMaterializedView(ConnectorSession session, SchemaTableName source, SchemaTableName target) { + public void renameMaterializedView( + ConnectorSession session, SchemaTableName source, SchemaTableName target) { // TODO (https://github.com/trinodb/trino/issues/9594) support rename across schemas if (!source.getSchemaName().equals(target.getSchemaName())) { - throw new TrinoException(NOT_SUPPORTED, "Materialized View rename across schemas is not supported"); + throw new TrinoException( + NOT_SUPPORTED, "Materialized View rename across schemas is not supported"); } catalog.renameMaterializedView(session, source, target); } - public Optional getTableToken(ConnectorSession session, ConnectorTableHandle tableHandle) { + public Optional getTableToken( + ConnectorSession session, ConnectorTableHandle tableHandle) { IcebergTableHandle table = (IcebergTableHandle) tableHandle; Table icebergTable = catalog.loadTable(session, table.getSchemaTableName()); return Optional.ofNullable(icebergTable.currentSnapshot()) @@ -2404,8 +2645,7 @@ public 
boolean isTableCurrent( @Override public MaterializedViewFreshness getMaterializedViewFreshness( - ConnectorSession session, - SchemaTableName materializedViewName) { + ConnectorSession session, SchemaTableName materializedViewName) { Optional materializedViewDefinition = getMaterializedView(session, materializedViewName); if (materializedViewDefinition.isEmpty()) { @@ -2413,13 +2653,20 @@ public MaterializedViewFreshness getMaterializedViewFreshness( return new MaterializedViewFreshness(STALE); } - SchemaTableName storageTableName = materializedViewDefinition.get().getStorageTable() - .map(CatalogSchemaTableName::getSchemaTableName) - .orElseThrow(() -> new IllegalStateException( - "Storage table missing in definition of materialized view " + materializedViewName)); + SchemaTableName storageTableName = + materializedViewDefinition + .get() + .getStorageTable() + .map(CatalogSchemaTableName::getSchemaTableName) + .orElseThrow( + () -> + new IllegalStateException( + "Storage table missing in definition of materialized view " + + materializedViewName)); Table icebergTable = catalog.loadTable(session, storageTableName); - String dependsOnTables = icebergTable.currentSnapshot().summary().getOrDefault(DEPENDS_ON_TABLES, ""); + String dependsOnTables = + icebergTable.currentSnapshot().summary().getOrDefault(DEPENDS_ON_TABLES, ""); if (dependsOnTables.isEmpty()) { // Information missing. While it's "unknown" whether storage is stale, // we return "stale": under no normal circumstances dependsOnTables should be missing. @@ -2472,7 +2719,8 @@ public MaterializedViewFreshness getMaterializedViewFreshness( } @Override - public boolean supportsReportingWrittenBytes(ConnectorSession session, ConnectorTableHandle connectorTableHandle) { + public boolean supportsReportingWrittenBytes( + ConnectorSession session, ConnectorTableHandle connectorTableHandle) { return true; } @@ -2488,15 +2736,18 @@ public boolean supportsReportingWrittenBytes( public void setColumnComment( ConnectorSession session, ConnectorTableHandle tableHandle, - ColumnHandle column, Optional comment) { + ColumnHandle column, + Optional comment) { catalog.updateColumnComment( session, ((IcebergTableHandle) tableHandle).getSchemaTableName(), - ((IcebergColumnHandle) column).getColumnIdentity(), comment); + ((IcebergColumnHandle) column).getColumnIdentity(), + comment); } @Override - public Optional redirectTable(ConnectorSession session, SchemaTableName tableName) { + public Optional redirectTable( + ConnectorSession session, SchemaTableName tableName) { return catalog.redirectTable(session, tableName); } diff --git a/trino/src/main/java/com/netease/arctic/trino/unkeyed/IcebergPageSource.java b/trino/src/main/java/com/netease/arctic/trino/unkeyed/IcebergPageSource.java index 78118672f0..dab120b40e 100644 --- a/trino/src/main/java/com/netease/arctic/trino/unkeyed/IcebergPageSource.java +++ b/trino/src/main/java/com/netease/arctic/trino/unkeyed/IcebergPageSource.java @@ -18,6 +18,12 @@ package com.netease.arctic.trino.unkeyed; +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Throwables.throwIfInstanceOf; +import static io.trino.plugin.base.util.Closables.closeAllSuppress; +import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_BAD_DATA; +import static java.util.Objects.requireNonNull; + import com.netease.arctic.io.reader.DeleteFilter; import com.netease.arctic.trino.delete.TrinoRow; import io.trino.plugin.hive.ReaderProjectionsAdapter; @@ -30,6 +36,7 @@ import 
org.apache.iceberg.io.CloseableIterable; import javax.annotation.Nullable; + import java.io.IOException; import java.io.UncheckedIOException; import java.util.List; @@ -37,17 +44,11 @@ import java.util.OptionalLong; import java.util.function.Supplier; -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Throwables.throwIfInstanceOf; -import static io.trino.plugin.base.util.Closables.closeAllSuppress; -import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_BAD_DATA; -import static java.util.Objects.requireNonNull; - /** - * Iceberg original IcebergPageSource has some problems for arctic, such as iceberg version, table type. + * Iceberg original IcebergPageSource has some problems for arctic, such as iceberg version, table + * type. */ -public class IcebergPageSource - implements ConnectorPageSource { +public class IcebergPageSource implements ConnectorPageSource { private final Type[] columnTypes; private final int[] expectedColumnIndexes; private final ConnectorPageSource delegate; @@ -55,8 +56,7 @@ public class IcebergPageSource private final Optional> deleteFilter; private final Supplier positionDeleteSinkSupplier; - @Nullable - private IcebergPositionDeletePageSink positionDeleteSink; + @Nullable private IcebergPositionDeletePageSink positionDeleteSink; public IcebergPageSource( List expectedColumns, @@ -66,7 +66,8 @@ public IcebergPageSource( Optional> deleteFilter, Supplier positionDeleteSinkSupplier) { // expectedColumns should contain columns which should be in the final Page - // requiredColumns should include all expectedColumns as well as any columns needed by the DeleteFilter + // requiredColumns should include all expectedColumns as well as any columns needed by the + // DeleteFilter requireNonNull(expectedColumns, "expectedColumns is null"); requireNonNull(requiredColumns, "requiredColumns is null"); this.expectedColumnIndexes = new int[expectedColumns.size()]; @@ -77,13 +78,13 @@ public IcebergPageSource( expectedColumnIndexes[i] = i; } - this.columnTypes = requiredColumns.stream() - .map(IcebergColumnHandle::getType) - .toArray(Type[]::new); + this.columnTypes = + requiredColumns.stream().map(IcebergColumnHandle::getType).toArray(Type[]::new); this.delegate = requireNonNull(delegate, "delegate is null"); this.projectionsAdapter = requireNonNull(projectionsAdapter, "projectionsAdapter is null"); this.deleteFilter = requireNonNull(deleteFilter, "deleteFilter is null"); - this.positionDeleteSinkSupplier = requireNonNull(positionDeleteSinkSupplier, "positionDeleteSinkSupplier is null"); + this.positionDeleteSinkSupplier = + requireNonNull(positionDeleteSinkSupplier, "positionDeleteSinkSupplier is null"); } @Override @@ -120,16 +121,24 @@ public Page getNextPage() { if (deleteFilter.isPresent()) { int positionCount = dataPage.getPositionCount(); int[] positionsToKeep = new int[positionCount]; - try (CloseableIterable filteredRows = deleteFilter.get() - .filter(CloseableIterable.withNoopClose(TrinoRow.fromPage(columnTypes, dataPage, positionCount)))) { + try (CloseableIterable filteredRows = + deleteFilter + .get() + .filter( + CloseableIterable.withNoopClose( + TrinoRow.fromPage(columnTypes, dataPage, positionCount)))) { int positionsToKeepCount = 0; for (TrinoRow rowToKeep : filteredRows) { positionsToKeep[positionsToKeepCount] = rowToKeep.getPosition(); positionsToKeepCount++; } - dataPage = dataPage.getPositions(positionsToKeep, 0, positionsToKeepCount).getColumns(expectedColumnIndexes); + dataPage = + dataPage + 
.getPositions(positionsToKeep, 0, positionsToKeepCount) + .getColumns(expectedColumnIndexes); } catch (IOException e) { - throw new TrinoException(ICEBERG_BAD_DATA, "Failed to filter rows during merge-on-read operation", e); + throw new TrinoException( + ICEBERG_BAD_DATA, "Failed to filter rows during merge-on-read operation", e); } } diff --git a/trino/src/main/java/com/netease/arctic/trino/unkeyed/IcebergPageSourceProvider.java b/trino/src/main/java/com/netease/arctic/trino/unkeyed/IcebergPageSourceProvider.java index 75a928215b..67147d02af 100644 --- a/trino/src/main/java/com/netease/arctic/trino/unkeyed/IcebergPageSourceProvider.java +++ b/trino/src/main/java/com/netease/arctic/trino/unkeyed/IcebergPageSourceProvider.java @@ -18,6 +18,67 @@ package com.netease.arctic.trino.unkeyed; +import static com.google.common.base.Preconditions.checkState; +import static com.google.common.base.Verify.verify; +import static com.google.common.collect.ImmutableList.toImmutableList; +import static com.google.common.collect.ImmutableMap.toImmutableMap; +import static com.google.common.collect.ImmutableSet.toImmutableSet; +import static com.google.common.collect.Maps.uniqueIndex; +import static io.airlift.slice.Slices.utf8Slice; +import static io.trino.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext; +import static io.trino.orc.OrcReader.INITIAL_BATCH_SIZE; +import static io.trino.orc.OrcReader.ProjectedLayout; +import static io.trino.orc.OrcReader.fullyProjectedLayout; +import static io.trino.parquet.ParquetTypeUtils.getColumnIO; +import static io.trino.parquet.ParquetTypeUtils.getDescriptors; +import static io.trino.parquet.predicate.PredicateUtils.buildPredicate; +import static io.trino.parquet.predicate.PredicateUtils.predicateMatches; +import static io.trino.parquet.reader.ParquetReaderColumn.getParquetReaderFields; +import static io.trino.plugin.iceberg.IcebergColumnHandle.TRINO_MERGE_FILE_RECORD_COUNT; +import static io.trino.plugin.iceberg.IcebergColumnHandle.TRINO_MERGE_PARTITION_DATA; +import static io.trino.plugin.iceberg.IcebergColumnHandle.TRINO_MERGE_PARTITION_SPEC_ID; +import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_BAD_DATA; +import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_CANNOT_OPEN_SPLIT; +import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_CURSOR_ERROR; +import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_MISSING_DATA; +import static io.trino.plugin.iceberg.IcebergMetadataColumn.FILE_MODIFIED_TIME; +import static io.trino.plugin.iceberg.IcebergMetadataColumn.FILE_PATH; +import static io.trino.plugin.iceberg.IcebergSessionProperties.getOrcLazyReadSmallRanges; +import static io.trino.plugin.iceberg.IcebergSessionProperties.getOrcMaxBufferSize; +import static io.trino.plugin.iceberg.IcebergSessionProperties.getOrcMaxMergeDistance; +import static io.trino.plugin.iceberg.IcebergSessionProperties.getOrcMaxReadBlockSize; +import static io.trino.plugin.iceberg.IcebergSessionProperties.getOrcStreamBufferSize; +import static io.trino.plugin.iceberg.IcebergSessionProperties.getOrcTinyStripeThreshold; +import static io.trino.plugin.iceberg.IcebergSessionProperties.getParquetMaxReadBlockRowCount; +import static io.trino.plugin.iceberg.IcebergSessionProperties.getParquetMaxReadBlockSize; +import static io.trino.plugin.iceberg.IcebergSessionProperties.isOrcBloomFiltersEnabled; +import static io.trino.plugin.iceberg.IcebergSessionProperties.isOrcNestedLazy; +import static 
io.trino.plugin.iceberg.IcebergSessionProperties.isParquetOptimizedReaderEnabled; +import static io.trino.plugin.iceberg.IcebergSessionProperties.isUseFileSizeFromMetadata; +import static io.trino.plugin.iceberg.IcebergSplitManager.ICEBERG_DOMAIN_COMPACTION_THRESHOLD; +import static io.trino.plugin.iceberg.IcebergUtil.deserializePartitionValue; +import static io.trino.plugin.iceberg.IcebergUtil.getColumns; +import static io.trino.plugin.iceberg.IcebergUtil.getLocationProvider; +import static io.trino.plugin.iceberg.IcebergUtil.getPartitionKeys; +import static io.trino.plugin.iceberg.TypeConverter.ICEBERG_BINARY_TYPE; +import static io.trino.plugin.iceberg.TypeConverter.ORC_ICEBERG_ID_KEY; +import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; +import static io.trino.spi.predicate.Utils.nativeValueToBlock; +import static io.trino.spi.type.BigintType.BIGINT; +import static io.trino.spi.type.BooleanType.BOOLEAN; +import static io.trino.spi.type.DateTimeEncoding.packDateTimeWithZone; +import static io.trino.spi.type.TimeZoneKey.UTC_KEY; +import static io.trino.spi.type.UuidType.UUID; +import static java.lang.String.format; +import static java.util.Locale.ENGLISH; +import static java.util.Objects.requireNonNull; +import static java.util.stream.Collectors.groupingBy; +import static java.util.stream.Collectors.mapping; +import static java.util.stream.Collectors.toList; +import static java.util.stream.Collectors.toUnmodifiableList; +import static org.apache.iceberg.MetadataColumns.ROW_POSITION; +import static org.joda.time.DateTimeZone.UTC; + import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.graph.Traverser; @@ -116,7 +177,9 @@ import org.apache.parquet.io.MessageColumnIO; import org.apache.parquet.schema.MessageType; import org.joda.time.DateTimeZone; + import javax.inject.Inject; + import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; @@ -129,72 +192,12 @@ import java.util.TimeZone; import java.util.function.Function; import java.util.function.Supplier; -import static com.google.common.base.Preconditions.checkState; -import static com.google.common.base.Verify.verify; -import static com.google.common.collect.ImmutableList.toImmutableList; -import static com.google.common.collect.ImmutableMap.toImmutableMap; -import static com.google.common.collect.ImmutableSet.toImmutableSet; -import static com.google.common.collect.Maps.uniqueIndex; -import static io.airlift.slice.Slices.utf8Slice; -import static io.trino.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext; -import static io.trino.orc.OrcReader.INITIAL_BATCH_SIZE; -import static io.trino.orc.OrcReader.ProjectedLayout; -import static io.trino.orc.OrcReader.fullyProjectedLayout; -import static io.trino.parquet.ParquetTypeUtils.getColumnIO; -import static io.trino.parquet.ParquetTypeUtils.getDescriptors; -import static io.trino.parquet.predicate.PredicateUtils.buildPredicate; -import static io.trino.parquet.predicate.PredicateUtils.predicateMatches; -import static io.trino.parquet.reader.ParquetReaderColumn.getParquetReaderFields; -import static io.trino.plugin.iceberg.IcebergColumnHandle.TRINO_MERGE_FILE_RECORD_COUNT; -import static io.trino.plugin.iceberg.IcebergColumnHandle.TRINO_MERGE_PARTITION_DATA; -import static io.trino.plugin.iceberg.IcebergColumnHandle.TRINO_MERGE_PARTITION_SPEC_ID; -import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_BAD_DATA; -import static 
io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_CANNOT_OPEN_SPLIT; -import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_CURSOR_ERROR; -import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_MISSING_DATA; -import static io.trino.plugin.iceberg.IcebergMetadataColumn.FILE_MODIFIED_TIME; -import static io.trino.plugin.iceberg.IcebergMetadataColumn.FILE_PATH; -import static io.trino.plugin.iceberg.IcebergSessionProperties.getOrcLazyReadSmallRanges; -import static io.trino.plugin.iceberg.IcebergSessionProperties.getOrcMaxBufferSize; -import static io.trino.plugin.iceberg.IcebergSessionProperties.getOrcMaxMergeDistance; -import static io.trino.plugin.iceberg.IcebergSessionProperties.getOrcMaxReadBlockSize; -import static io.trino.plugin.iceberg.IcebergSessionProperties.getOrcStreamBufferSize; -import static io.trino.plugin.iceberg.IcebergSessionProperties.getOrcTinyStripeThreshold; -import static io.trino.plugin.iceberg.IcebergSessionProperties.getParquetMaxReadBlockRowCount; -import static io.trino.plugin.iceberg.IcebergSessionProperties.getParquetMaxReadBlockSize; -import static io.trino.plugin.iceberg.IcebergSessionProperties.isOrcBloomFiltersEnabled; -import static io.trino.plugin.iceberg.IcebergSessionProperties.isOrcNestedLazy; -import static io.trino.plugin.iceberg.IcebergSessionProperties.isParquetOptimizedReaderEnabled; -import static io.trino.plugin.iceberg.IcebergSessionProperties.isUseFileSizeFromMetadata; -import static io.trino.plugin.iceberg.IcebergSplitManager.ICEBERG_DOMAIN_COMPACTION_THRESHOLD; -import static io.trino.plugin.iceberg.IcebergUtil.deserializePartitionValue; -import static io.trino.plugin.iceberg.IcebergUtil.getColumns; -import static io.trino.plugin.iceberg.IcebergUtil.getLocationProvider; -import static io.trino.plugin.iceberg.IcebergUtil.getPartitionKeys; -import static io.trino.plugin.iceberg.TypeConverter.ICEBERG_BINARY_TYPE; -import static io.trino.plugin.iceberg.TypeConverter.ORC_ICEBERG_ID_KEY; -import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; -import static io.trino.spi.predicate.Utils.nativeValueToBlock; -import static io.trino.spi.type.BigintType.BIGINT; -import static io.trino.spi.type.BooleanType.BOOLEAN; -import static io.trino.spi.type.DateTimeEncoding.packDateTimeWithZone; -import static io.trino.spi.type.TimeZoneKey.UTC_KEY; -import static io.trino.spi.type.UuidType.UUID; -import static java.lang.String.format; -import static java.util.Locale.ENGLISH; -import static java.util.Objects.requireNonNull; -import static java.util.stream.Collectors.groupingBy; -import static java.util.stream.Collectors.mapping; -import static java.util.stream.Collectors.toList; -import static java.util.stream.Collectors.toUnmodifiableList; -import static org.apache.iceberg.MetadataColumns.ROW_POSITION; -import static org.joda.time.DateTimeZone.UTC; /** - * Extend IcebergPageSourceProvider to provider idToConstant that the map of columns id to constant value + * Extend IcebergPageSourceProvider to provider idToConstant that the map of columns id to constant + * value */ -public class IcebergPageSourceProvider - implements ConnectorPageSourceProvider { +public class IcebergPageSourceProvider implements ConnectorPageSourceProvider { private static final String AVRO_FIELD_ID = "field-id"; private final TrinoFileSystemFactory fileSystemFactory; private final FileFormatDataSourceStats fileFormatDataSourceStats; @@ -214,8 +217,10 @@ public IcebergPageSourceProvider( JsonCodec jsonCodec, IcebergFileWriterFactory fileWriterFactory) { 
this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null"); - this.fileFormatDataSourceStats = requireNonNull(fileFormatDataSourceStats, "fileFormatDataSourceStats is null"); - this.orcReaderOptions = requireNonNull(orcReaderConfig, "orcReaderConfig is null").toOrcReaderOptions(); + this.fileFormatDataSourceStats = + requireNonNull(fileFormatDataSourceStats, "fileFormatDataSourceStats is null"); + this.orcReaderOptions = + requireNonNull(orcReaderConfig, "orcReaderConfig is null").toOrcReaderOptions(); this.parquetReaderOptions = requireNonNull(parquetReaderConfig, "parquetReaderConfig is null").toParquetReaderOptions(); this.typeManager = requireNonNull(typeManager, "typeManager is null"); @@ -239,13 +244,25 @@ public ConnectorPageSource createPageSource( idToConstant.put( MetadataColumns.CHANGE_ACTION_ID, Optional.ofNullable(icebergSplit.getFileType()) - .map(s -> s == DataFileType.EQ_DELETE_FILE ? ChangeAction.DELETE.name() : ChangeAction.INSERT.name())); + .map( + s -> + s == DataFileType.EQ_DELETE_FILE + ? ChangeAction.DELETE.name() + : ChangeAction.INSERT.name())); DateTimeZone dateTimeZone = UTC; if (connectorTable instanceof AdaptHiveIcebergTableHandle) { dateTimeZone = DateTimeZone.forID(TimeZone.getDefault().getID()); } - return createPageSource(transaction, session, connectorSplit, connectorTable, columns, dynamicFilter, - idToConstant, true, dateTimeZone); + return createPageSource( + transaction, + session, + connectorSplit, + connectorTable, + columns, + dynamicFilter, + idToConstant, + true, + dateTimeZone); } public ConnectorPageSource createPageSource( @@ -261,9 +278,8 @@ public ConnectorPageSource createPageSource( IcebergSplit split = (IcebergSplit) connectorSplit; IcebergTableHandle table = (IcebergTableHandle) connectorTable; - List icebergColumns = columns.stream() - .map(IcebergColumnHandle.class::cast) - .collect(toImmutableList()); + List icebergColumns = + columns.stream().map(IcebergColumnHandle.class::cast).collect(toImmutableList()); TrinoFileSystem fileSystem = fileSystemFactory.create(session); FileIO fileIO = fileSystem.toFileIo(); @@ -271,28 +287,30 @@ public ConnectorPageSource createPageSource( Schema tableSchema = SchemaParser.fromJson(table.getTableSchemaJson()); // Creating a DeleteFilter with no requestedSchema ensures `deleteFilterRequiredSchema` // is only columns needed by the filter. - List deleteFilterRequiredSchema = getColumns( - useIcebergDelete ? - new TrinoDeleteFilter( - dummyFileScanTask, - tableSchema, - ImmutableList.of(), - fileIO) - .requiredSchema() : tableSchema, - typeManager); - - PartitionSpec partitionSpec = PartitionSpecParser.fromJson(tableSchema, split.getPartitionSpecJson()); - org.apache.iceberg.types.Type[] partitionColumnTypes = partitionSpec.fields().stream() - .map(field -> field.transform().getResultType(tableSchema.findType(field.sourceId()))) - .toArray(org.apache.iceberg.types.Type[]::new); - PartitionData partitionData = PartitionData.fromJson(split.getPartitionDataJson(), partitionColumnTypes); + List deleteFilterRequiredSchema = + getColumns( + useIcebergDelete + ? 
new TrinoDeleteFilter(dummyFileScanTask, tableSchema, ImmutableList.of(), fileIO) + .requiredSchema() + : tableSchema, + typeManager); + + PartitionSpec partitionSpec = + PartitionSpecParser.fromJson(tableSchema, split.getPartitionSpecJson()); + org.apache.iceberg.types.Type[] partitionColumnTypes = + partitionSpec.fields().stream() + .map(field -> field.transform().getResultType(tableSchema.findType(field.sourceId()))) + .toArray(org.apache.iceberg.types.Type[]::new); + PartitionData partitionData = + PartitionData.fromJson(split.getPartitionDataJson(), partitionColumnTypes); Map> partitionKeys = getPartitionKeys(partitionData, partitionSpec); - //for arctic + // for arctic if (idToConstant != null) { - partitionKeys = ImmutableMap.>builder() - .putAll(partitionKeys) - .putAll(idToConstant) - .buildOrThrow(); + partitionKeys = + ImmutableMap.>builder() + .putAll(partitionKeys) + .putAll(idToConstant) + .buildOrThrow(); } ImmutableList.Builder requiredColumnsBuilder = ImmutableList.builder(); @@ -302,70 +320,81 @@ public ConnectorPageSource createPageSource( .forEach(requiredColumnsBuilder::add); List requiredColumns = requiredColumnsBuilder.build(); - TupleDomain effectivePredicate = table.getUnenforcedPredicate() - .intersect(dynamicFilter.getCurrentPredicate().transformKeys(IcebergColumnHandle.class::cast)) - .simplify(ICEBERG_DOMAIN_COMPACTION_THRESHOLD); + TupleDomain effectivePredicate = + table + .getUnenforcedPredicate() + .intersect( + dynamicFilter.getCurrentPredicate().transformKeys(IcebergColumnHandle.class::cast)) + .simplify(ICEBERG_DOMAIN_COMPACTION_THRESHOLD); if (effectivePredicate.isNone()) { return new EmptyPageSource(); } - TrinoInputFile inputfile = isUseFileSizeFromMetadata(session) ? - fileSystem.newInputFile(split.getPath(), split.getFileSize()) - : fileSystem.newInputFile(split.getPath()); - - IcebergPageSourceProvider.ReaderPageSourceWithRowPositions readerPageSourceWithRowPositions = createDataPageSource( - session, - fileSystem, - inputfile, - split.getStart(), - split.getLength(), - split.getFileRecordCount(), - partitionSpec.specId(), - split.getPartitionDataJson(), - split.getFileFormat(), - SchemaParser.fromJson(table.getTableSchemaJson()), - requiredColumns, - effectivePredicate, - table.getNameMappingJson().map(NameMappingParser::fromJson), - partitionKeys, - dateTimeZone); - ReaderPageSource dataPageSource = readerPageSourceWithRowPositions.getReaderPageSource(); + TrinoInputFile inputfile = + isUseFileSizeFromMetadata(session) + ? 
fileSystem.newInputFile(split.getPath(), split.getFileSize()) + : fileSystem.newInputFile(split.getPath()); - Optional projectionsAdapter = dataPageSource.getReaderColumns().map(readerColumns -> - new ReaderProjectionsAdapter( + IcebergPageSourceProvider.ReaderPageSourceWithRowPositions readerPageSourceWithRowPositions = + createDataPageSource( + session, + fileSystem, + inputfile, + split.getStart(), + split.getLength(), + split.getFileRecordCount(), + partitionSpec.specId(), + split.getPartitionDataJson(), + split.getFileFormat(), + SchemaParser.fromJson(table.getTableSchemaJson()), requiredColumns, - readerColumns, - column -> ((IcebergColumnHandle) column).getType(), - IcebergPageSourceProvider::applyProjection)); - - DeleteFilter deleteFilter = new TrinoDeleteFilter( - dummyFileScanTask, - tableSchema, - requiredColumns, - fileIO); + effectivePredicate, + table.getNameMappingJson().map(NameMappingParser::fromJson), + partitionKeys, + dateTimeZone); + ReaderPageSource dataPageSource = readerPageSourceWithRowPositions.getReaderPageSource(); - Optional partition = partitionSpec.isUnpartitioned() ? Optional.empty() : Optional.of(partitionData); + Optional projectionsAdapter = + dataPageSource + .getReaderColumns() + .map( + readerColumns -> + new ReaderProjectionsAdapter( + requiredColumns, + readerColumns, + column -> ((IcebergColumnHandle) column).getType(), + IcebergPageSourceProvider::applyProjection)); + + DeleteFilter deleteFilter = + new TrinoDeleteFilter(dummyFileScanTask, tableSchema, requiredColumns, fileIO); + + Optional partition = + partitionSpec.isUnpartitioned() ? Optional.empty() : Optional.of(partitionData); LocationProvider locationProvider = - getLocationProvider(table.getSchemaTableName(), table.getTableLocation(), table.getStorageProperties()); - Supplier positionDeleteSink = () -> new IcebergPositionDeletePageSink( - split.getPath(), - partitionSpec, - partition, - locationProvider, - fileWriterFactory, - fileSystem, - jsonCodec, - session, - split.getFileFormat(), - table.getStorageProperties(), - split.getFileRecordCount()); + getLocationProvider( + table.getSchemaTableName(), table.getTableLocation(), table.getStorageProperties()); + Supplier positionDeleteSink = + () -> + new IcebergPositionDeletePageSink( + split.getPath(), + partitionSpec, + partition, + locationProvider, + fileWriterFactory, + fileSystem, + jsonCodec, + session, + split.getFileFormat(), + table.getStorageProperties(), + split.getFileRecordCount()); return new IcebergPageSource( icebergColumns, requiredColumns, dataPageSource.get(), projectionsAdapter, - // Optional.of(deleteFilter).filter(filter -> filter.hasPosDeletes() || filter.hasEqDeletes()), + // Optional.of(deleteFilter).filter(filter -> filter.hasPosDeletes() || + // filter.hasEqDeletes()), // In order to be compatible with iceberg version 0.12 useIcebergDelete ? 
Optional.of(deleteFilter) : Optional.empty(), positionDeleteSink); @@ -442,7 +471,8 @@ public ReaderPageSourceWithRowPositions createDataPageSource( nameMapping, dataColumns); default: - throw new TrinoException(NOT_SUPPORTED, "File format not supported for Iceberg: " + fileFormat); + throw new TrinoException( + NOT_SUPPORTED, "File format not supported for Iceberg: " + fileFormat); } } @@ -464,34 +494,49 @@ private static ReaderPageSourceWithRowPositions createOrcPageSource( try { orcDataSource = new TrinoOrcDataSource(inputFile, options, stats); - OrcReader reader = OrcReader.createOrcReader(orcDataSource, options) - .orElseThrow(() -> new TrinoException(ICEBERG_BAD_DATA, "ORC file is zero length")); + OrcReader reader = + OrcReader.createOrcReader(orcDataSource, options) + .orElseThrow(() -> new TrinoException(ICEBERG_BAD_DATA, "ORC file is zero length")); List fileColumns = reader.getRootColumn().getNestedColumns(); if (nameMapping.isPresent() && !hasIds(reader.getRootColumn())) { - fileColumns = fileColumns.stream() - .map(orcColumn -> setMissingFieldIds( - orcColumn, nameMapping.get(), ImmutableList.of(orcColumn.getColumnName()))) - .collect(toImmutableList()); + fileColumns = + fileColumns.stream() + .map( + orcColumn -> + setMissingFieldIds( + orcColumn, + nameMapping.get(), + ImmutableList.of(orcColumn.getColumnName()))) + .collect(toImmutableList()); } Map fileColumnsByIcebergId = mapIdsToOrcFileColumns(fileColumns); - TupleDomainOrcPredicateBuilder predicateBuilder = TupleDomainOrcPredicate.builder() - .setBloomFiltersEnabled(options.isBloomFiltersEnabled()); - Map effectivePredicateDomains = effectivePredicate.getDomains() - .orElseThrow(() -> new IllegalArgumentException("Effective predicate is none")); + TupleDomainOrcPredicateBuilder predicateBuilder = + TupleDomainOrcPredicate.builder().setBloomFiltersEnabled(options.isBloomFiltersEnabled()); + Map effectivePredicateDomains = + effectivePredicate + .getDomains() + .orElseThrow(() -> new IllegalArgumentException("Effective predicate is none")); Optional columnProjections = projectColumns(columns); - Map>> projectionsByFieldId = columns.stream() - .collect(groupingBy( - column -> column.getBaseColumnIdentity().getId(), - mapping(IcebergColumnHandle::getPath, toUnmodifiableList()))); - - List readColumns = columnProjections - .map(readerColumns -> (List) readerColumns.get() - .stream().map(IcebergColumnHandle.class::cast).collect(toImmutableList())) - .orElse(columns); + Map>> projectionsByFieldId = + columns.stream() + .collect( + groupingBy( + column -> column.getBaseColumnIdentity().getId(), + mapping(IcebergColumnHandle::getPath, toUnmodifiableList()))); + + List readColumns = + columnProjections + .map( + readerColumns -> + (List) + readerColumns.get().stream() + .map(IcebergColumnHandle.class::cast) + .collect(toImmutableList())) + .orElse(columns); List fileReadColumns = new ArrayList<>(readColumns.size()); List fileReadTypes = new ArrayList<>(readColumns.size()); List projectedLayouts = new ArrayList<>(readColumns.size()); @@ -502,12 +547,18 @@ private static ReaderPageSourceWithRowPositions createOrcPageSource( OrcColumn orcColumn = fileColumnsByIcebergId.get(column.getId()); if (column.isIsDeletedColumn()) { - columnAdaptations.add(ColumnAdaptation.constantColumn(nativeValueToBlock(BOOLEAN, false))); + columnAdaptations.add( + ColumnAdaptation.constantColumn(nativeValueToBlock(BOOLEAN, false))); } else if (partitionKeys.containsKey(column.getId())) { Type trinoType = column.getType(); - 
columnAdaptations.add(ColumnAdaptation.constantColumn(nativeValueToBlock( - trinoType, - deserializePartitionValue(trinoType, partitionKeys.get(column.getId()).orElse(null), column.getName())))); + columnAdaptations.add( + ColumnAdaptation.constantColumn( + nativeValueToBlock( + trinoType, + deserializePartitionValue( + trinoType, + partitionKeys.get(column.getId()).orElse(null), + column.getName())))); } else if (column.isPathColumn()) { columnAdaptations.add( ColumnAdaptation.constantColumn( @@ -519,34 +570,39 @@ private static ReaderPageSourceWithRowPositions createOrcPageSource( FILE_MODIFIED_TIME.getType(), packDateTimeWithZone(inputFile.modificationTime(), UTC_KEY)))); } else if (column.isUpdateRowIdColumn() || column.isMergeRowIdColumn()) { - // $row_id is a composite of multiple physical columns. It is assembled by the IcebergPageSource + // $row_id is a composite of multiple physical columns. It is assembled by the + // IcebergPageSource columnAdaptations.add(ColumnAdaptation.nullColumn(column.getType())); } else if (column.isRowPositionColumn()) { columnAdaptations.add(ColumnAdaptation.positionColumn()); } else if (column.getId() == TRINO_MERGE_FILE_RECORD_COUNT) { - columnAdaptations.add(ColumnAdaptation.constantColumn(nativeValueToBlock(column.getType(), fileRecordCount))); + columnAdaptations.add( + ColumnAdaptation.constantColumn( + nativeValueToBlock(column.getType(), fileRecordCount))); } else if (column.getId() == TRINO_MERGE_PARTITION_SPEC_ID) { columnAdaptations.add( - ColumnAdaptation.constantColumn(nativeValueToBlock(column.getType(), (long) partitionSpecId))); + ColumnAdaptation.constantColumn( + nativeValueToBlock(column.getType(), (long) partitionSpecId))); } else if (column.getId() == TRINO_MERGE_PARTITION_DATA) { columnAdaptations.add( - ColumnAdaptation.constantColumn(nativeValueToBlock(column.getType(), utf8Slice(partitionData)))); + ColumnAdaptation.constantColumn( + nativeValueToBlock(column.getType(), utf8Slice(partitionData)))); } else if (orcColumn != null) { Type readType = getOrcReadType(column.getType(), typeManager); - if (column.getType() == UUID && !"UUID".equals(orcColumn.getAttributes().get(ICEBERG_BINARY_TYPE))) { + if (column.getType() == UUID + && !"UUID".equals(orcColumn.getAttributes().get(ICEBERG_BINARY_TYPE))) { throw new TrinoException( ICEBERG_BAD_DATA, format( "Expected ORC column for UUID data to be annotated with %s=UUID: %s", - ICEBERG_BINARY_TYPE, - orcColumn) - ); + ICEBERG_BINARY_TYPE, orcColumn)); } List> fieldIdProjections = projectionsByFieldId.get(column.getId()); - ProjectedLayout projectedLayout = IcebergPageSourceProvider - .IcebergOrcProjectedLayout.createProjectedLayout(orcColumn, fieldIdProjections); + ProjectedLayout projectedLayout = + IcebergPageSourceProvider.IcebergOrcProjectedLayout.createProjectedLayout( + orcColumn, fieldIdProjections); int sourceIndex = fileReadColumns.size(); columnAdaptations.add(ColumnAdaptation.sourceColumn(sourceIndex)); @@ -554,11 +610,12 @@ private static ReaderPageSourceWithRowPositions createOrcPageSource( fileReadTypes.add(readType); projectedLayouts.add(projectedLayout); - for (Map.Entry domainEntry : effectivePredicateDomains.entrySet()) { + for (Map.Entry domainEntry : + effectivePredicateDomains.entrySet()) { IcebergColumnHandle predicateColumn = domainEntry.getKey(); OrcColumn predicateOrcColumn = fileColumnsByIcebergId.get(predicateColumn.getId()); - if (predicateOrcColumn != null && - column.getColumnIdentity().equals(predicateColumn.getBaseColumnIdentity())) { + if 
(predicateOrcColumn != null + && column.getColumnIdentity().equals(predicateColumn.getBaseColumnIdentity())) { predicateBuilder.addColumn(predicateOrcColumn.getColumnId(), domainEntry.getValue()); } } @@ -569,18 +626,19 @@ private static ReaderPageSourceWithRowPositions createOrcPageSource( AggregatedMemoryContext memoryUsage = newSimpleAggregatedMemoryContext(); OrcDataSourceId orcDataSourceId = orcDataSource.getId(); - OrcRecordReader recordReader = reader.createRecordReader( - fileReadColumns, - fileReadTypes, - projectedLayouts, - predicateBuilder.build(), - start, - length, - UTC, - memoryUsage, - INITIAL_BATCH_SIZE, - exception -> handleException(orcDataSourceId, exception), - new IcebergPageSourceProvider.IdBasedFieldMapperFactory(readColumns)); + OrcRecordReader recordReader = + reader.createRecordReader( + fileReadColumns, + fileReadTypes, + projectedLayouts, + predicateBuilder.build(), + start, + length, + UTC, + memoryUsage, + INITIAL_BATCH_SIZE, + exception -> handleException(orcDataSourceId, exception), + new IcebergPageSourceProvider.IdBasedFieldMapperFactory(readColumns)); return new ReaderPageSourceWithRowPositions( new ReaderPageSource( @@ -609,9 +667,10 @@ private static ReaderPageSourceWithRowPositions createOrcPageSource( if (e instanceof TrinoException) { throw (TrinoException) e; } - String message = format( - "Error opening Iceberg split %s (offset=%s, length=%s): %s", - inputFile.location(), start, length, e.getMessage()); + String message = + format( + "Error opening Iceberg split %s (offset=%s, length=%s): %s", + inputFile.location(), start, length, e.getMessage()); if (e instanceof BlockMissingException) { throw new TrinoException(ICEBERG_MISSING_DATA, message, e); } @@ -627,11 +686,12 @@ private static boolean hasIds(OrcColumn column) { return column.getNestedColumns().stream().anyMatch(IcebergPageSourceProvider::hasIds); } - private static OrcColumn setMissingFieldIds(OrcColumn column, NameMapping nameMapping, List qualifiedPath) { + private static OrcColumn setMissingFieldIds( + OrcColumn column, NameMapping nameMapping, List qualifiedPath) { MappedField mappedField = nameMapping.find(qualifiedPath); - ImmutableMap.Builder attributes = ImmutableMap.builder() - .putAll(column.getAttributes()); + ImmutableMap.Builder attributes = + ImmutableMap.builder().putAll(column.getAttributes()); if (mappedField != null && mappedField.id() != null) { attributes.put(ORC_ICEBERG_ID_KEY, String.valueOf(mappedField.id())); } @@ -643,25 +703,29 @@ private static OrcColumn setMissingFieldIds(OrcColumn column, NameMapping nameMa column.getColumnType(), column.getOrcDataSourceId(), column.getNestedColumns().stream() - .map(nestedColumn -> { - ImmutableList.Builder nextQualifiedPath = ImmutableList.builder() - .addAll(qualifiedPath); - if (column.getColumnType() == OrcType.OrcTypeKind.LIST) { - // The Trino ORC reader uses "item" for list element names, but the NameMapper expects "element" - nextQualifiedPath.add("element"); - } else { - nextQualifiedPath.add(nestedColumn.getColumnName()); - } - return setMissingFieldIds(nestedColumn, nameMapping, nextQualifiedPath.build()); - }) + .map( + nestedColumn -> { + ImmutableList.Builder nextQualifiedPath = + ImmutableList.builder().addAll(qualifiedPath); + if (column.getColumnType() == OrcType.OrcTypeKind.LIST) { + // The Trino ORC reader uses "item" for list element names, but the NameMapper + // expects "element" + nextQualifiedPath.add("element"); + } else { + nextQualifiedPath.add(nestedColumn.getColumnName()); + } + return 
setMissingFieldIds(nestedColumn, nameMapping, nextQualifiedPath.build()); + }) .collect(toImmutableList()), attributes.buildOrThrow()); } /** - * Gets the index based dereference chain to get from the readColumnHandle to the expectedColumnHandle + * Gets the index based dereference chain to get from the readColumnHandle to the + * expectedColumnHandle */ - private static List applyProjection(ColumnHandle expectedColumnHandle, ColumnHandle readColumnHandle) { + private static List applyProjection( + ColumnHandle expectedColumnHandle, ColumnHandle readColumnHandle) { IcebergColumnHandle expectedColumn = (IcebergColumnHandle) expectedColumnHandle; IcebergColumnHandle readColumn = (IcebergColumnHandle) readColumnHandle; checkState(readColumn.isBaseColumn(), "Read column path must be a base column"); @@ -681,18 +745,21 @@ private static Map mapIdsToOrcFileColumns(List co ImmutableMap.Builder columnsById = ImmutableMap.builder(); Traverser.forTree(OrcColumn::getNestedColumns) .depthFirstPreOrder(columns) - .forEach(column -> { - String fieldId = column.getAttributes().get(ORC_ICEBERG_ID_KEY); - if (fieldId != null) { - columnsById.put(Integer.parseInt(fieldId), column); - } - }); + .forEach( + column -> { + String fieldId = column.getAttributes().get(ORC_ICEBERG_ID_KEY); + if (fieldId != null) { + columnsById.put(Integer.parseInt(fieldId), column); + } + }); return columnsById.buildOrThrow(); } private static Integer getIcebergFieldId(OrcColumn column) { String icebergId = column.getAttributes().get(ORC_ICEBERG_ID_KEY); - verify(icebergId != null, format("column %s does not have %s property", column, ORC_ICEBERG_ID_KEY)); + verify( + icebergId != null, + format("column %s does not have %s property", column, ORC_ICEBERG_ID_KEY)); return Integer.valueOf(icebergId); } @@ -706,16 +773,20 @@ private static Type getOrcReadType(Type columnType, TypeManager typeManager) { return new MapType(keyType, valueType, typeManager.getTypeOperators()); } if (columnType instanceof RowType) { - return RowType.from(((RowType) columnType).getFields().stream() - .map(field -> new RowType.Field(field.getName(), getOrcReadType(field.getType(), typeManager))) - .collect(toImmutableList())); + return RowType.from( + ((RowType) columnType) + .getFields().stream() + .map( + field -> + new RowType.Field( + field.getName(), getOrcReadType(field.getType(), typeManager))) + .collect(toImmutableList())); } return columnType; } - private static class IdBasedFieldMapperFactory - implements OrcReader.FieldMapperFactory { + private static class IdBasedFieldMapperFactory implements OrcReader.FieldMapperFactory { // Stores a mapping between subfield names and ids for every top-level/nested column id private final Map> fieldNameToIdMappingForTableColumns; @@ -725,7 +796,8 @@ public IdBasedFieldMapperFactory(List columns) { ImmutableMap.Builder> mapping = ImmutableMap.builder(); for (IcebergColumnHandle column : columns) { if (column.isUpdateRowIdColumn() || column.isMergeRowIdColumn()) { - // The update $row_id column contains fields which should not be accounted for in the mapping. + // The update $row_id column contains fields which should not be accounted for in the + // mapping. 
continue; } @@ -738,12 +810,12 @@ public IdBasedFieldMapperFactory(List columns) { @Override public OrcReader.FieldMapper create(OrcColumn column) { - Map nestedColumns = uniqueIndex( - column.getNestedColumns(), - IcebergPageSourceProvider::getIcebergFieldId); + Map nestedColumns = + uniqueIndex(column.getNestedColumns(), IcebergPageSourceProvider::getIcebergFieldId); int icebergId = getIcebergFieldId(column); - return new IdBasedFieldMapper(nestedColumns, fieldNameToIdMappingForTableColumns.get(icebergId)); + return new IdBasedFieldMapper( + nestedColumns, fieldNameToIdMappingForTableColumns.get(icebergId)); } private static void populateMapping( @@ -753,8 +825,11 @@ private static void populateMapping( fieldNameToIdMappingForTableColumns.put( identity.getId(), children.stream() - // Lower casing is required here because ORC StructColumnReader does the same before mapping - .collect(toImmutableMap(child -> child.getName().toLowerCase(ENGLISH), ColumnIdentity::getId))); + // Lower casing is required here because ORC StructColumnReader does the same before + // mapping + .collect( + toImmutableMap( + child -> child.getName().toLowerCase(ENGLISH), ColumnIdentity::getId))); for (ColumnIdentity child : children) { populateMapping(child, fieldNameToIdMappingForTableColumns); @@ -762,24 +837,25 @@ private static void populateMapping( } } - private static class IdBasedFieldMapper - implements OrcReader.FieldMapper { + private static class IdBasedFieldMapper implements OrcReader.FieldMapper { private final Map idToColumnMappingForFile; private final Map nameToIdMappingForTableColumns; public IdBasedFieldMapper( Map idToColumnMappingForFile, Map nameToIdMappingForTableColumns) { - this.idToColumnMappingForFile = requireNonNull(idToColumnMappingForFile, "idToColumnMappingForFile is null"); - this.nameToIdMappingForTableColumns = requireNonNull( - nameToIdMappingForTableColumns, "nameToIdMappingForTableColumns is null"); + this.idToColumnMappingForFile = + requireNonNull(idToColumnMappingForFile, "idToColumnMappingForFile is null"); + this.nameToIdMappingForTableColumns = + requireNonNull(nameToIdMappingForTableColumns, "nameToIdMappingForTableColumns is null"); } @Override public OrcColumn get(String fieldName) { - int fieldId = requireNonNull( - nameToIdMappingForTableColumns.get(fieldName), - () -> format("Id mapping for field %s not found", fieldName)); + int fieldId = + requireNonNull( + nameToIdMappingForTableColumns.get(fieldName), + () -> format("Id mapping for field %s not found", fieldName)); return idToColumnMappingForFile.get(fieldId); } } @@ -807,30 +883,43 @@ private static ReaderPageSourceWithRowPositions createParquetPageSource( FileMetaData fileMetaData = parquetMetadata.getFileMetaData(); MessageType fileSchema = fileMetaData.getSchema(); if (nameMapping.isPresent() && !ParquetSchemaUtil.hasIds(fileSchema)) { - // NameMapping conversion is necessary because MetadataReader converts all column names to lowercase + // NameMapping conversion is necessary because MetadataReader converts all column names to + // lowercase // and NameMapping is case sensitive - fileSchema = ParquetSchemaUtil.applyNameMapping(fileSchema, convertToLowercase(nameMapping.get())); + fileSchema = + ParquetSchemaUtil.applyNameMapping(fileSchema, convertToLowercase(nameMapping.get())); } // Mapping from Iceberg field ID to Parquet fields. 
- Map parquetIdToField = fileSchema.getFields().stream() - .filter(field -> field.getId() != null) - .collect(toImmutableMap(field -> field.getId().intValue(), Function.identity())); + Map parquetIdToField = + fileSchema.getFields().stream() + .filter(field -> field.getId() != null) + .collect(toImmutableMap(field -> field.getId().intValue(), Function.identity())); Optional columnProjections = projectColumns(regularColumns); - List readColumns = columnProjections - .map(readerColumns -> (List) readerColumns.get() - .stream().map(IcebergColumnHandle.class::cast).collect(toImmutableList())) - .orElse(regularColumns); - - List parquetFields = readColumns.stream() - .map(column -> parquetIdToField.get(column.getId())) - .collect(toList()); - - MessageType requestedSchema = new MessageType( - fileSchema.getName(), parquetFields.stream().filter(Objects::nonNull).collect(toImmutableList())); - Map, ColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, requestedSchema); - TupleDomain parquetTupleDomain = getParquetTupleDomain(descriptorsByPath, effectivePredicate); + List readColumns = + columnProjections + .map( + readerColumns -> + (List) + readerColumns.get().stream() + .map(IcebergColumnHandle.class::cast) + .collect(toImmutableList())) + .orElse(regularColumns); + + List parquetFields = + readColumns.stream() + .map(column -> parquetIdToField.get(column.getId())) + .collect(toList()); + + MessageType requestedSchema = + new MessageType( + fileSchema.getName(), + parquetFields.stream().filter(Objects::nonNull).collect(toImmutableList())); + Map, ColumnDescriptor> descriptorsByPath = + getDescriptors(fileSchema, requestedSchema); + TupleDomain parquetTupleDomain = + getParquetTupleDomain(descriptorsByPath, effectivePredicate); TupleDomainParquetPredicate parquetPredicate = buildPredicate(requestedSchema, parquetTupleDomain, descriptorsByPath, dateTimeZone); @@ -841,8 +930,9 @@ private static ReaderPageSourceWithRowPositions createParquetPageSource( List blocks = new ArrayList<>(); for (BlockMetaData block : parquetMetadata.getBlocks()) { long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset(); - if (start <= firstDataPage && firstDataPage < start + length && - predicateMatches( + if (start <= firstDataPage + && firstDataPage < start + length + && predicateMatches( parquetPredicate, block, dataSource, @@ -864,31 +954,38 @@ private static ReaderPageSourceWithRowPositions createParquetPageSource( MessageColumnIO messageColumnIO = getColumnIO(fileSchema, requestedSchema); - ConstantPopulatingPageSource.Builder constantPopulatingPageSourceBuilder = ConstantPopulatingPageSource.builder(); + ConstantPopulatingPageSource.Builder constantPopulatingPageSourceBuilder = + ConstantPopulatingPageSource.builder(); int parquetSourceChannel = 0; - ImmutableList.Builder parquetReaderColumnBuilder = ImmutableList.builder(); + ImmutableList.Builder parquetReaderColumnBuilder = + ImmutableList.builder(); for (int columnIndex = 0; columnIndex < readColumns.size(); columnIndex++) { IcebergColumnHandle column = readColumns.get(columnIndex); if (column.isIsDeletedColumn()) { constantPopulatingPageSourceBuilder.addConstantColumn(nativeValueToBlock(BOOLEAN, false)); } else if (partitionKeys.containsKey(column.getId())) { Type trinoType = column.getType(); - constantPopulatingPageSourceBuilder.addConstantColumn(nativeValueToBlock( - trinoType, - deserializePartitionValue(trinoType, partitionKeys.get(column.getId()).orElse(null), column.getName()))); + 
constantPopulatingPageSourceBuilder.addConstantColumn( + nativeValueToBlock( + trinoType, + deserializePartitionValue( + trinoType, + partitionKeys.get(column.getId()).orElse(null), + column.getName()))); } else if (column.isPathColumn()) { constantPopulatingPageSourceBuilder.addConstantColumn( nativeValueToBlock(FILE_PATH.getType(), utf8Slice(inputFile.location()))); } else if (column.isFileModifiedTimeColumn()) { constantPopulatingPageSourceBuilder.addConstantColumn( nativeValueToBlock( - FILE_MODIFIED_TIME.getType(), packDateTimeWithZone(inputFile.modificationTime(), UTC_KEY) - ) - ); + FILE_MODIFIED_TIME.getType(), + packDateTimeWithZone(inputFile.modificationTime(), UTC_KEY))); } else if (column.isUpdateRowIdColumn() || column.isMergeRowIdColumn()) { - // $row_id is a composite of multiple physical columns, it is assembled by the IcebergPageSource - parquetReaderColumnBuilder.add(new ParquetReaderColumn(column.getType(), Optional.empty(), false)); + // $row_id is a composite of multiple physical columns, it is assembled by the + // IcebergPageSource + parquetReaderColumnBuilder.add( + new ParquetReaderColumn(column.getType(), Optional.empty(), false)); constantPopulatingPageSourceBuilder.addDelegateColumn(parquetSourceChannel); parquetSourceChannel++; } else if (column.isRowPositionColumn()) { @@ -896,7 +993,8 @@ private static ReaderPageSourceWithRowPositions createParquetPageSource( constantPopulatingPageSourceBuilder.addDelegateColumn(parquetSourceChannel); parquetSourceChannel++; } else if (column.getId() == TRINO_MERGE_FILE_RECORD_COUNT) { - constantPopulatingPageSourceBuilder.addConstantColumn(nativeValueToBlock(column.getType(), fileRecordCount)); + constantPopulatingPageSourceBuilder.addConstantColumn( + nativeValueToBlock(column.getType(), fileRecordCount)); } else if (column.getId() == TRINO_MERGE_PARTITION_SPEC_ID) { constantPopulatingPageSourceBuilder.addConstantColumn( nativeValueToBlock(column.getType(), (long) partitionSpecId)); @@ -908,15 +1006,17 @@ private static ReaderPageSourceWithRowPositions createParquetPageSource( Type trinoType = column.getBaseType(); if (parquetField == null) { - parquetReaderColumnBuilder.add(new ParquetReaderColumn(trinoType, Optional.empty(), false)); + parquetReaderColumnBuilder.add( + new ParquetReaderColumn(trinoType, Optional.empty(), false)); } else { // The top level columns are already mapped by name/id appropriately. 
ColumnIO columnIO = messageColumnIO.getChild(parquetField.getName()); - parquetReaderColumnBuilder.add(new ParquetReaderColumn( - trinoType, - IcebergParquetColumnIOConverter.constructField( - new FieldContext(trinoType, column.getColumnIdentity()), columnIO), - false)); + parquetReaderColumnBuilder.add( + new ParquetReaderColumn( + trinoType, + IcebergParquetColumnIOConverter.constructField( + new FieldContext(trinoType, column.getColumnIdentity()), columnIO), + false)); } constantPopulatingPageSourceBuilder.addDelegateColumn(parquetSourceChannel); @@ -926,19 +1026,21 @@ private static ReaderPageSourceWithRowPositions createParquetPageSource( List parquetReaderColumns = parquetReaderColumnBuilder.build(); ParquetDataSourceId dataSourceId = dataSource.getId(); - ParquetReader parquetReader = new ParquetReader( - Optional.ofNullable(fileMetaData.getCreatedBy()), - getParquetReaderFields(parquetReaderColumns), - blocks, - blockStarts.build(), - dataSource, - dateTimeZone, - memoryContext, - options, - exception -> handleException(dataSourceId, exception)); + ParquetReader parquetReader = + new ParquetReader( + Optional.ofNullable(fileMetaData.getCreatedBy()), + getParquetReaderFields(parquetReaderColumns), + blocks, + blockStarts.build(), + dataSource, + dateTimeZone, + memoryContext, + options, + exception -> handleException(dataSourceId, exception)); return new ReaderPageSourceWithRowPositions( new ReaderPageSource( - constantPopulatingPageSourceBuilder.build(new ParquetPageSource(parquetReader, parquetReaderColumns)), + constantPopulatingPageSourceBuilder.build( + new ParquetPageSource(parquetReader, parquetReaderColumns)), columnProjections), startRowPosition, endRowPosition); @@ -955,9 +1057,10 @@ private static ReaderPageSourceWithRowPositions createParquetPageSource( if (e instanceof TrinoException) { throw (TrinoException) e; } - String message = format( - "Error opening Iceberg split %s (offset=%s, length=%s): %s", - inputFile.location(), start, length, e.getMessage()); + String message = + format( + "Error opening Iceberg split %s (offset=%s, length=%s): %s", + inputFile.location(), start, length, e.getMessage()); if (e instanceof ParquetCorruptionException) { throw new TrinoException(ICEBERG_BAD_DATA, message, e); @@ -981,15 +1084,21 @@ private static ReaderPageSourceWithRowPositions createAvroPageSource( Schema fileSchema, Optional nameMapping, List columns) { - ConstantPopulatingPageSource.Builder constantPopulatingPageSourceBuilder = ConstantPopulatingPageSource.builder(); + ConstantPopulatingPageSource.Builder constantPopulatingPageSourceBuilder = + ConstantPopulatingPageSource.builder(); int avroSourceChannel = 0; Optional columnProjections = projectColumns(columns); - List readColumns = columnProjections - .map(readerColumns -> (List) readerColumns.get() - .stream().map(IcebergColumnHandle.class::cast).collect(toImmutableList())) - .orElse(columns); + List readColumns = + columnProjections + .map( + readerColumns -> + (List) + readerColumns.get().stream() + .map(IcebergColumnHandle.class::cast) + .collect(toImmutableList())) + .orElse(columns); InputFile file; OptionalLong fileModifiedTime = OptionalLong.empty(); @@ -1003,16 +1112,22 @@ private static ReaderPageSourceWithRowPositions createAvroPageSource( } // The column orders in the generated schema might be different from the original order - try (DataFileStream avroFileReader = new DataFileStream<>(file.newStream(), new GenericDatumReader<>())) { + try (DataFileStream avroFileReader = + new 
DataFileStream<>(file.newStream(), new GenericDatumReader<>())) { org.apache.avro.Schema avroSchema = avroFileReader.getSchema(); List fileFields = avroSchema.getFields(); - if (nameMapping.isPresent() && fileFields.stream().noneMatch(IcebergPageSourceProvider::hasId)) { - fileFields = fileFields.stream() - .map(field -> setMissingFieldId(field, nameMapping.get(), ImmutableList.of(field.name()))) - .collect(toImmutableList()); + if (nameMapping.isPresent() + && fileFields.stream().noneMatch(IcebergPageSourceProvider::hasId)) { + fileFields = + fileFields.stream() + .map( + field -> + setMissingFieldId(field, nameMapping.get(), ImmutableList.of(field.name()))) + .collect(toImmutableList()); } - Map fileColumnsByIcebergId = mapIdsToAvroFields(fileFields); + Map fileColumnsByIcebergId = + mapIdsToAvroFields(fileFields); ImmutableList.Builder columnNames = ImmutableList.builder(); ImmutableList.Builder columnTypes = ImmutableList.builder(); @@ -1026,10 +1141,10 @@ private static ReaderPageSourceWithRowPositions createAvroPageSource( constantPopulatingPageSourceBuilder.addConstantColumn( nativeValueToBlock(FILE_PATH.getType(), utf8Slice(file.location()))); } else if (column.isFileModifiedTimeColumn()) { - constantPopulatingPageSourceBuilder.addConstantColumn(nativeValueToBlock( - FILE_MODIFIED_TIME.getType(), - packDateTimeWithZone(fileModifiedTime.orElseThrow(), UTC_KEY) - )); + constantPopulatingPageSourceBuilder.addConstantColumn( + nativeValueToBlock( + FILE_MODIFIED_TIME.getType(), + packDateTimeWithZone(fileModifiedTime.orElseThrow(), UTC_KEY))); } else if (column.isRowPositionColumn()) { rowIndexChannels.add(true); columnNames.add(ROW_POSITION.name()); @@ -1037,7 +1152,8 @@ private static ReaderPageSourceWithRowPositions createAvroPageSource( constantPopulatingPageSourceBuilder.addDelegateColumn(avroSourceChannel); avroSourceChannel++; } else if (column.getId() == TRINO_MERGE_FILE_RECORD_COUNT) { - constantPopulatingPageSourceBuilder.addConstantColumn(nativeValueToBlock(column.getType(), fileRecordCount)); + constantPopulatingPageSourceBuilder.addConstantColumn( + nativeValueToBlock(column.getType(), fileRecordCount)); } else if (column.getId() == TRINO_MERGE_PARTITION_SPEC_ID) { constantPopulatingPageSourceBuilder.addConstantColumn( nativeValueToBlock(column.getType(), (long) partitionSpecId)); @@ -1045,7 +1161,8 @@ private static ReaderPageSourceWithRowPositions createAvroPageSource( constantPopulatingPageSourceBuilder.addConstantColumn( nativeValueToBlock(column.getType(), utf8Slice(partitionData))); } else if (field == null) { - constantPopulatingPageSourceBuilder.addConstantColumn(nativeValueToBlock(column.getType(), null)); + constantPopulatingPageSourceBuilder.addConstantColumn( + nativeValueToBlock(column.getType(), null)); } else { rowIndexChannels.add(false); columnNames.add(column.getName()); @@ -1057,16 +1174,17 @@ private static ReaderPageSourceWithRowPositions createAvroPageSource( return new ReaderPageSourceWithRowPositions( new ReaderPageSource( - constantPopulatingPageSourceBuilder.build(new IcebergAvroPageSource( - file, - start, - length, - fileSchema, - nameMapping, - columnNames.build(), - columnTypes.build(), - rowIndexChannels.build(), - newSimpleAggregatedMemoryContext())), + constantPopulatingPageSourceBuilder.build( + new IcebergAvroPageSource( + file, + start, + length, + fileSchema, + nameMapping, + columnNames.build(), + columnTypes.build(), + rowIndexChannels.build(), + newSimpleAggregatedMemoryContext())), columnProjections), Optional.empty(), 
Optional.empty()); @@ -1120,38 +1238,49 @@ private static MappedFields convertToLowercase(MappedFields mappedFields) { private static List<MappedField> convertToLowercase(List<MappedField> fields) { return fields.stream() - .map(mappedField -> { - Set<String> lowercaseNames = - mappedField.names().stream().map(name -> name.toLowerCase(ENGLISH)).collect(toImmutableSet()); - return MappedField.of(mappedField.id(), lowercaseNames, convertToLowercase(mappedField.nestedMapping())); - }) + .map( + mappedField -> { + Set<String> lowercaseNames = + mappedField.names().stream() + .map(name -> name.toLowerCase(ENGLISH)) + .collect(toImmutableSet()); + return MappedField.of( + mappedField.id(), + lowercaseNames, + convertToLowercase(mappedField.nestedMapping())); + }) .collect(toImmutableList()); } - private static class IcebergOrcProjectedLayout - implements ProjectedLayout { + private static class IcebergOrcProjectedLayout implements ProjectedLayout { private final Map<Integer, ProjectedLayout> projectedLayoutForFieldId; private IcebergOrcProjectedLayout(Map<Integer, ProjectedLayout> projectedLayoutForFieldId) { - this.projectedLayoutForFieldId = ImmutableMap.copyOf( - requireNonNull(projectedLayoutForFieldId, "projectedLayoutForFieldId is null")); + this.projectedLayoutForFieldId = + ImmutableMap.copyOf( + requireNonNull(projectedLayoutForFieldId, "projectedLayoutForFieldId is null")); } - public static ProjectedLayout createProjectedLayout(OrcColumn root, List<List<Integer>> fieldIdDereferences) { + public static ProjectedLayout createProjectedLayout( + OrcColumn root, List<List<Integer>> fieldIdDereferences) { if (fieldIdDereferences.stream().anyMatch(List::isEmpty)) { return fullyProjectedLayout(); } - Map<Integer, List<List<Integer>>> dereferencesByField = fieldIdDereferences.stream() - .collect(groupingBy( - sequence -> sequence.get(0), - mapping(sequence -> sequence.subList(1, sequence.size()), toUnmodifiableList()))); + Map<Integer, List<List<Integer>>> dereferencesByField = + fieldIdDereferences.stream() + .collect( + groupingBy( + sequence -> sequence.get(0), + mapping( + sequence -> sequence.subList(1, sequence.size()), toUnmodifiableList()))); ImmutableMap.Builder<Integer, ProjectedLayout> fieldLayouts = ImmutableMap.builder(); for (OrcColumn nestedColumn : root.getNestedColumns()) { Integer fieldId = getIcebergFieldId(nestedColumn); if (dereferencesByField.containsKey(fieldId)) { - fieldLayouts.put(fieldId, createProjectedLayout(nestedColumn, dereferencesByField.get(fieldId))); + fieldLayouts.put( + fieldId, createProjectedLayout(nestedColumn, dereferencesByField.get(fieldId))); } } @@ -1165,9 +1294,7 @@ public ProjectedLayout getFieldLayout(OrcColumn orcColumn) { } } - /** - * Creates a mapping between the input {@code columns} and base columns if required. - */ + /** Creates a mapping between the input {@code columns} and base columns if required.
*/ public static Optional projectColumns(List columns) { requireNonNull(columns, "columns is null"); @@ -1206,17 +1333,25 @@ private static TupleDomain getParquetTupleDomain( } ImmutableMap.Builder predicate = ImmutableMap.builder(); - effectivePredicate.getDomains().orElseThrow().forEach((columnHandle, domain) -> { - String baseType = columnHandle.getType().getTypeSignature().getBase(); - // skip looking up predicates for complex types as Parquet only stores stats for primitives - if (columnHandle.isBaseColumn() && (!baseType.equals(StandardTypes.MAP) && - !baseType.equals(StandardTypes.ARRAY) && !baseType.equals(StandardTypes.ROW))) { - ColumnDescriptor descriptor = descriptorsByPath.get(ImmutableList.of(columnHandle.getName())); - if (descriptor != null) { - predicate.put(descriptor, domain); - } - } - }); + effectivePredicate + .getDomains() + .orElseThrow() + .forEach( + (columnHandle, domain) -> { + String baseType = columnHandle.getType().getTypeSignature().getBase(); + // skip looking up predicates for complex types as Parquet only stores stats for + // primitives + if (columnHandle.isBaseColumn() + && (!baseType.equals(StandardTypes.MAP) + && !baseType.equals(StandardTypes.ARRAY) + && !baseType.equals(StandardTypes.ROW))) { + ColumnDescriptor descriptor = + descriptorsByPath.get(ImmutableList.of(columnHandle.getName())); + if (descriptor != null) { + predicate.put(descriptor, domain); + } + } + }); return TupleDomain.withColumnDomains(predicate.buildOrThrow()); } @@ -1227,17 +1362,20 @@ private static TrinoException handleException(OrcDataSourceId dataSourceId, Exce if (exception instanceof OrcCorruptionException) { return new TrinoException(ICEBERG_BAD_DATA, exception); } - return new TrinoException(ICEBERG_CURSOR_ERROR, format("Failed to read ORC file: %s", dataSourceId), exception); + return new TrinoException( + ICEBERG_CURSOR_ERROR, format("Failed to read ORC file: %s", dataSourceId), exception); } - private static TrinoException handleException(ParquetDataSourceId dataSourceId, Exception exception) { + private static TrinoException handleException( + ParquetDataSourceId dataSourceId, Exception exception) { if (exception instanceof TrinoException) { return (TrinoException) exception; } if (exception instanceof ParquetCorruptionException) { return new TrinoException(ICEBERG_BAD_DATA, exception); } - return new TrinoException(ICEBERG_CURSOR_ERROR, format("Failed to read Parquet file: %s", dataSourceId), exception); + return new TrinoException( + ICEBERG_CURSOR_ERROR, format("Failed to read Parquet file: %s", dataSourceId), exception); } public static final class ReaderPageSourceWithRowPositions { diff --git a/trino/src/main/java/com/netease/arctic/trino/unkeyed/IcebergSplit.java b/trino/src/main/java/com/netease/arctic/trino/unkeyed/IcebergSplit.java index 3419ad6102..070949e13a 100644 --- a/trino/src/main/java/com/netease/arctic/trino/unkeyed/IcebergSplit.java +++ b/trino/src/main/java/com/netease/arctic/trino/unkeyed/IcebergSplit.java @@ -1,4 +1,3 @@ - /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -19,6 +18,11 @@ package com.netease.arctic.trino.unkeyed; +import static com.google.common.base.MoreObjects.toStringHelper; +import static io.airlift.slice.SizeOf.estimatedSizeOf; +import static java.lang.Math.toIntExact; +import static java.util.Objects.requireNonNull; + import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.collect.ImmutableList; @@ -32,18 +36,12 @@ import java.util.List; -import static com.google.common.base.MoreObjects.toStringHelper; -import static io.airlift.slice.SizeOf.estimatedSizeOf; -import static java.lang.Math.toIntExact; -import static java.util.Objects.requireNonNull; - /** * Iceberg original IcebergSplit has some problems for arctic, such as iceberg version, table type. */ -public class IcebergSplit - implements ConnectorSplit { - private static final int INSTANCE_SIZE = toIntExact( - ClassLayout.parseClass(io.trino.plugin.iceberg.IcebergSplit.class).instanceSize()); +public class IcebergSplit implements ConnectorSplit { + private static final int INSTANCE_SIZE = + toIntExact(ClassLayout.parseClass(io.trino.plugin.iceberg.IcebergSplit.class).instanceSize()); private final String path; private final long start; @@ -163,20 +161,16 @@ public Object getInfo() { @Override public long getRetainedSizeInBytes() { - return INSTANCE_SIZE + - estimatedSizeOf(path) + - estimatedSizeOf(addresses, HostAddress::getRetainedSizeInBytes) + - estimatedSizeOf(partitionSpecJson) + - estimatedSizeOf(partitionDataJson) + - estimatedSizeOf(deletes, TrinoDeleteFile::getRetainedSizeInBytes); + return INSTANCE_SIZE + + estimatedSizeOf(path) + + estimatedSizeOf(addresses, HostAddress::getRetainedSizeInBytes) + + estimatedSizeOf(partitionSpecJson) + + estimatedSizeOf(partitionDataJson) + + estimatedSizeOf(deletes, TrinoDeleteFile::getRetainedSizeInBytes); } @Override public String toString() { - return toStringHelper(this) - .addValue(path) - .addValue(start) - .addValue(length) - .toString(); + return toStringHelper(this).addValue(path).addValue(start).addValue(length).toString(); } } diff --git a/trino/src/main/java/com/netease/arctic/trino/unkeyed/IcebergSplitManager.java b/trino/src/main/java/com/netease/arctic/trino/unkeyed/IcebergSplitManager.java index bac39ce899..f717f3de43 100644 --- a/trino/src/main/java/com/netease/arctic/trino/unkeyed/IcebergSplitManager.java +++ b/trino/src/main/java/com/netease/arctic/trino/unkeyed/IcebergSplitManager.java @@ -18,6 +18,10 @@ package com.netease.arctic.trino.unkeyed; +import static io.trino.plugin.iceberg.IcebergSessionProperties.getDynamicFilteringWaitTimeout; +import static io.trino.plugin.iceberg.IcebergSessionProperties.getMinimumAssignedSplitWeight; +import static java.util.Objects.requireNonNull; + import com.google.common.collect.ImmutableList; import com.netease.arctic.trino.ArcticTransactionManager; import com.netease.arctic.trino.TableNameResolve; @@ -39,12 +43,9 @@ import javax.inject.Inject; -import static io.trino.plugin.iceberg.IcebergSessionProperties.getDynamicFilteringWaitTimeout; -import static io.trino.plugin.iceberg.IcebergSessionProperties.getMinimumAssignedSplitWeight; -import static java.util.Objects.requireNonNull; - /** - * Iceberg original IcebergSplitManager has some problems for arctic, such as iceberg version, table type. + * Iceberg original IcebergSplitManager has some problems for arctic, such as iceberg version, table + * type. 
*/ public class IcebergSplitManager implements ConnectorSplitManager { @@ -79,27 +80,31 @@ public ConnectorSplitSource getSplits( } Table icebergTable = - transactionManager.get(transaction).getArcticTable(table.getSchemaTableName()).asUnkeyedTable(); + transactionManager + .get(transaction) + .getArcticTable(table.getSchemaTableName()) + .asUnkeyedTable(); Duration dynamicFilteringWaitTimeout = getDynamicFilteringWaitTimeout(session); - TableScan tableScan = icebergTable.newScan() - .useSnapshot(table.getSnapshotId().get()); + TableScan tableScan = icebergTable.newScan().useSnapshot(table.getSnapshotId().get()); TableNameResolve resolve = new TableNameResolve(table.getTableName()); - IcebergSplitSource splitSource = new IcebergSplitSource( - fileSystemFactory, - session, - table, - tableScan, - table.getMaxScannedFileSize(), - dynamicFilter, - dynamicFilteringWaitTimeout, - constraint, - typeManager, - table.isRecordScannedFiles(), - getMinimumAssignedSplitWeight(session), - resolve.withSuffix() ? !resolve.isBase() : false); + IcebergSplitSource splitSource = + new IcebergSplitSource( + fileSystemFactory, + session, + table, + tableScan, + table.getMaxScannedFileSize(), + dynamicFilter, + dynamicFilteringWaitTimeout, + constraint, + typeManager, + table.isRecordScannedFiles(), + getMinimumAssignedSplitWeight(session), + resolve.withSuffix() ? !resolve.isBase() : false); - return new ClassLoaderSafeConnectorSplitSource(splitSource, Thread.currentThread().getContextClassLoader()); + return new ClassLoaderSafeConnectorSplitSource( + splitSource, Thread.currentThread().getContextClassLoader()); } } diff --git a/trino/src/main/java/com/netease/arctic/trino/unkeyed/IcebergSplitSource.java b/trino/src/main/java/com/netease/arctic/trino/unkeyed/IcebergSplitSource.java index 551b639c6d..0479439ace 100644 --- a/trino/src/main/java/com/netease/arctic/trino/unkeyed/IcebergSplitSource.java +++ b/trino/src/main/java/com/netease/arctic/trino/unkeyed/IcebergSplitSource.java @@ -1,4 +1,3 @@ - /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file @@ -19,6 +18,32 @@ package com.netease.arctic.trino.unkeyed; +import static com.google.common.base.Preconditions.checkState; +import static com.google.common.base.Suppliers.memoize; +import static com.google.common.base.Verify.verify; +import static com.google.common.collect.ImmutableList.toImmutableList; +import static com.google.common.collect.ImmutableSet.toImmutableSet; +import static com.google.common.collect.Sets.intersection; +import static io.airlift.slice.Slices.utf8Slice; +import static io.trino.plugin.iceberg.ExpressionConverter.toIcebergExpression; +import static io.trino.plugin.iceberg.IcebergColumnHandle.fileModifiedTimeColumnHandle; +import static io.trino.plugin.iceberg.IcebergColumnHandle.pathColumnHandle; +import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_FILESYSTEM_ERROR; +import static io.trino.plugin.iceberg.IcebergMetadataColumn.isMetadataColumnId; +import static io.trino.plugin.iceberg.IcebergSplitManager.ICEBERG_DOMAIN_COMPACTION_THRESHOLD; +import static io.trino.plugin.iceberg.IcebergTypes.convertIcebergValueToTrino; +import static io.trino.plugin.iceberg.IcebergUtil.deserializePartitionValue; +import static io.trino.plugin.iceberg.IcebergUtil.getColumnHandle; +import static io.trino.plugin.iceberg.IcebergUtil.getPartitionKeys; +import static io.trino.plugin.iceberg.IcebergUtil.primitiveFieldTypes; +import static io.trino.plugin.iceberg.TypeConverter.toIcebergType; +import static io.trino.spi.type.DateTimeEncoding.packDateTimeWithZone; +import static io.trino.spi.type.TimeZoneKey.UTC_KEY; +import static java.util.Objects.requireNonNull; +import static java.util.concurrent.CompletableFuture.completedFuture; +import static java.util.concurrent.TimeUnit.MILLISECONDS; +import static org.apache.iceberg.types.Conversions.fromByteBuffer; + import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Stopwatch; import com.google.common.collect.ImmutableList; @@ -63,6 +88,7 @@ import org.apache.iceberg.util.TableScanUtil; import javax.annotation.Nullable; + import java.io.IOException; import java.io.UncheckedIOException; import java.nio.ByteBuffer; @@ -75,39 +101,15 @@ import java.util.concurrent.CompletableFuture; import java.util.function.Supplier; -import static com.google.common.base.Preconditions.checkState; -import static com.google.common.base.Suppliers.memoize; -import static com.google.common.base.Verify.verify; -import static com.google.common.collect.ImmutableList.toImmutableList; -import static com.google.common.collect.ImmutableSet.toImmutableSet; -import static com.google.common.collect.Sets.intersection; -import static io.airlift.slice.Slices.utf8Slice; -import static io.trino.plugin.iceberg.ExpressionConverter.toIcebergExpression; -import static io.trino.plugin.iceberg.IcebergColumnHandle.fileModifiedTimeColumnHandle; -import static io.trino.plugin.iceberg.IcebergColumnHandle.pathColumnHandle; -import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_FILESYSTEM_ERROR; -import static io.trino.plugin.iceberg.IcebergMetadataColumn.isMetadataColumnId; -import static io.trino.plugin.iceberg.IcebergSplitManager.ICEBERG_DOMAIN_COMPACTION_THRESHOLD; -import static io.trino.plugin.iceberg.IcebergTypes.convertIcebergValueToTrino; -import static io.trino.plugin.iceberg.IcebergUtil.deserializePartitionValue; -import static io.trino.plugin.iceberg.IcebergUtil.getColumnHandle; -import static io.trino.plugin.iceberg.IcebergUtil.getPartitionKeys; -import static 
io.trino.plugin.iceberg.IcebergUtil.primitiveFieldTypes; -import static io.trino.plugin.iceberg.TypeConverter.toIcebergType; -import static io.trino.spi.type.DateTimeEncoding.packDateTimeWithZone; -import static io.trino.spi.type.TimeZoneKey.UTC_KEY; -import static java.util.Objects.requireNonNull; -import static java.util.concurrent.CompletableFuture.completedFuture; -import static java.util.concurrent.TimeUnit.MILLISECONDS; -import static org.apache.iceberg.types.Conversions.fromByteBuffer; - /** - * Iceberg original IcebergSplitSource has some problems for arctic, such as iceberg version, table type. + * Iceberg original IcebergSplitSource has some problems for arctic, such as iceberg version, table + * type. */ -public class IcebergSplitSource - implements ConnectorSplitSource { - private static final ConnectorSplitBatch EMPTY_BATCH = new ConnectorSplitBatch(ImmutableList.of(), false); - private static final ConnectorSplitBatch NO_MORE_SPLITS_BATCH = new ConnectorSplitBatch(ImmutableList.of(), true); +public class IcebergSplitSource implements ConnectorSplitSource { + private static final ConnectorSplitBatch EMPTY_BATCH = + new ConnectorSplitBatch(ImmutableList.of(), false); + private static final ConnectorSplitBatch NO_MORE_SPLITS_BATCH = + new ConnectorSplitBatch(ImmutableList.of(), true); private final TrinoFileSystemFactory fileSystemFactory; private final ConnectorSession session; @@ -161,8 +163,10 @@ public IcebergSplitSource( this.typeManager = requireNonNull(typeManager, "typeManager is null"); this.recordScannedFiles = recordScannedFiles; this.minimumAssignedSplitWeight = minimumAssignedSplitWeight; - this.dataColumnPredicate = tableHandle.getEnforcedPredicate().filter((column, domain) -> - !isMetadataColumnId(column.getId())); + this.dataColumnPredicate = + tableHandle + .getEnforcedPredicate() + .filter((column, domain) -> !isMetadataColumnId(column.getId())); this.pathDomain = getPathDomain(tableHandle.getEnforcedPredicate()); this.fileModifiedTimeDomain = getFileModifiedTimePathDomain(tableHandle.getEnforcedPredicate()); this.isChange = isChange; @@ -170,21 +174,25 @@ public IcebergSplitSource( @Override public CompletableFuture getNextBatch(int maxSize) { - long timeLeft = dynamicFilteringWaitTimeoutMillis - dynamicFilterWaitStopwatch.elapsed(MILLISECONDS); + long timeLeft = + dynamicFilteringWaitTimeoutMillis - dynamicFilterWaitStopwatch.elapsed(MILLISECONDS); if (dynamicFilter.isAwaitable() && timeLeft > 0) { - return dynamicFilter.isBlocked() + return dynamicFilter + .isBlocked() .thenApply(ignored -> EMPTY_BATCH) .completeOnTimeout(EMPTY_BATCH, timeLeft, MILLISECONDS); } if (fileScanTaskIterable == null) { - // Used to avoid duplicating work if the Dynamic Filter was already pushed down to the Iceberg API + // Used to avoid duplicating work if the Dynamic Filter was already pushed down to the Iceberg + // API boolean dynamicFilterIsComplete = dynamicFilter.isComplete(); this.pushedDownDynamicFilterPredicate = dynamicFilter.getCurrentPredicate().transformKeys(IcebergColumnHandle.class::cast); - TupleDomain fullPredicate = tableHandle.getUnenforcedPredicate() - .intersect(pushedDownDynamicFilterPredicate); - // TODO: (https://github.com/trinodb/trino/issues/9743): Consider removing TupleDomain#simplify + TupleDomain fullPredicate = + tableHandle.getUnenforcedPredicate().intersect(pushedDownDynamicFilterPredicate); + // TODO: (https://github.com/trinodb/trino/issues/9743): Consider removing + // TupleDomain#simplify TupleDomain simplifiedPredicate = 
fullPredicate.simplify(ICEBERG_DOMAIN_COMPACTION_THRESHOLD); boolean usedSimplifiedPredicate = !simplifiedPredicate.equals(fullPredicate); @@ -193,8 +201,8 @@ public CompletableFuture getNextBatch(int maxSize) { this.pushedDownDynamicFilterPredicate = TupleDomain.all(); } - TupleDomain effectivePredicate = dataColumnPredicate - .intersect(simplifiedPredicate); + TupleDomain effectivePredicate = + dataColumnPredicate.intersect(simplifiedPredicate); if (effectivePredicate.isNone()) { finish(); @@ -202,7 +210,8 @@ public CompletableFuture getNextBatch(int maxSize) { } Expression filterExpression = toIcebergExpression(effectivePredicate); - // If the Dynamic Filter will be evaluated against each file, stats are required. Otherwise, skip them. + // If the Dynamic Filter will be evaluated against each file, stats are required. Otherwise, + // skip them. boolean requiresColumnStats = usedSimplifiedPredicate || !dynamicFilterIsComplete; TableScan scan = tableScan.filter(filterExpression); if (requiresColumnStats) { @@ -211,17 +220,19 @@ public CompletableFuture getNextBatch(int maxSize) { if (tableScan instanceof ChangeTableIncrementalScan) { this.fileScanTaskIterable = scan.planFiles(); } else { - this.fileScanTaskIterable = TableScanUtil.splitFiles(scan.planFiles(), tableScan.targetSplitSize()); + this.fileScanTaskIterable = + TableScanUtil.splitFiles(scan.planFiles(), tableScan.targetSplitSize()); } closer.register(fileScanTaskIterable); this.fileScanTaskIterator = fileScanTaskIterable.iterator(); closer.register(fileScanTaskIterator); - // TODO: Remove when NPE check has been released: https://github.com/trinodb/trino/issues/15372 + // TODO: Remove when NPE check has been released: + // https://github.com/trinodb/trino/issues/15372 isFinished(); } - TupleDomain dynamicFilterPredicate = dynamicFilter.getCurrentPredicate() - .transformKeys(IcebergColumnHandle.class::cast); + TupleDomain dynamicFilterPredicate = + dynamicFilter.getCurrentPredicate().transformKeys(IcebergColumnHandle.class::cast); if (dynamicFilterPredicate.isNone()) { finish(); return completedFuture(NO_MORE_SPLITS_BATCH); @@ -231,9 +242,9 @@ public CompletableFuture getNextBatch(int maxSize) { ImmutableList.Builder splits = ImmutableList.builder(); while (fileScanTasks.hasNext()) { FileScanTask scanTask = fileScanTasks.next(); - if (scanTask.deletes().isEmpty() && - maxScannedFileSizeInBytes.isPresent() && - scanTask.file().fileSizeInBytes() > maxScannedFileSizeInBytes.get()) { + if (scanTask.deletes().isEmpty() + && maxScannedFileSizeInBytes.isPresent() + && scanTask.file().fileSizeInBytes() > maxScannedFileSizeInBytes.get()) { continue; } @@ -242,7 +253,8 @@ public CompletableFuture getNextBatch(int maxSize) { } if (!fileModifiedTimeDomain.isAll()) { long fileModifiedTime = getModificationTime(scanTask.file().path().toString()); - if (!fileModifiedTimeDomain.includesNullableValue(packDateTimeWithZone(fileModifiedTime, UTC_KEY))) { + if (!fileModifiedTimeDomain.includesNullableValue( + packDateTimeWithZone(fileModifiedTime, UTC_KEY))) { continue; } } @@ -251,28 +263,32 @@ public CompletableFuture getNextBatch(int maxSize) { Schema fileSchema = scanTask.spec().schema(); Map> partitionKeys = getPartitionKeys(scanTask); - Set identityPartitionColumns = partitionKeys.keySet().stream() - .map(fieldId -> getColumnHandle(fileSchema.findField(fieldId), typeManager)) - .collect(toImmutableSet()); - - Supplier> partitionValues = memoize(() -> { - Map bindings = new HashMap<>(); - for (IcebergColumnHandle partitionColumn : 
identityPartitionColumns) { - Object partitionValue = deserializePartitionValue( - partitionColumn.getType(), - partitionKeys.get(partitionColumn.getId()).orElse(null), - partitionColumn.getName()); - NullableValue bindingValue = new NullableValue(partitionColumn.getType(), partitionValue); - bindings.put(partitionColumn, bindingValue); - } - return bindings; - }); - - if (!dynamicFilterPredicate.isAll() && !dynamicFilterPredicate.equals(pushedDownDynamicFilterPredicate)) { + Set identityPartitionColumns = + partitionKeys.keySet().stream() + .map(fieldId -> getColumnHandle(fileSchema.findField(fieldId), typeManager)) + .collect(toImmutableSet()); + + Supplier> partitionValues = + memoize( + () -> { + Map bindings = new HashMap<>(); + for (IcebergColumnHandle partitionColumn : identityPartitionColumns) { + Object partitionValue = + deserializePartitionValue( + partitionColumn.getType(), + partitionKeys.get(partitionColumn.getId()).orElse(null), + partitionColumn.getName()); + NullableValue bindingValue = + new NullableValue(partitionColumn.getType(), partitionValue); + bindings.put(partitionColumn, bindingValue); + } + return bindings; + }); + + if (!dynamicFilterPredicate.isAll() + && !dynamicFilterPredicate.equals(pushedDownDynamicFilterPredicate)) { if (!partitionMatchesPredicate( - identityPartitionColumns, - partitionValues, - dynamicFilterPredicate)) { + identityPartitionColumns, partitionValues, dynamicFilterPredicate)) { continue; } if (!fileMatchesPredicate( @@ -288,11 +304,12 @@ public CompletableFuture getNextBatch(int maxSize) { continue; } if (recordScannedFiles) { - // Positional and Equality deletes can only be cleaned up if the whole table has been optimized. + // Positional and Equality deletes can only be cleaned up if the whole table has been + // optimized. // Equality deletes may apply to many files, and position deletes may be grouped together. // This makes it difficult to know if they are obsolete. - List fullyAppliedDeletes = tableHandle.getEnforcedPredicate().isAll() ? - scanTask.deletes() : ImmutableList.of(); + List fullyAppliedDeletes = + tableHandle.getEnforcedPredicate().isAll() ? scanTask.deletes() : ImmutableList.of(); scannedFiles.add(new DataFileWithDeleteFiles(scanTask.file(), fullyAppliedDeletes)); } splits.add(icebergSplit); @@ -305,7 +322,8 @@ private long getModificationTime(String path) { TrinoInputFile inputFile = fileSystemFactory.create(session).newInputFile(path); return inputFile.modificationTime(); } catch (IOException e) { - throw new TrinoException(ICEBERG_FILESYSTEM_ERROR, "Failed to get file modification time: " + path, e); + throw new TrinoException( + ICEBERG_FILESYSTEM_ERROR, "Failed to get file modification time: " + path, e); } } @@ -363,11 +381,12 @@ static boolean fileMatchesPredicate( mayContainNulls = nullValueCount == null || nullValueCount > 0; } Type type = primitiveTypeForFieldId.get(fieldId); - Domain statisticsDomain = domainForStatistics( - column.getType(), - lowerBounds == null ? null : fromByteBuffer(type, lowerBounds.get(fieldId)), - upperBounds == null ? null : fromByteBuffer(type, upperBounds.get(fieldId)), - mayContainNulls); + Domain statisticsDomain = + domainForStatistics( + column.getType(), + lowerBounds == null ? null : fromByteBuffer(type, lowerBounds.get(fieldId)), + upperBounds == null ? 
null : fromByteBuffer(type, upperBounds.get(fieldId)), + mayContainNulls); if (!domain.overlaps(statisticsDomain)) { return false; } @@ -387,16 +406,19 @@ private static Domain domainForStatistics( Range statisticsRange; if (lowerBound != null && upperBound != null) { - statisticsRange = Range.range( - type, - convertIcebergValueToTrino(icebergType, lowerBound), - true, - convertIcebergValueToTrino(icebergType, upperBound), - true); + statisticsRange = + Range.range( + type, + convertIcebergValueToTrino(icebergType, lowerBound), + true, + convertIcebergValueToTrino(icebergType, upperBound), + true); } else if (upperBound != null) { - statisticsRange = Range.lessThanOrEqual(type, convertIcebergValueToTrino(icebergType, upperBound)); + statisticsRange = + Range.lessThanOrEqual(type, convertIcebergValueToTrino(icebergType, upperBound)); } else { - statisticsRange = Range.greaterThanOrEqual(type, convertIcebergValueToTrino(icebergType, lowerBound)); + statisticsRange = + Range.greaterThanOrEqual(type, convertIcebergValueToTrino(icebergType, lowerBound)); } return Domain.create(ValueSet.ofRanges(statisticsRange), mayContainNulls); } @@ -408,8 +430,9 @@ static boolean partitionMatchesConstraint( // We use Constraint just to pass functional predicate here from DistributedExecutionPlanner verify(constraint.getSummary().isAll()); - if (constraint.predicate().isEmpty() || - intersection(constraint.getPredicateColumns().orElseThrow(), identityPartitionColumns).isEmpty()) { + if (constraint.predicate().isEmpty() + || intersection(constraint.getPredicateColumns().orElseThrow(), identityPartitionColumns) + .isEmpty()) { return true; } return constraint.predicate().get().test(partitionValues.get()); @@ -428,7 +451,8 @@ static boolean partitionMatchesPredicate( for (IcebergColumnHandle partitionColumn : identityPartitionColumns) { Domain allowedDomain = domains.get(partitionColumn); if (allowedDomain != null) { - if (!allowedDomain.includesNullableValue(partitionValues.get().get(partitionColumn).getValue())) { + if (!allowedDomain.includesNullableValue( + partitionValues.get().get(partitionColumn).getValue())) { return false; } } @@ -455,33 +479,35 @@ private IcebergSplit toIcebergSplit(FileScanTask task) { ImmutableList.of(), PartitionSpecParser.toJson(task.spec()), PartitionData.toJson(task.file().partition()), - task.deletes().stream() - .map(TrinoDeleteFile::copyOf) - .collect(toImmutableList()), + task.deletes().stream().map(TrinoDeleteFile::copyOf).collect(toImmutableList()), transactionId, dataFileType); } private static Domain getPathDomain(TupleDomain effectivePredicate) { IcebergColumnHandle pathColumn = pathColumnHandle(); - Domain domain = effectivePredicate.getDomains().orElseThrow(() -> - new IllegalArgumentException("Unexpected NONE tuple domain")) - .get(pathColumn); + Domain domain = + effectivePredicate + .getDomains() + .orElseThrow(() -> new IllegalArgumentException("Unexpected NONE tuple domain")) + .get(pathColumn); if (domain == null) { return Domain.all(pathColumn.getType()); } return domain; } - private static Domain getFileModifiedTimePathDomain(TupleDomain effectivePredicate) { + private static Domain getFileModifiedTimePathDomain( + TupleDomain effectivePredicate) { IcebergColumnHandle fileModifiedTimeColumn = fileModifiedTimeColumnHandle(); - Domain domain = effectivePredicate.getDomains().orElseThrow(() -> - new IllegalArgumentException("Unexpected NONE tuple domain")) - .get(fileModifiedTimeColumn); + Domain domain = + effectivePredicate + .getDomains() + .orElseThrow(() -> 
new IllegalArgumentException("Unexpected NONE tuple domain")) + .get(fileModifiedTimeColumn); if (domain == null) { return Domain.all(fileModifiedTimeColumn.getType()); } return domain; } } - diff --git a/trino/src/main/java/com/netease/arctic/trino/unkeyed/TrinoDeleteFilter.java b/trino/src/main/java/com/netease/arctic/trino/unkeyed/TrinoDeleteFilter.java index edab448a8d..65a01f0d43 100644 --- a/trino/src/main/java/com/netease/arctic/trino/unkeyed/TrinoDeleteFilter.java +++ b/trino/src/main/java/com/netease/arctic/trino/unkeyed/TrinoDeleteFilter.java @@ -18,6 +18,10 @@ package com.netease.arctic.trino.unkeyed; +import static com.google.common.collect.ImmutableList.toImmutableList; +import static com.google.common.collect.ImmutableSet.toImmutableSet; +import static java.util.Objects.requireNonNull; + import com.netease.arctic.io.reader.DeleteFilter; import com.netease.arctic.trino.delete.TrinoRow; import io.trino.plugin.iceberg.IcebergColumnHandle; @@ -33,15 +37,11 @@ import java.util.Optional; import java.util.Set; -import static com.google.common.collect.ImmutableList.toImmutableList; -import static com.google.common.collect.ImmutableSet.toImmutableSet; -import static java.util.Objects.requireNonNull; - /** - * Iceberg original TrinoDeleteFilter has some problems for arctic, such as iceberg version, table type. + * Iceberg original TrinoDeleteFilter has some problems for arctic, such as iceberg version, table + * type. */ -public class TrinoDeleteFilter - extends DeleteFilter { +public class TrinoDeleteFilter extends DeleteFilter { private final FileIO fileIO; public TrinoDeleteFilter( @@ -63,16 +63,15 @@ protected InputFile getInputFile(String s) { return fileIO.newInputFile(s); } - private static Schema filterSchema(Schema tableSchema, List requestedColumns) { - Set requestedFieldIds = requestedColumns.stream() - .map(IcebergColumnHandle::getId) - .collect(toImmutableSet()); + private static Schema filterSchema( + Schema tableSchema, List requestedColumns) { + Set requestedFieldIds = + requestedColumns.stream().map(IcebergColumnHandle::getId).collect(toImmutableSet()); return new Schema(filterFieldList(tableSchema.columns(), requestedFieldIds)); } private static List filterFieldList( - List fields, - Set requestedFieldIds) { + List fields, Set requestedFieldIds) { return fields.stream() .map(field -> filterField(field, requestedFieldIds)) .filter(Optional::isPresent) @@ -80,23 +79,26 @@ private static List filterFieldList( .collect(toImmutableList()); } - private static Optional filterField(Types.NestedField field, Set requestedFieldIds) { + private static Optional filterField( + Types.NestedField field, Set requestedFieldIds) { Type fieldType = field.type(); if (requestedFieldIds.contains(field.fieldId())) { return Optional.of(field); } if (fieldType.isStructType()) { - List requiredChildren = filterFieldList(fieldType.asStructType().fields(), requestedFieldIds); + List requiredChildren = + filterFieldList(fieldType.asStructType().fields(), requestedFieldIds); if (requiredChildren.isEmpty()) { return Optional.empty(); } - return Optional.of(Types.NestedField.of( - field.fieldId(), - field.isOptional(), - field.name(), - Types.StructType.of(requiredChildren), - field.doc())); + return Optional.of( + Types.NestedField.of( + field.fieldId(), + field.isOptional(), + field.name(), + Types.StructType.of(requiredChildren), + field.doc())); } return Optional.empty(); diff --git a/trino/src/main/java/com/netease/arctic/trino/util/MetricUtil.java 
b/trino/src/main/java/com/netease/arctic/trino/util/MetricUtil.java index 2d99cca19a..3b27d6a90e 100644 --- a/trino/src/main/java/com/netease/arctic/trino/util/MetricUtil.java +++ b/trino/src/main/java/com/netease/arctic/trino/util/MetricUtil.java @@ -23,9 +23,7 @@ import java.util.function.Supplier; -/** - * tools to metric run time - */ +/** tools to metric run time */ public class MetricUtil { private static final Logger LOG = LoggerFactory.getLogger(MetricUtil.class); diff --git a/trino/src/main/java/com/netease/arctic/trino/util/ObjectSerializerUtil.java b/trino/src/main/java/com/netease/arctic/trino/util/ObjectSerializerUtil.java index 3ed6be1521..c696c09f21 100644 --- a/trino/src/main/java/com/netease/arctic/trino/util/ObjectSerializerUtil.java +++ b/trino/src/main/java/com/netease/arctic/trino/util/ObjectSerializerUtil.java @@ -24,14 +24,10 @@ import java.io.ObjectInputStream; import java.io.ObjectOutputStream; -/** - * Tools to resolve java Serializer - */ +/** Tools to resolve java Serializer */ public class ObjectSerializerUtil { - /** - * Write java class to byte array - */ + /** Write java class to byte array */ public static byte[] write(Object o) { try (ByteArrayOutputStream arrayOutputStream = new ByteArrayOutputStream(); ObjectOutputStream objectOutputStream = new ObjectOutputStream(arrayOutputStream)) { @@ -42,9 +38,7 @@ public static byte[] write(Object o) { } } - /** - * Read class from Serialize byte array - */ + /** Read class from Serialize byte array */ public static T read(byte[] bytes, Class clazz) { if (bytes == null) { return null; diff --git a/trino/src/main/java/io/trino/parquet/reader/ColumnReaderFactory.java b/trino/src/main/java/io/trino/parquet/reader/ColumnReaderFactory.java index 252ba83967..026e7988b1 100644 --- a/trino/src/main/java/io/trino/parquet/reader/ColumnReaderFactory.java +++ b/trino/src/main/java/io/trino/parquet/reader/ColumnReaderFactory.java @@ -1,35 +1,5 @@ package io.trino.parquet.reader; -import io.trino.memory.context.AggregatedMemoryContext; -import io.trino.memory.context.LocalMemoryContext; -import io.trino.parquet.PrimitiveField; -import io.trino.parquet.reader.decoders.TransformingValueDecoders; -import io.trino.parquet.reader.decoders.ValueDecoders; -import io.trino.parquet.reader.flat.FlatColumnReader; -import io.trino.spi.TrinoException; -import io.trino.spi.type.AbstractIntType; -import io.trino.spi.type.AbstractLongType; -import io.trino.spi.type.AbstractVariableWidthType; -import io.trino.spi.type.CharType; -import io.trino.spi.type.DecimalType; -import io.trino.spi.type.TimeType; -import io.trino.spi.type.TimestampType; -import io.trino.spi.type.TimestampWithTimeZoneType; -import io.trino.spi.type.Type; -import io.trino.spi.type.VarcharType; -import org.apache.parquet.schema.LogicalTypeAnnotation; -import org.apache.parquet.schema.LogicalTypeAnnotation.DateLogicalTypeAnnotation; -import org.apache.parquet.schema.LogicalTypeAnnotation.DecimalLogicalTypeAnnotation; -import org.apache.parquet.schema.LogicalTypeAnnotation.IntLogicalTypeAnnotation; -import org.apache.parquet.schema.LogicalTypeAnnotation.LogicalTypeAnnotationVisitor; -import org.apache.parquet.schema.LogicalTypeAnnotation.TimeLogicalTypeAnnotation; -import org.apache.parquet.schema.LogicalTypeAnnotation.TimestampLogicalTypeAnnotation; -import org.apache.parquet.schema.LogicalTypeAnnotation.UUIDLogicalTypeAnnotation; -import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; -import org.joda.time.DateTimeZone; - -import java.util.Optional; - import static 
io.trino.parquet.ParquetTypeUtils.createDecimalType; import static io.trino.parquet.reader.decoders.TransformingValueDecoders.getInt96ToLongTimestampDecoder; import static io.trino.parquet.reader.decoders.TransformingValueDecoders.getInt96ToShortTimestampDecoder; @@ -63,12 +33,39 @@ import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64; import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT96; -/** - * Copy from trino-parquet ColumnReaderFactory and do some change to adapt Arctic - */ +import io.trino.memory.context.AggregatedMemoryContext; +import io.trino.memory.context.LocalMemoryContext; +import io.trino.parquet.PrimitiveField; +import io.trino.parquet.reader.decoders.TransformingValueDecoders; +import io.trino.parquet.reader.decoders.ValueDecoders; +import io.trino.parquet.reader.flat.FlatColumnReader; +import io.trino.spi.TrinoException; +import io.trino.spi.type.AbstractIntType; +import io.trino.spi.type.AbstractLongType; +import io.trino.spi.type.AbstractVariableWidthType; +import io.trino.spi.type.CharType; +import io.trino.spi.type.DecimalType; +import io.trino.spi.type.TimeType; +import io.trino.spi.type.TimestampType; +import io.trino.spi.type.TimestampWithTimeZoneType; +import io.trino.spi.type.Type; +import io.trino.spi.type.VarcharType; +import org.apache.parquet.schema.LogicalTypeAnnotation; +import org.apache.parquet.schema.LogicalTypeAnnotation.DateLogicalTypeAnnotation; +import org.apache.parquet.schema.LogicalTypeAnnotation.DecimalLogicalTypeAnnotation; +import org.apache.parquet.schema.LogicalTypeAnnotation.IntLogicalTypeAnnotation; +import org.apache.parquet.schema.LogicalTypeAnnotation.LogicalTypeAnnotationVisitor; +import org.apache.parquet.schema.LogicalTypeAnnotation.TimeLogicalTypeAnnotation; +import org.apache.parquet.schema.LogicalTypeAnnotation.TimestampLogicalTypeAnnotation; +import org.apache.parquet.schema.LogicalTypeAnnotation.UUIDLogicalTypeAnnotation; +import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; +import org.joda.time.DateTimeZone; + +import java.util.Optional; + +/** Copy from trino-parquet ColumnReaderFactory and do some change to adapt Arctic */ public final class ColumnReaderFactory { - private ColumnReaderFactory() { - } + private ColumnReaderFactory() {} public static ColumnReader create( PrimitiveField field, @@ -76,46 +73,55 @@ public static ColumnReader create( AggregatedMemoryContext aggregatedMemoryContext, boolean useBatchedColumnReaders) { Type type = field.getType(); - PrimitiveTypeName primitiveType = field.getDescriptor().getPrimitiveType().getPrimitiveTypeName(); - LogicalTypeAnnotation annotation = field.getDescriptor().getPrimitiveType().getLogicalTypeAnnotation(); + PrimitiveTypeName primitiveType = + field.getDescriptor().getPrimitiveType().getPrimitiveTypeName(); + LogicalTypeAnnotation annotation = + field.getDescriptor().getPrimitiveType().getLogicalTypeAnnotation(); LocalMemoryContext memoryContext = aggregatedMemoryContext.newLocalMemoryContext(ColumnReader.class.getSimpleName()); if (useBatchedColumnReaders && field.getDescriptor().getPath().length == 1) { if (BOOLEAN.equals(type) && primitiveType == PrimitiveTypeName.BOOLEAN) { - return new FlatColumnReader<>(field, ValueDecoders::getBooleanDecoder, BOOLEAN_ADAPTER, memoryContext); + return new FlatColumnReader<>( + field, ValueDecoders::getBooleanDecoder, BOOLEAN_ADAPTER, memoryContext); } if (TINYINT.equals(type) && primitiveType == INT32) { if (isIntegerAnnotation(annotation)) { - return new 
FlatColumnReader<>(field, ValueDecoders::getByteDecoder, BYTE_ADAPTER, memoryContext); + return new FlatColumnReader<>( + field, ValueDecoders::getByteDecoder, BYTE_ADAPTER, memoryContext); } throw unsupportedException(type, field); } if (SMALLINT.equals(type) && primitiveType == INT32) { if (isIntegerAnnotation(annotation)) { - return new FlatColumnReader<>(field, ValueDecoders::getShortDecoder, SHORT_ADAPTER, memoryContext); + return new FlatColumnReader<>( + field, ValueDecoders::getShortDecoder, SHORT_ADAPTER, memoryContext); } throw unsupportedException(type, field); } if (DATE.equals(type) && primitiveType == INT32) { if (annotation == null || annotation instanceof DateLogicalTypeAnnotation) { - return new FlatColumnReader<>(field, ValueDecoders::getIntDecoder, INT_ADAPTER, memoryContext); + return new FlatColumnReader<>( + field, ValueDecoders::getIntDecoder, INT_ADAPTER, memoryContext); } throw unsupportedException(type, field); } if (type instanceof AbstractIntType && primitiveType == INT32) { if (isIntegerAnnotation(annotation)) { - return new FlatColumnReader<>(field, ValueDecoders::getIntDecoder, INT_ADAPTER, memoryContext); + return new FlatColumnReader<>( + field, ValueDecoders::getIntDecoder, INT_ADAPTER, memoryContext); } throw unsupportedException(type, field); } if (type instanceof AbstractLongType && primitiveType == INT32) { if (isIntegerAnnotation(annotation)) { - return new FlatColumnReader<>(field, ValueDecoders::getIntToLongDecoder, LONG_ADAPTER, memoryContext); + return new FlatColumnReader<>( + field, ValueDecoders::getIntToLongDecoder, LONG_ADAPTER, memoryContext); } throw unsupportedException(type, field); } if (type instanceof TimeType && primitiveType == INT64) { - if (annotation instanceof TimeLogicalTypeAnnotation timeAnnotation && timeAnnotation.getUnit() == MICROS) { + if (annotation instanceof TimeLogicalTypeAnnotation timeAnnotation + && timeAnnotation.getUnit() == MICROS) { return new FlatColumnReader<>( field, TransformingValueDecoders::getTimeMicrosDecoder, LONG_ADAPTER, memoryContext); } @@ -123,19 +129,23 @@ public static ColumnReader create( } if (type instanceof AbstractLongType && primitiveType == INT64) { if (BIGINT.equals(type) && annotation instanceof TimestampLogicalTypeAnnotation) { - return new FlatColumnReader<>(field, ValueDecoders::getLongDecoder, LONG_ADAPTER, memoryContext); + return new FlatColumnReader<>( + field, ValueDecoders::getLongDecoder, LONG_ADAPTER, memoryContext); } if (isIntegerAnnotation(annotation)) { - return new FlatColumnReader<>(field, ValueDecoders::getLongDecoder, LONG_ADAPTER, memoryContext); + return new FlatColumnReader<>( + field, ValueDecoders::getLongDecoder, LONG_ADAPTER, memoryContext); } throw unsupportedException(type, field); } if (REAL.equals(type) && primitiveType == FLOAT) { - return new FlatColumnReader<>(field, ValueDecoders::getRealDecoder, INT_ADAPTER, memoryContext); + return new FlatColumnReader<>( + field, ValueDecoders::getRealDecoder, INT_ADAPTER, memoryContext); } if (DOUBLE.equals(type)) { if (primitiveType == PrimitiveTypeName.DOUBLE) { - return new FlatColumnReader<>(field, ValueDecoders::getDoubleDecoder, LONG_ADAPTER, memoryContext); + return new FlatColumnReader<>( + field, ValueDecoders::getDoubleDecoder, LONG_ADAPTER, memoryContext); } if (primitiveType == FLOAT) { return new FlatColumnReader<>( @@ -149,17 +159,20 @@ public static ColumnReader create( if (timestampType.isShort()) { return new FlatColumnReader<>( field, - (encoding, primitiveField) -> 
getInt96ToShortTimestampDecoder(encoding, primitiveField, timeZone), + (encoding, primitiveField) -> + getInt96ToShortTimestampDecoder(encoding, primitiveField, timeZone), LONG_ADAPTER, memoryContext); } return new FlatColumnReader<>( field, - (encoding, primitiveField) -> getInt96ToLongTimestampDecoder(encoding, primitiveField, timeZone), + (encoding, primitiveField) -> + getInt96ToLongTimestampDecoder(encoding, primitiveField, timeZone), INT96_ADAPTER, memoryContext); } - if (type instanceof TimestampWithTimeZoneType timestampWithTimeZoneType && primitiveType == INT96) { + if (type instanceof TimestampWithTimeZoneType timestampWithTimeZoneType + && primitiveType == INT96) { if (timestampWithTimeZoneType.isShort()) { return new FlatColumnReader<>( field, @@ -175,15 +188,18 @@ public static ColumnReader create( if (timestampType.isShort()) { return switch (timestampAnnotation.getUnit()) { case MILLIS -> new FlatColumnReader<>( - field, TransformingValueDecoders::getInt64TimestampMillsToShortTimestampDecoder, + field, + TransformingValueDecoders::getInt64TimestampMillsToShortTimestampDecoder, LONG_ADAPTER, memoryContext); case MICROS -> new FlatColumnReader<>( - field, TransformingValueDecoders::getInt64TimestampMicrosToShortTimestampDecoder, + field, + TransformingValueDecoders::getInt64TimestampMicrosToShortTimestampDecoder, LONG_ADAPTER, memoryContext); case NANOS -> new FlatColumnReader<>( - field, TransformingValueDecoders::getInt64TimestampNanosToShortTimestampDecoder, + field, + TransformingValueDecoders::getInt64TimestampNanosToShortTimestampDecoder, LONG_ADAPTER, memoryContext); }; @@ -206,7 +222,8 @@ public static ColumnReader create( memoryContext); }; } - if (type instanceof TimestampWithTimeZoneType timestampWithTimeZoneType && primitiveType == INT64) { + if (type instanceof TimestampWithTimeZoneType timestampWithTimeZoneType + && primitiveType == INT64) { if (!(annotation instanceof TimestampLogicalTypeAnnotation timestampAnnotation)) { throw unsupportedException(type, field); } @@ -214,12 +231,14 @@ public static ColumnReader create( return switch (timestampAnnotation.getUnit()) { case MILLIS -> new FlatColumnReader<>( field, - TransformingValueDecoders::getInt64TimestampMillsToShortTimestampWithTimeZoneDecoder, + TransformingValueDecoders + ::getInt64TimestampMillsToShortTimestampWithTimeZoneDecoder, LONG_ADAPTER, memoryContext); case MICROS -> new FlatColumnReader<>( field, - TransformingValueDecoders::getInt64TimestampMicrosToShortTimestampWithTimeZoneDecoder, + TransformingValueDecoders + ::getInt64TimestampMicrosToShortTimestampWithTimeZoneDecoder, LONG_ADAPTER, memoryContext); case NANOS -> throw unsupportedException(type, field); @@ -234,38 +253,48 @@ public static ColumnReader create( memoryContext); }; } - if (type instanceof DecimalType decimalType && decimalType.isShort() && - (primitiveType == INT32 || primitiveType == INT64 || primitiveType == FIXED_LEN_BYTE_ARRAY)) { - if (annotation instanceof DecimalLogicalTypeAnnotation decimalAnnotation && - !isDecimalRescaled(decimalAnnotation, decimalType)) { - return new FlatColumnReader<>(field, ValueDecoders::getShortDecimalDecoder, LONG_ADAPTER, memoryContext); + if (type instanceof DecimalType decimalType + && decimalType.isShort() + && (primitiveType == INT32 + || primitiveType == INT64 + || primitiveType == FIXED_LEN_BYTE_ARRAY)) { + if (annotation instanceof DecimalLogicalTypeAnnotation decimalAnnotation + && !isDecimalRescaled(decimalAnnotation, decimalType)) { + return new FlatColumnReader<>( + field, 
ValueDecoders::getShortDecimalDecoder, LONG_ADAPTER, memoryContext); } } - if (type instanceof DecimalType decimalType && !decimalType.isShort() && - (primitiveType == BINARY || primitiveType == FIXED_LEN_BYTE_ARRAY)) { - if (annotation instanceof DecimalLogicalTypeAnnotation decimalAnnotation && - !isDecimalRescaled(decimalAnnotation, decimalType)) { - return new FlatColumnReader<>(field, ValueDecoders::getLongDecimalDecoder, INT128_ADAPTER, memoryContext); + if (type instanceof DecimalType decimalType + && !decimalType.isShort() + && (primitiveType == BINARY || primitiveType == FIXED_LEN_BYTE_ARRAY)) { + if (annotation instanceof DecimalLogicalTypeAnnotation decimalAnnotation + && !isDecimalRescaled(decimalAnnotation, decimalType)) { + return new FlatColumnReader<>( + field, ValueDecoders::getLongDecimalDecoder, INT128_ADAPTER, memoryContext); } } - if (type instanceof VarcharType varcharType && !varcharType.isUnbounded() && primitiveType == BINARY) { + if (type instanceof VarcharType varcharType + && !varcharType.isUnbounded() + && primitiveType == BINARY) { return new FlatColumnReader<>( - field, - ValueDecoders::getBoundedVarcharBinaryDecoder, - BINARY_ADAPTER, - memoryContext); + field, ValueDecoders::getBoundedVarcharBinaryDecoder, BINARY_ADAPTER, memoryContext); } if (type instanceof CharType && primitiveType == BINARY) { - return new FlatColumnReader<>(field, ValueDecoders::getCharBinaryDecoder, BINARY_ADAPTER, memoryContext); + return new FlatColumnReader<>( + field, ValueDecoders::getCharBinaryDecoder, BINARY_ADAPTER, memoryContext); } if (type instanceof AbstractVariableWidthType && primitiveType == BINARY) { - return new FlatColumnReader<>(field, ValueDecoders::getBinaryDecoder, BINARY_ADAPTER, memoryContext); + return new FlatColumnReader<>( + field, ValueDecoders::getBinaryDecoder, BINARY_ADAPTER, memoryContext); } if (UUID.equals(type) && primitiveType == FIXED_LEN_BYTE_ARRAY) { - // Iceberg 0.11.1 writes UUID as FIXED_LEN_BYTE_ARRAY without logical type annotation (see https://github.com/apache/iceberg/pull/2913) - // To support such files, we bet on the logical type to be UUID based on the Trino UUID type check. + // Iceberg 0.11.1 writes UUID as FIXED_LEN_BYTE_ARRAY without logical type annotation (see + // https://github.com/apache/iceberg/pull/2913) + // To support such files, we bet on the logical type to be UUID based on the Trino UUID type + // check. 
if (annotation == null || isLogicalUuid(annotation)) { - return new FlatColumnReader<>(field, ValueDecoders::getUuidDecoder, INT128_ADAPTER, memoryContext); + return new FlatColumnReader<>( + field, ValueDecoders::getUuidDecoder, INT128_ADAPTER, memoryContext); } throw unsupportedException(type, field); } @@ -321,12 +350,15 @@ public static ColumnReader create( private static boolean isLogicalUuid(LogicalTypeAnnotation annotation) { return Optional.ofNullable(annotation) - .flatMap(logicalTypeAnnotation -> logicalTypeAnnotation.accept(new LogicalTypeAnnotationVisitor() { - @Override - public Optional visit(UUIDLogicalTypeAnnotation uuidLogicalType) { - return Optional.of(TRUE); - } - })) + .flatMap( + logicalTypeAnnotation -> + logicalTypeAnnotation.accept( + new LogicalTypeAnnotationVisitor() { + @Override + public Optional visit(UUIDLogicalTypeAnnotation uuidLogicalType) { + return Optional.of(TRUE); + } + })) .orElse(FALSE); } @@ -335,25 +367,28 @@ private static Optional createDecimalColumnReader(Primiti .map(decimalType -> DecimalColumnReaderFactory.createReader(field, decimalType)); } - private static boolean isDecimalRescaled(DecimalLogicalTypeAnnotation decimalAnnotation, DecimalType trinoType) { - return decimalAnnotation.getPrecision() != trinoType.getPrecision() || - decimalAnnotation.getScale() != trinoType.getScale(); + private static boolean isDecimalRescaled( + DecimalLogicalTypeAnnotation decimalAnnotation, DecimalType trinoType) { + return decimalAnnotation.getPrecision() != trinoType.getPrecision() + || decimalAnnotation.getScale() != trinoType.getScale(); } private static boolean isIntegerAnnotation(LogicalTypeAnnotation typeAnnotation) { - return typeAnnotation == null || - typeAnnotation instanceof IntLogicalTypeAnnotation || - isZeroScaleDecimalAnnotation(typeAnnotation); + return typeAnnotation == null + || typeAnnotation instanceof IntLogicalTypeAnnotation + || isZeroScaleDecimalAnnotation(typeAnnotation); } private static boolean isZeroScaleDecimalAnnotation(LogicalTypeAnnotation typeAnnotation) { - return typeAnnotation instanceof DecimalLogicalTypeAnnotation && - ((DecimalLogicalTypeAnnotation) typeAnnotation).getScale() == 0; + return typeAnnotation instanceof DecimalLogicalTypeAnnotation + && ((DecimalLogicalTypeAnnotation) typeAnnotation).getScale() == 0; } private static TrinoException unsupportedException(Type type, PrimitiveField field) { return new TrinoException( NOT_SUPPORTED, - format("Unsupported Trino column type (%s) for Parquet column (%s)", type, field.getDescriptor())); + format( + "Unsupported Trino column type (%s) for Parquet column (%s)", + type, field.getDescriptor())); } } diff --git a/trino/src/main/java/io/trino/parquet/reader/TimestampColumnReader.java b/trino/src/main/java/io/trino/parquet/reader/TimestampColumnReader.java index 673454ece0..759b00762e 100644 --- a/trino/src/main/java/io/trino/parquet/reader/TimestampColumnReader.java +++ b/trino/src/main/java/io/trino/parquet/reader/TimestampColumnReader.java @@ -14,6 +14,11 @@ package io.trino.parquet.reader; +import static io.trino.parquet.ParquetTimestampUtils.decodeInt96Timestamp; +import static io.trino.plugin.base.type.TrinoTimestampEncoderFactory.createTimestampEncoder; +import static io.trino.spi.type.TimeZoneKey.UTC_KEY; +import static java.util.Objects.requireNonNull; + import io.trino.parquet.PrimitiveField; import io.trino.plugin.base.type.DecodedTimestamp; import io.trino.plugin.base.type.TrinoTimestampEncoder; @@ -24,16 +29,8 @@ import io.trino.spi.type.Type; import 
org.joda.time.DateTimeZone; -import static io.trino.parquet.ParquetTimestampUtils.decodeInt96Timestamp; -import static io.trino.plugin.base.type.TrinoTimestampEncoderFactory.createTimestampEncoder; -import static io.trino.spi.type.TimeZoneKey.UTC_KEY; -import static java.util.Objects.requireNonNull; - -/** - * Copy from trino-parquet TimestampColumnReader and do some change to adapt Arctic - */ -public class TimestampColumnReader - extends PrimitiveColumnReader { +/** Copy from trino-parquet TimestampColumnReader with some changes to adapt it to Arctic. */ +public class TimestampColumnReader extends PrimitiveColumnReader { private final DateTimeZone timeZone; public TimestampColumnReader(PrimitiveField field, DateTimeZone timeZone) { @@ -41,17 +38,19 @@ public TimestampColumnReader(PrimitiveField field, DateTimeZone timeZone) { this.timeZone = requireNonNull(timeZone, "timeZone is null"); } - // TODO: refactor to provide type at construction time (https://github.com/trinodb/trino/issues/5198) + // TODO: refactor to provide type at construction time + // (https://github.com/trinodb/trino/issues/5198) @Override protected void readValue(BlockBuilder blockBuilder, Type type) { if (type instanceof TimestampWithTimeZoneType) { DecodedTimestamp decodedTimestamp = decodeInt96Timestamp(valuesReader.readBytes()); LongTimestampWithTimeZone longTimestampWithTimeZone = - LongTimestampWithTimeZone.fromEpochSecondsAndFraction(decodedTimestamp.epochSeconds(), - decodedTimestamp.nanosOfSecond() * 1000L, UTC_KEY); + LongTimestampWithTimeZone.fromEpochSecondsAndFraction( + decodedTimestamp.epochSeconds(), decodedTimestamp.nanosOfSecond() * 1000L, UTC_KEY); type.writeObject(blockBuilder, longTimestampWithTimeZone); } else { - TrinoTimestampEncoder trinoTimestampEncoder = createTimestampEncoder((TimestampType) type, timeZone); + TrinoTimestampEncoder trinoTimestampEncoder = + createTimestampEncoder((TimestampType) type, timeZone); trinoTimestampEncoder.write(decodeInt96Timestamp(valuesReader.readBytes()), blockBuilder); } } diff --git a/trino/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java b/trino/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java index be1a6650f6..171d2d262b 100644 --- a/trino/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java +++ b/trino/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java @@ -18,6 +18,9 @@ package org.apache.hadoop.hive.metastore; +import static org.apache.hadoop.hive.metastore.MetaStoreUtils.DEFAULT_DATABASE_NAME; +import static org.apache.hadoop.hive.metastore.MetaStoreUtils.isIndexTable; + import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.common.ValidTxnList; @@ -145,6 +148,7 @@ import org.slf4j.LoggerFactory; import javax.security.auth.login.LoginException; + import java.io.IOException; import java.lang.reflect.Constructor; import java.lang.reflect.InvocationHandler; @@ -170,12 +174,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; -import static org.apache.hadoop.hive.metastore.MetaStoreUtils.DEFAULT_DATABASE_NAME; -import static org.apache.hadoop.hive.metastore.MetaStoreUtils.isIndexTable; - -/** - * Copy form hive 2.1.1 to change some code to adapt jdk 11. - */ +/** Copy from Hive 2.1.1 with some changes to adapt it to JDK 11.
*/ @Public @Unstable public class HiveMetaStoreClient implements IMetaStoreClient { @@ -214,15 +213,19 @@ public HiveMetaStoreClient(HiveConf conf, HiveMetaHookLoader hookLoader, Boolean } this.conf = conf; filterHook = loadFilterHooks(); - fileMetadataBatchSize = HiveConf.getIntVar( - conf, HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_OBJECTS_MAX); + fileMetadataBatchSize = + HiveConf.getIntVar(conf, HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_OBJECTS_MAX); String msUri = conf.getVar(ConfVars.METASTOREURIS); localMetaStore = HiveConfUtil.isEmbeddedMetaStore(msUri); if (localMetaStore) { if (!allowEmbedded) { - throw new MetaException("Embedded metastore is not allowed here. Please configure " + - ConfVars.METASTOREURIS.varname + "; it is currently set to [" + msUri + "]"); + throw new MetaException( + "Embedded metastore is not allowed here. Please configure " + + ConfVars.METASTOREURIS.varname + + "; it is currently set to [" + + msUri + + "]"); } // instantiate the metastore server handler directly instead of connecting // through the network @@ -237,20 +240,20 @@ public HiveMetaStoreClient(HiveConf conf, HiveMetaHookLoader hookLoader, Boolean return; } else { if (conf.getBoolVar(ConfVars.METASTORE_FASTPATH)) { - throw new RuntimeException("You can't set hive.metastore.fastpath to true when you're " + - "talking to the thrift metastore service. You must run the metastore locally."); + throw new RuntimeException( + "You can't set hive.metastore.fastpath to true when you're " + + "talking to the thrift metastore service. You must run the metastore locally."); } } // get the number retries retries = HiveConf.getIntVar(conf, HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES); - retryDelaySeconds = conf.getTimeVar( - ConfVars.METASTORE_CLIENT_CONNECT_RETRY_DELAY, TimeUnit.SECONDS); + retryDelaySeconds = + conf.getTimeVar(ConfVars.METASTORE_CLIENT_CONNECT_RETRY_DELAY, TimeUnit.SECONDS); // user wants file store based configuration if (conf.getVar(HiveConf.ConfVars.METASTOREURIS) != null) { - String[] metastoreUrisString = conf.getVar( - HiveConf.ConfVars.METASTOREURIS).split(","); + String[] metastoreUrisString = conf.getVar(HiveConf.ConfVars.METASTOREURIS).split(","); metastoreUris = new URI[metastoreUrisString.length]; try { int i = 0; @@ -279,8 +282,8 @@ public HiveMetaStoreClient(HiveConf conf, HiveMetaHookLoader hookLoader, Boolean } private MetaStoreFilterHook loadFilterHooks() throws IllegalStateException { - Class authProviderClass = conf - .getClass( + Class authProviderClass = + conf.getClass( HiveConf.ConfVars.METASTORE_FILTER_HOOK.varname, DefaultMetaStoreFilterHookImpl.class, MetaStoreFilterHook.class); @@ -305,8 +308,8 @@ private MetaStoreFilterHook loadFilterHooks() throws IllegalStateException { } /** - * Swaps the first element of the metastoreUris array with a random element from the - * remainder of the array. + * Swaps the first element of the metastoreUris array with a random element from the remainder of + * the array. */ private void promoteRandomMetaStoreURI() { if (metastoreUris.length <= 1) { @@ -342,10 +345,17 @@ public boolean isCompatibleWith(HiveConf conf) { // Since metaVars are all of different types, use string for comparison String oldVar = currentMetaVarsCopy.get(oneVar.varname); String newVar = conf.get(oneVar.varname, ""); - if (oldVar == null || - (oneVar.isCaseSensitive() ? 
!oldVar.equals(newVar) : !oldVar.equalsIgnoreCase(newVar))) { - LOG.info("Mestastore configuration " + oneVar.varname + - " changed from " + oldVar + " to " + newVar); + if (oldVar == null + || (oneVar.isCaseSensitive() + ? !oldVar.equals(newVar) + : !oldVar.equalsIgnoreCase(newVar))) { + LOG.info( + "Mestastore configuration " + + oneVar.varname + + " changed from " + + oldVar + + " to " + + newVar); compatible = false; } } @@ -361,8 +371,9 @@ public void setHiveAddedJars(String addedJars) { public void reconnect() throws MetaException { if (localMetaStore) { // For direct DB connections we don't yet support reestablishing connections. - throw new MetaException("For direct MetaStore DB connections, we don't support retries" + - " at the client level."); + throw new MetaException( + "For direct MetaStore DB connections, we don't support retries" + + " at the client level."); } else { close(); // Swap the first element of the metastoreUris[] with a random element from the rest @@ -381,8 +392,7 @@ public void reconnect() throws MetaException { * @throws MetaException * @throws TException * @see org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.Iface#alter_table( - *java.lang.String, java.lang.String, - * org.apache.hadoop.hive.metastore.api.Table) + * java.lang.String, java.lang.String, org.apache.hadoop.hive.metastore.api.Table) */ @Override public void alter_table(String dbname, String tblName, Table newTbl) @@ -391,8 +401,8 @@ public void alter_table(String dbname, String tblName, Table newTbl) } public void alter_table_with_environmentContext( - String dbname, String tblName, Table newTbl, - EnvironmentContext envContext) throws InvalidOperationException, MetaException, TException { + String dbname, String tblName, Table newTbl, EnvironmentContext envContext) + throws InvalidOperationException, MetaException, TException { client.alter_table_with_environment_context(dbname, tblName, newTbl, envContext); } @@ -405,14 +415,12 @@ public void alter_table_with_environmentContext( * @throws MetaException * @throws TException * @see org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.Iface#rename_partition( - *java.lang.String, java.lang.String, java.util.List, org.apache.hadoop.hive.metastore.api.Partition) + * java.lang.String, java.lang.String, java.util.List, + * org.apache.hadoop.hive.metastore.api.Partition) */ @Override public void renamePartition( - final String dbname, - final String name, - final List partVals, - final Partition newPart) + final String dbname, final String name, final List partVals, final Partition newPart) throws InvalidOperationException, MetaException, TException { client.rename_partition(dbname, name, partVals, newPart); } @@ -423,8 +431,8 @@ private void open() throws MetaException { boolean useSasl = conf.getBoolVar(ConfVars.METASTORE_USE_THRIFT_SASL); boolean useFramedTransport = conf.getBoolVar(ConfVars.METASTORE_USE_THRIFT_FRAMED_TRANSPORT); boolean useCompactProtocol = conf.getBoolVar(ConfVars.METASTORE_USE_THRIFT_COMPACT_PROTOCOL); - int clientSocketTimeout = (int) conf.getTimeVar( - ConfVars.METASTORE_CLIENT_SOCKET_TIMEOUT, TimeUnit.MILLISECONDS); + int clientSocketTimeout = + (int) conf.getTimeVar(ConfVars.METASTORE_CLIENT_SOCKET_TIMEOUT, TimeUnit.MILLISECONDS); for (int attempt = 0; !isConnected && attempt < retries; ++attempt) { for (URI store : metastoreUris) { @@ -447,15 +455,25 @@ private void open() throws MetaException { tokenStrForm = Utils.getTokenStrForm(tokenSig); if (tokenStrForm != null) { // authenticate using delegation tokens 
via the "DIGEST" mechanism - transport = authBridge.createClientTransport(null, store.getHost(), - "DIGEST", tokenStrForm, transport, - MetaStoreUtils.getMetaStoreSaslProperties(conf)); + transport = + authBridge.createClientTransport( + null, + store.getHost(), + "DIGEST", + tokenStrForm, + transport, + MetaStoreUtils.getMetaStoreSaslProperties(conf)); } else { String principalConfig = conf.getVar(HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL); - transport = authBridge.createClientTransport( - principalConfig, store.getHost(), "KERBEROS", null, - transport, MetaStoreUtils.getMetaStoreSaslProperties(conf)); + transport = + authBridge.createClientTransport( + principalConfig, + store.getHost(), + "KERBEROS", + null, + transport, + MetaStoreUtils.getMetaStoreSaslProperties(conf)); } } catch (IOException ioe) { LOG.error("Couldn't create client transport", ioe); @@ -473,7 +491,9 @@ private void open() throws MetaException { client = new ThriftHiveMetastore.Client(protocol); try { transport.open(); - LOG.info("Opened a connection to metastore, current connections: " + connCount.incrementAndGet()); + LOG.info( + "Opened a connection to metastore, current connections: " + + connCount.incrementAndGet()); isConnected = true; } catch (TTransportException e) { tte = e; @@ -491,19 +511,24 @@ private void open() throws MetaException { UserGroupInformation ugi = Utils.getUGI(); client.set_ugi(ugi.getUserName(), Arrays.asList(ugi.getGroupNames())); } catch (LoginException e) { - LOG.warn("Failed to do login. set_ugi() is not successful, " + - "Continuing without it.", e); + LOG.warn( + "Failed to do login. set_ugi() is not successful, " + "Continuing without it.", + e); } catch (IOException e) { - LOG.warn("Failed to find ugi of client set_ugi() is not successful, " + - "Continuing without it.", e); + LOG.warn( + "Failed to find ugi of client set_ugi() is not successful, " + + "Continuing without it.", + e); } catch (TException e) { - LOG.warn("set_ugi() not successful, Likely cause: new client talking to old server. " + - "Continuing without it.", e); + LOG.warn( + "set_ugi() not successful, Likely cause: new client talking to old server. " + + "Continuing without it.", + e); } } } catch (MetaException e) { - LOG.error("Unable to connect to metastore with URI " + store + - " in attempt " + attempt, e); + LOG.error( + "Unable to connect to metastore with URI " + store + " in attempt " + attempt, e); } if (isConnected) { break; @@ -521,8 +546,10 @@ private void open() throws MetaException { } if (!isConnected) { - throw new MetaException("Could not connect to meta store using any of the URIs provided." + - " Most recent failure: " + StringUtils.stringifyException(tte)); + throw new MetaException( + "Could not connect to meta store using any of the URIs provided." + + " Most recent failure: " + + StringUtils.stringifyException(tte)); } snapshotActiveConf(); @@ -556,7 +583,8 @@ public void close() { // just in case, we make this call. 
if ((transport != null) && transport.isOpen()) { transport.close(); - LOG.info("Closed a connection to metastore, current connections: " + connCount.decrementAndGet()); + LOG.info( + "Closed a connection to metastore, current connections: " + connCount.decrementAndGet()); } } @@ -578,18 +606,16 @@ public String getMetaConf(String key) throws TException { * @throws MetaException * @throws TException * @see org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.Iface#add_partition( - * org.apache.hadoop.hive.metastore.api.Partition) + * org.apache.hadoop.hive.metastore.api.Partition) */ @Override public Partition add_partition(Partition newPart) - throws InvalidObjectException, AlreadyExistsException, MetaException, - TException { + throws InvalidObjectException, AlreadyExistsException, MetaException, TException { return add_partition(newPart, null); } public Partition add_partition(Partition newPart, EnvironmentContext envContext) - throws InvalidObjectException, AlreadyExistsException, MetaException, - TException { + throws InvalidObjectException, AlreadyExistsException, MetaException, TException { Partition p = client.add_partition_with_environment_context(newPart, envContext); return fastpath ? p : deepCopy(p); } @@ -604,8 +630,7 @@ public Partition add_partition(Partition newPart, EnvironmentContext envContext) */ @Override public int add_partitions(List newParts) - throws InvalidObjectException, AlreadyExistsException, MetaException, - TException { + throws InvalidObjectException, AlreadyExistsException, MetaException, TException { return client.add_partitions(newParts); } @@ -617,8 +642,8 @@ public List add_partitions( return needResults ? new ArrayList() : null; } Partition part = parts.get(0); - AddPartitionsRequest req = new AddPartitionsRequest( - part.getDbName(), part.getTableName(), parts, ifNotExists); + AddPartitionsRequest req = + new AddPartitionsRequest(part.getDbName(), part.getTableName(), parts, ifNotExists); req.setNeedResult(needResults); AddPartitionsResult result = client.add_partitions_req(req); return needResults ? 
filterHook.filterPartitions(result.getPartitions()) : null; @@ -638,23 +663,21 @@ public int add_partitions_pspec(PartitionSpecProxy partitionSpec) throws TExcept * @throws AlreadyExistsException * @throws MetaException * @throws TException - * @see org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.Iface#append_partition(java.lang.String, - * java.lang.String, java.util.List) + * @see + * org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.Iface#append_partition(java.lang.String, + * java.lang.String, java.util.List) */ @Override - public Partition appendPartition( - String dbName, String tableName, - List partVals) throws InvalidObjectException, - AlreadyExistsException, MetaException, TException { + public Partition appendPartition(String dbName, String tableName, List partVals) + throws InvalidObjectException, AlreadyExistsException, MetaException, TException { return appendPartition(dbName, tableName, partVals, null); } public Partition appendPartition( - String dbName, String tableName, List partVals, - EnvironmentContext envContext) throws InvalidObjectException, AlreadyExistsException, - MetaException, TException { - Partition p = client.append_partition_with_environment_context(dbName, tableName, - partVals, envContext); + String dbName, String tableName, List partVals, EnvironmentContext envContext) + throws InvalidObjectException, AlreadyExistsException, MetaException, TException { + Partition p = + client.append_partition_with_environment_context(dbName, tableName, partVals, envContext); return fastpath ? p : deepCopy(p); } @@ -665,48 +688,51 @@ public Partition appendPartition(String dbName, String tableName, String partNam } public Partition appendPartition( - String dbName, String tableName, String partName, - EnvironmentContext envContext) throws InvalidObjectException, AlreadyExistsException, - MetaException, TException { - Partition p = client.append_partition_by_name_with_environment_context(dbName, tableName, - partName, envContext); + String dbName, String tableName, String partName, EnvironmentContext envContext) + throws InvalidObjectException, AlreadyExistsException, MetaException, TException { + Partition p = + client.append_partition_by_name_with_environment_context( + dbName, tableName, partName, envContext); return fastpath ? 
p : deepCopy(p); } /** * Exchange the partition between two tables * - * @param partitionSpecs partitions specs of the parent partition to be exchanged - * @param destDb the db of the destination table - * @param destinationTableName the destination table name - * @ @return new partition after exchanging + * @param partitionSpecs partitions specs of the parent partition to be exchanged + * @param destDb the db of the destination table + * @param destinationTableName the destination table name @ @return new partition after exchanging */ @Override public Partition exchange_partition( Map partitionSpecs, - String sourceDb, String sourceTable, String destDb, - String destinationTableName) throws MetaException, - NoSuchObjectException, InvalidObjectException, TException { - return client.exchange_partition(partitionSpecs, sourceDb, sourceTable, - destDb, destinationTableName); + String sourceDb, + String sourceTable, + String destDb, + String destinationTableName) + throws MetaException, NoSuchObjectException, InvalidObjectException, TException { + return client.exchange_partition( + partitionSpecs, sourceDb, sourceTable, destDb, destinationTableName); } /** * Exchange the partitions between two tables * - * @param partitionSpecs partitions specs of the parent partition to be exchanged - * @param destDb the db of the destination table - * @param destinationTableName the destination table name - * @ @return new partitions after exchanging + * @param partitionSpecs partitions specs of the parent partition to be exchanged + * @param destDb the db of the destination table + * @param destinationTableName the destination table name @ @return new partitions after + * exchanging */ @Override public List exchange_partitions( Map partitionSpecs, - String sourceDb, String sourceTable, String destDb, - String destinationTableName) throws MetaException, - NoSuchObjectException, InvalidObjectException, TException { - return client.exchange_partitions(partitionSpecs, sourceDb, sourceTable, - destDb, destinationTableName); + String sourceDb, + String sourceTable, + String destDb, + String destinationTableName) + throws MetaException, NoSuchObjectException, InvalidObjectException, TException { + return client.exchange_partitions( + partitionSpecs, sourceDb, sourceTable, destDb, destinationTableName); } @Override @@ -737,16 +763,18 @@ public void createDatabase(Database db) * @throws NoSuchObjectException * @throws TException * @see org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.Iface#create_table( - * org.apache.hadoop.hive.metastore.api.Table) + * org.apache.hadoop.hive.metastore.api.Table) */ @Override - public void createTable(Table tbl) throws AlreadyExistsException, - InvalidObjectException, MetaException, NoSuchObjectException, TException { + public void createTable(Table tbl) + throws AlreadyExistsException, InvalidObjectException, MetaException, NoSuchObjectException, + TException { createTable(tbl, null); } - public void createTable(Table tbl, EnvironmentContext envContext) throws AlreadyExistsException, - InvalidObjectException, MetaException, NoSuchObjectException, TException { + public void createTable(Table tbl, EnvironmentContext envContext) + throws AlreadyExistsException, InvalidObjectException, MetaException, NoSuchObjectException, + TException { HiveMetaHook hook = getHook(tbl); if (hook != null) { hook.preCreateTable(tbl); @@ -768,10 +796,9 @@ public void createTable(Table tbl, EnvironmentContext envContext) throws Already @Override public void createTableWithConstraints( - Table 
tbl, - List primaryKeys, List foreignKeys) - throws AlreadyExistsException, InvalidObjectException, - MetaException, NoSuchObjectException, TException { + Table tbl, List primaryKeys, List foreignKeys) + throws AlreadyExistsException, InvalidObjectException, MetaException, NoSuchObjectException, + TException { HiveMetaHook hook = getHook(tbl); if (hook != null) { hook.preCreateTable(tbl); @@ -792,20 +819,20 @@ public void createTableWithConstraints( } @Override - public void dropConstraint(String dbName, String tableName, String constraintName) throws - NoSuchObjectException, MetaException, TException { + public void dropConstraint(String dbName, String tableName, String constraintName) + throws NoSuchObjectException, MetaException, TException { client.drop_constraint(new DropConstraintRequest(dbName, tableName, constraintName)); } @Override - public void addPrimaryKey(List primaryKeyCols) throws - NoSuchObjectException, MetaException, TException { + public void addPrimaryKey(List primaryKeyCols) + throws NoSuchObjectException, MetaException, TException { client.add_primary_key(new AddPrimaryKeyRequest(primaryKeyCols)); } @Override - public void addForeignKey(List foreignKeyCols) throws - NoSuchObjectException, MetaException, TException { + public void addForeignKey(List foreignKeyCols) + throws NoSuchObjectException, MetaException, TException { client.add_foreign_key(new AddForeignKeyRequest(foreignKeyCols)); } @@ -817,10 +844,10 @@ public void addForeignKey(List foreignKeyCols) throws * @throws MetaException * @throws TException * @see org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.Iface#create_type( - * org.apache.hadoop.hive.metastore.api.Type) + * org.apache.hadoop.hive.metastore.api.Type) */ - public boolean createType(Type type) throws AlreadyExistsException, - InvalidObjectException, MetaException, TException { + public boolean createType(Type type) + throws AlreadyExistsException, InvalidObjectException, MetaException, TException { return client.create_type(type); } @@ -831,7 +858,7 @@ public boolean createType(Type type) throws AlreadyExistsException, * @throws MetaException * @throws TException * @see org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.Iface#drop_database( - * java.lang.String, boolean, boolean) + * java.lang.String, boolean, boolean) */ @Override public void dropDatabase(String name) @@ -846,7 +873,8 @@ public void dropDatabase(String name, boolean deleteData, boolean ignoreUnknownD } @Override - public void dropDatabase(String name, boolean deleteData, boolean ignoreUnknownDb, boolean cascade) + public void dropDatabase( + String name, boolean deleteData, boolean ignoreUnknownDb, boolean cascade) throws NoSuchObjectException, InvalidOperationException, MetaException, TException { try { getDatabase(name); @@ -895,34 +923,40 @@ public boolean dropPartition(String dbName, String tableName, String partName, b * @throws NoSuchObjectException * @throws MetaException * @throws TException - * @see org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.Iface#drop_partition(java.lang.String, - * java.lang.String, java.util.List, boolean) + * @see + * org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.Iface#drop_partition(java.lang.String, + * java.lang.String, java.util.List, boolean) */ @Override public boolean dropPartition( - String dbName, String tblName, - List partVals, boolean deleteData) throws NoSuchObjectException, - MetaException, TException { + String dbName, String tblName, List partVals, boolean deleteData) + throws 
NoSuchObjectException, MetaException, TException { return dropPartition(dbName, tblName, partVals, deleteData, null); } @Override public boolean dropPartition( - String dbName, String tblName, - List partVals, PartitionDropOptions options) throws TException { - return dropPartition(dbName, tblName, partVals, options.deleteData, + String dbName, String tblName, List partVals, PartitionDropOptions options) + throws TException { + return dropPartition( + dbName, + tblName, + partVals, + options.deleteData, options.purgeData ? getEnvironmentContextWithIfPurgeSet() : null); } public boolean dropPartition( - String dbName, String tblName, List partVals, - boolean deleteData, EnvironmentContext envContext) throws NoSuchObjectException, - MetaException, TException { - return client.drop_partition_with_environment_context(dbName, tblName, partVals, deleteData, - envContext); + String dbName, + String tblName, + List partVals, + boolean deleteData, + EnvironmentContext envContext) + throws NoSuchObjectException, MetaException, TException { + return client.drop_partition_with_environment_context( + dbName, tblName, partVals, deleteData, envContext); } - /** * @param tblName * @param dbName @@ -931,19 +965,18 @@ public boolean dropPartition( * @throws NoSuchObjectException * @throws MetaException * @throws TException - * @see org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.Iface#drop_partition(java.lang.String, - * java.lang.String, java.util.List, boolean) + * @see + * org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.Iface#drop_partition(java.lang.String, + * java.lang.String, java.util.List, boolean) */ - public boolean dropPartition( - String dbName, String tblName, - List partVals) throws NoSuchObjectException, MetaException, - TException { + public boolean dropPartition(String dbName, String tblName, List partVals) + throws NoSuchObjectException, MetaException, TException { return dropPartition(dbName, tblName, partVals, true, null); } public boolean dropPartition( - String dbName, String tblName, List partVals, - EnvironmentContext envContext) throws NoSuchObjectException, MetaException, TException { + String dbName, String tblName, List partVals, EnvironmentContext envContext) + throws NoSuchObjectException, MetaException, TException { return dropPartition(dbName, tblName, partVals, true, envContext); } @@ -954,16 +987,22 @@ public boolean dropPartition(String dbName, String tableName, String partName, b } public boolean dropPartition( - String dbName, String tableName, String partName, boolean deleteData, - EnvironmentContext envContext) throws NoSuchObjectException, MetaException, TException { - return client.drop_partition_by_name_with_environment_context(dbName, tableName, partName, - deleteData, envContext); + String dbName, + String tableName, + String partName, + boolean deleteData, + EnvironmentContext envContext) + throws NoSuchObjectException, MetaException, TException { + return client.drop_partition_by_name_with_environment_context( + dbName, tableName, partName, deleteData, envContext); } @Override public List dropPartitions( - String dbName, String tblName, - List> partExprs, PartitionDropOptions options) + String dbName, + String tblName, + List> partExprs, + PartitionDropOptions options) throws TException { RequestPartsSpec rps = new RequestPartsSpec(); List exprs = new ArrayList(partExprs.size()); @@ -987,11 +1026,18 @@ public List dropPartitions( @Override public List dropPartitions( - String dbName, String tblName, - List> partExprs, boolean deleteData, - 
boolean ifExists, boolean needResult) throws NoSuchObjectException, MetaException, TException { + String dbName, + String tblName, + List> partExprs, + boolean deleteData, + boolean ifExists, + boolean needResult) + throws NoSuchObjectException, MetaException, TException { - return dropPartitions(dbName, tblName, partExprs, + return dropPartitions( + dbName, + tblName, + partExprs, PartitionDropOptions.instance() .deleteData(deleteData) .ifExists(ifExists) @@ -1000,14 +1046,18 @@ public List dropPartitions( @Override public List dropPartitions( - String dbName, String tblName, - List> partExprs, boolean deleteData, - boolean ifExists) throws NoSuchObjectException, MetaException, TException { + String dbName, + String tblName, + List> partExprs, + boolean deleteData, + boolean ifExists) + throws NoSuchObjectException, MetaException, TException { // By default, we need the results from dropPartitions(); - return dropPartitions(dbName, tblName, partExprs, - PartitionDropOptions.instance() - .deleteData(deleteData) - .ifExists(ifExists)); + return dropPartitions( + dbName, + tblName, + partExprs, + PartitionDropOptions.instance().deleteData(deleteData).ifExists(ifExists)); } /** @@ -1016,26 +1066,22 @@ public List dropPartitions( * @see #dropTable(String, String, boolean, boolean, EnvironmentContext) */ @Override - public void dropTable( - String dbname, String name, boolean deleteData, - boolean ignoreUnknownTab) throws MetaException, TException, - NoSuchObjectException, UnsupportedOperationException { + public void dropTable(String dbname, String name, boolean deleteData, boolean ignoreUnknownTab) + throws MetaException, TException, NoSuchObjectException, UnsupportedOperationException { dropTable(dbname, name, deleteData, ignoreUnknownTab, null); } /** * Drop the table and choose whether to save the data in the trash. * - * @param ifPurge completely purge the table (skipping trash) while removing - * data from warehouse + * @param ifPurge completely purge the table (skipping trash) while removing data from warehouse * @see #dropTable(String, String, boolean, boolean, EnvironmentContext) */ @Override public void dropTable( - String dbname, String name, boolean deleteData, - boolean ignoreUnknownTab, boolean ifPurge) + String dbname, String name, boolean deleteData, boolean ignoreUnknownTab, boolean ifPurge) throws MetaException, TException, NoSuchObjectException, UnsupportedOperationException { - //build new environmentContext with ifPurge; + // build new environmentContext with ifPurge; EnvironmentContext envContext = null; if (ifPurge) { Map warehouseOptions = null; @@ -1046,9 +1092,7 @@ public void dropTable( dropTable(dbname, name, deleteData, ignoreUnknownTab, envContext); } - /** - * {@inheritDoc} - */ + /** {@inheritDoc} */ @Override @Deprecated public void dropTable(String tableName, boolean deleteData) @@ -1066,25 +1110,29 @@ public void dropTable(String dbname, String name) } /** - * Drop the table and choose whether to: delete the underlying table data; - * throw if the table doesn't exist; save the data in the trash. + * Drop the table and choose whether to: delete the underlying table data; throw if the table + * doesn't exist; save the data in the trash. 
* * @param dbname * @param name - * @param deleteData delete the underlying data or just delete the table in metadata + * @param deleteData delete the underlying data or just delete the table in metadata * @param ignoreUnknownTab don't throw if the requested table doesn't exist - * @param envContext for communicating with thrift - * @throws MetaException could not drop table properly - * @throws NoSuchObjectException the table wasn't found - * @throws TException a thrift communication error occurred + * @param envContext for communicating with thrift + * @throws MetaException could not drop table properly + * @throws NoSuchObjectException the table wasn't found + * @throws TException a thrift communication error occurred * @throws UnsupportedOperationException dropping an index table is not allowed - * @see org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.Iface#drop_table(java.lang.String, - * java.lang.String, boolean) + * @see + * org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.Iface#drop_table(java.lang.String, + * java.lang.String, boolean) */ public void dropTable( - String dbname, String name, boolean deleteData, - boolean ignoreUnknownTab, EnvironmentContext envContext) throws MetaException, TException, - NoSuchObjectException, UnsupportedOperationException { + String dbname, + String name, + boolean deleteData, + boolean ignoreUnknownTab, + EnvironmentContext envContext) + throws MetaException, TException, NoSuchObjectException, UnsupportedOperationException { Table tbl; try { tbl = getTable(dbname, name); @@ -1135,10 +1183,10 @@ public boolean dropType(String type) throws NoSuchObjectException, MetaException * @return map of types * @throws MetaException * @throws TException - * @see org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.Iface#get_type_all(java.lang.String) + * @see + * org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.Iface#get_type_all(java.lang.String) */ - public Map getTypeAll(String name) throws MetaException, - TException { + public Map getTypeAll(String name) throws MetaException, TException { Map result = null; Map fromClient = client.get_type_all(name); if (fromClient != null) { @@ -1150,12 +1198,9 @@ public Map getTypeAll(String name) throws MetaException, return result; } - /** - * {@inheritDoc} - */ + /** {@inheritDoc} */ @Override - public List getDatabases(String databasePattern) - throws MetaException { + public List getDatabases(String databasePattern) throws MetaException { try { return filterHook.filterDatabases(client.get_databases(databasePattern)); } catch (Exception e) { @@ -1164,9 +1209,7 @@ public List getDatabases(String databasePattern) return null; } - /** - * {@inheritDoc} - */ + /** {@inheritDoc} */ @Override public List getAllDatabases() throws MetaException { try { @@ -1187,59 +1230,60 @@ public List getAllDatabases() throws MetaException { * @throws TException */ @Override - public List listPartitions( - String dbName, String tblName, - short maxParts) throws NoSuchObjectException, MetaException, TException { + public List listPartitions(String dbName, String tblName, short maxParts) + throws NoSuchObjectException, MetaException, TException { List parts = client.get_partitions(dbName, tblName, maxParts); return fastpath ? 
parts : deepCopyPartitions(filterHook.filterPartitions(parts)); } @Override public List listPartitions( - String dbName, String tblName, - List partVals, short maxParts) + String dbName, String tblName, List partVals, short maxParts) throws NoSuchObjectException, MetaException, TException { List parts = client.get_partitions_ps(dbName, tblName, partVals, maxParts); return fastpath ? parts : deepCopyPartitions(filterHook.filterPartitions(parts)); } @Override - public PartitionSpecProxy listPartitionSpecs(String dbName, String tableName, int maxParts) throws TException { - return PartitionSpecProxy.Factory.get(filterHook.filterPartitionSpecs( - client.get_partitions_pspec(dbName, tableName, maxParts))); + public PartitionSpecProxy listPartitionSpecs(String dbName, String tableName, int maxParts) + throws TException { + return PartitionSpecProxy.Factory.get( + filterHook.filterPartitionSpecs(client.get_partitions_pspec(dbName, tableName, maxParts))); } @Override public List listPartitionsWithAuthInfo( - String dbName, - String tblName, short maxParts, String userName, List groupNames) + String dbName, String tblName, short maxParts, String userName, List groupNames) throws NoSuchObjectException, MetaException, TException { - List parts = client.get_partitions_with_auth(dbName, tblName, maxParts, - userName, groupNames); + List parts = + client.get_partitions_with_auth(dbName, tblName, maxParts, userName, groupNames); return fastpath ? parts : deepCopyPartitions(filterHook.filterPartitions(parts)); } @Override public List listPartitionsWithAuthInfo( String dbName, - String tblName, List partVals, short maxParts, - String userName, List groupNames) throws NoSuchObjectException, - MetaException, TException { - List parts = client.get_partitions_ps_with_auth(dbName, - tblName, partVals, maxParts, userName, groupNames); + String tblName, + List partVals, + short maxParts, + String userName, + List groupNames) + throws NoSuchObjectException, MetaException, TException { + List parts = + client.get_partitions_ps_with_auth( + dbName, tblName, partVals, maxParts, userName, groupNames); return fastpath ? parts : deepCopyPartitions(filterHook.filterPartitions(parts)); } /** * Get list of partitions matching specified filter * - * @param dbName the database name - * @param tblName the table name - * @param filter the filter string, - * for example "part1 = \"p1_abc\" and part2 <= "\p2_test\"". Filtering can - * be done only on string partition keys. - * @param maxParts the maximum number of partitions to return, - * all partitions are returned if -1 is passed + * @param dbName the database name + * @param tblName the table name + * @param filter the filter string, for example "part1 = \"p1_abc\" and part2 <= "\p2_test\"". + * Filtering can be done only on string partition keys. + * @param maxParts the maximum number of partitions to return, all partitions are returned if -1 + * is passed * @return list of partitions * @throws MetaException * @throws NoSuchObjectException @@ -1247,30 +1291,33 @@ public List listPartitionsWithAuthInfo( */ @Override public List listPartitionsByFilter( - String dbName, String tblName, - String filter, short maxParts) throws MetaException, - NoSuchObjectException, TException { + String dbName, String tblName, String filter, short maxParts) + throws MetaException, NoSuchObjectException, TException { List parts = client.get_partitions_by_filter(dbName, tblName, filter, maxParts); return fastpath ? 
parts : deepCopyPartitions(filterHook.filterPartitions(parts)); } @Override public PartitionSpecProxy listPartitionSpecsByFilter( - String dbName, String tblName, - String filter, int maxParts) throws MetaException, - NoSuchObjectException, TException { - return PartitionSpecProxy.Factory.get(filterHook.filterPartitionSpecs( - client.get_part_specs_by_filter(dbName, tblName, filter, maxParts))); + String dbName, String tblName, String filter, int maxParts) + throws MetaException, NoSuchObjectException, TException { + return PartitionSpecProxy.Factory.get( + filterHook.filterPartitionSpecs( + client.get_part_specs_by_filter(dbName, tblName, filter, maxParts))); } @Override public boolean listPartitionsByExpr( - String dbName, String tblName, byte[] expr, - String defaultPartitionName, short maxParts, List result) + String dbName, + String tblName, + byte[] expr, + String defaultPartitionName, + short maxParts, + List result) throws TException { assert result != null; - PartitionsByExprRequest req = new PartitionsByExprRequest( - dbName, tblName, ByteBuffer.wrap(expr)); + PartitionsByExprRequest req = + new PartitionsByExprRequest(dbName, tblName, ByteBuffer.wrap(expr)); if (defaultPartitionName != null) { req.setDefaultPartitionName(defaultPartitionName); } @@ -1282,8 +1329,8 @@ public boolean listPartitionsByExpr( r = client.get_partitions_by_expr(req); } catch (TApplicationException te) { // TODO: backward compat for Hive <= 0.12. Can be removed later. - if (te.getType() != TApplicationException.UNKNOWN_METHOD && - te.getType() != TApplicationException.WRONG_METHOD_NAME) { + if (te.getType() != TApplicationException.UNKNOWN_METHOD + && te.getType() != TApplicationException.WRONG_METHOD_NAME) { throw te; } throw new IncompatibleMetastoreException( @@ -1305,11 +1352,11 @@ public boolean listPartitionsByExpr( * @throws NoSuchObjectException * @throws MetaException * @throws TException - * @see org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.Iface#get_database(java.lang.String) + * @see + * org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.Iface#get_database(java.lang.String) */ @Override - public Database getDatabase(String name) throws NoSuchObjectException, - MetaException, TException { + public Database getDatabase(String name) throws NoSuchObjectException, MetaException, TException { Database d = client.get_database(name); return fastpath ? d : deepCopy(filterHook.filterDatabase(d)); } @@ -1321,13 +1368,13 @@ public Database getDatabase(String name) throws NoSuchObjectException, * @return the partition * @throws MetaException * @throws TException - * @see org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.Iface#get_partition(java.lang.String, - * java.lang.String, java.util.List) + * @see + * org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.Iface#get_partition(java.lang.String, + * java.lang.String, java.util.List) */ @Override - public Partition getPartition( - String dbName, String tblName, - List partVals) throws NoSuchObjectException, MetaException, TException { + public Partition getPartition(String dbName, String tblName, List partVals) + throws NoSuchObjectException, MetaException, TException { Partition p = client.get_partition(dbName, tblName, partVals); return fastpath ? 
p : deepCopy(filterHook.filterPartition(p)); } @@ -1340,21 +1387,21 @@ public Partition getPartition(String db, String tableName, String partName) } @Override - public List getPartitionsByNames( - String dbName, String tblName, - List partNames) throws NoSuchObjectException, MetaException, TException { + public List getPartitionsByNames(String dbName, String tblName, List partNames) + throws NoSuchObjectException, MetaException, TException { List parts = client.get_partitions_by_names(dbName, tblName, partNames); return fastpath ? parts : deepCopyPartitions(filterHook.filterPartitions(parts)); } @Override public Partition getPartitionWithAuthInfo( - String dbName, String tblName, - List partVals, String userName, List groupNames) - throws MetaException, UnknownTableException, NoSuchObjectException, - TException { - Partition p = client.get_partition_with_auth(dbName, tblName, partVals, userName, - groupNames); + String dbName, + String tblName, + List partVals, + String userName, + List groupNames) + throws MetaException, UnknownTableException, NoSuchObjectException, TException { + Partition p = client.get_partition_with_auth(dbName, tblName, partVals, userName, groupNames); return fastpath ? p : deepCopy(filterHook.filterPartition(p)); } @@ -1367,29 +1414,24 @@ public Partition getPartitionWithAuthInfo( * @throws TException * @throws NoSuchObjectException * @see org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.Iface#get_table(java.lang.String, - * java.lang.String) + * java.lang.String) */ @Override - public Table getTable(String dbname, String name) throws MetaException, - TException, NoSuchObjectException { + public Table getTable(String dbname, String name) + throws MetaException, TException, NoSuchObjectException { Table t = client.get_table(dbname, name); return fastpath ? t : deepCopy(filterHook.filterTable(t)); } - /** - * {@inheritDoc} - */ + /** {@inheritDoc} */ @Override @Deprecated - public Table getTable(String tableName) throws MetaException, TException, - NoSuchObjectException { + public Table getTable(String tableName) throws MetaException, TException, NoSuchObjectException { Table t = getTable(DEFAULT_DATABASE_NAME, tableName); return fastpath ? t : filterHook.filterTable(t); } - /** - * {@inheritDoc} - */ + /** {@inheritDoc} */ @Override public List getTableObjectsByName(String dbName, List tableNames) throws MetaException, InvalidOperationException, UnknownDBException, TException { @@ -1397,15 +1439,12 @@ public List
getTableObjectsByName(String dbName, List tableNames) return fastpath ? tabs : deepCopyTables(filterHook.filterTables(tabs)); } - /** - * {@inheritDoc} - */ + /** {@inheritDoc} */ @Override public List listTableNamesByFilter(String dbName, String filter, short maxTables) throws MetaException, TException, InvalidOperationException, UnknownDBException { return filterHook.filterTableNames( - dbName, - client.get_table_names_by_filter(dbName, filter, maxTables)); + dbName, client.get_table_names_by_filter(dbName, filter, maxTables)); } /** @@ -1420,9 +1459,7 @@ public Type getType(String name) throws NoSuchObjectException, MetaException, TE return deepCopy(client.get_type(name)); } - /** - * {@inheritDoc} - */ + /** {@inheritDoc} */ @Override public List getTables(String dbname, String tablePattern) throws MetaException { try { @@ -1434,8 +1471,8 @@ public List getTables(String dbname, String tablePattern) throws MetaExc } @Override - public List getTableMeta(String dbPatterns, String tablePatterns, List tableTypes) - throws MetaException { + public List getTableMeta( + String dbPatterns, String tablePatterns, List tableTypes) throws MetaException { try { return filterNames(client.get_table_meta(dbPatterns, tablePatterns, tableTypes)); } catch (Exception e) { @@ -1464,9 +1501,7 @@ private List filterNames(List metas) throws MetaException return filtered; } - /** - * {@inheritDoc} - */ + /** {@inheritDoc} */ @Override public List getAllTables(String dbname) throws MetaException { try { @@ -1478,8 +1513,8 @@ public List getAllTables(String dbname) throws MetaException { } @Override - public boolean tableExists(String databaseName, String tableName) throws MetaException, - TException, UnknownDBException { + public boolean tableExists(String databaseName, String tableName) + throws MetaException, TException, UnknownDBException { try { return filterHook.filterTable(client.get_table(databaseName, tableName)) != null; } catch (NoSuchObjectException e) { @@ -1487,55 +1522,49 @@ public boolean tableExists(String databaseName, String tableName) throws MetaExc } } - /** - * {@inheritDoc} - */ + /** {@inheritDoc} */ @Override @Deprecated - public boolean tableExists(String tableName) throws MetaException, - TException, UnknownDBException { + public boolean tableExists(String tableName) + throws MetaException, TException, UnknownDBException { return tableExists(DEFAULT_DATABASE_NAME, tableName); } @Override - public List listPartitionNames( - String dbName, String tblName, - short max) throws MetaException, TException { - return filterHook.filterPartitionNames(dbName, tblName, - client.get_partition_names(dbName, tblName, max)); + public List listPartitionNames(String dbName, String tblName, short max) + throws MetaException, TException { + return filterHook.filterPartitionNames( + dbName, tblName, client.get_partition_names(dbName, tblName, max)); } @Override public List listPartitionNames( - String dbName, String tblName, - List partVals, short maxParts) + String dbName, String tblName, List partVals, short maxParts) throws MetaException, TException, NoSuchObjectException { - return filterHook.filterPartitionNames(dbName, tblName, - client.get_partition_names_ps(dbName, tblName, partVals, maxParts)); + return filterHook.filterPartitionNames( + dbName, tblName, client.get_partition_names_ps(dbName, tblName, partVals, maxParts)); } /** * Get number of partitions matching specified filter * - * @param dbName the database name + * @param dbName the database name * @param tblName the table name - * @param 
filter the filter string, - * for example "part1 = \"p1_abc\" and part2 <= "\p2_test\"". Filtering can - * be done only on string partition keys. + * @param filter the filter string, for example "part1 = \"p1_abc\" and part2 <= "\p2_test\"". + * Filtering can be done only on string partition keys. * @return number of partitions * @throws MetaException * @throws NoSuchObjectException * @throws TException */ - public int getNumPartitionsByFilter( - String dbName, String tblName, - String filter) throws MetaException, - NoSuchObjectException, TException { + public int getNumPartitionsByFilter(String dbName, String tblName, String filter) + throws MetaException, NoSuchObjectException, TException { return client.get_num_partitions_by_filter(dbName, tblName, filter); } @Override - public void alter_partition(String dbName, String tblName, Partition newPart, EnvironmentContext environmentContext) + public void alter_partition( + String dbName, String tblName, Partition newPart, EnvironmentContext environmentContext) throws InvalidOperationException, MetaException, TException { client.alter_partition_with_environment_context(dbName, tblName, newPart, environmentContext); } @@ -1563,13 +1592,13 @@ public void alterDatabase(String dbName, Database db) * @throws UnknownDBException * @throws MetaException * @throws TException - * @see org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.Iface#get_fields(java.lang.String, - * java.lang.String) + * @see + * org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.Iface#get_fields(java.lang.String, + * java.lang.String) */ @Override public List getFields(String db, String tableName) - throws MetaException, TException, UnknownTableException, - UnknownDBException { + throws MetaException, TException, UnknownTableException, UnknownDBException { List fields = client.get_fields(db, tableName); return fastpath ? 
fields : deepCopyFieldSchemas(fields); } @@ -1577,7 +1606,7 @@ public List getFields(String db, String tableName) /** * create an index * - * @param index the index object + * @param index the index object * @param indexTable which stores the index data * @throws InvalidObjectException * @throws MetaException @@ -1587,7 +1616,8 @@ public List getFields(String db, String tableName) */ @Override public void createIndex(Index index, Table indexTable) - throws AlreadyExistsException, InvalidObjectException, MetaException, NoSuchObjectException, TException { + throws AlreadyExistsException, InvalidObjectException, MetaException, NoSuchObjectException, + TException { client.add_index(index, indexTable); } @@ -1599,8 +1629,9 @@ public void createIndex(Index index, Table indexTable) * @throws InvalidOperationException * @throws MetaException * @throws TException - * @see org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.Iface#alter_index(java.lang.String, - * java.lang.String, java.lang.String, org.apache.hadoop.hive.metastore.api.Index) + * @see + * org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.Iface#alter_index(java.lang.String, + * java.lang.String, java.lang.String, org.apache.hadoop.hive.metastore.api.Index) */ @Override public void alter_index(String dbname, String baseTblName, String idxName, Index newIdx) @@ -1620,8 +1651,7 @@ public void alter_index(String dbname, String baseTblName, String idxName, Index */ @Override public Index getIndex(String dbName, String tblName, String indexName) - throws MetaException, UnknownTableException, NoSuchObjectException, - TException { + throws MetaException, UnknownTableException, NoSuchObjectException, TException { return deepCopy(filterHook.filterIndex(client.get_index_by_name(dbName, tblName, indexName))); } @@ -1639,7 +1669,8 @@ public Index getIndex(String dbName, String tblName, String indexName) @Override public List listIndexNames(String dbName, String tblName, short max) throws MetaException, TException { - return filterHook.filterIndexNames(dbName, tblName, client.get_index_names(dbName, tblName, max)); + return filterHook.filterIndexNames( + dbName, tblName, client.get_index_names(dbName, tblName, max)); } /** @@ -1665,42 +1696,36 @@ public List getPrimaryKeys(PrimaryKeysRequest req) } @Override - public List getForeignKeys(ForeignKeysRequest req) throws MetaException, - NoSuchObjectException, TException { + public List getForeignKeys(ForeignKeysRequest req) + throws MetaException, NoSuchObjectException, TException { return client.get_foreign_keys(req).getForeignKeys(); } - /** - * {@inheritDoc} - */ + /** {@inheritDoc} */ @Override @Deprecated - //use setPartitionColumnStatistics instead + // use setPartitionColumnStatistics instead public boolean updateTableColumnStatistics(ColumnStatistics statsObj) throws NoSuchObjectException, InvalidObjectException, MetaException, TException, - InvalidInputException { + InvalidInputException { return client.update_table_column_statistics(statsObj); } - /** - * {@inheritDoc} - */ + /** {@inheritDoc} */ @Override @Deprecated - //use setPartitionColumnStatistics instead + // use setPartitionColumnStatistics instead public boolean updatePartitionColumnStatistics(ColumnStatistics statsObj) throws NoSuchObjectException, InvalidObjectException, MetaException, TException, - InvalidInputException { + InvalidInputException { return client.update_partition_column_statistics(statsObj); } - /** - * {@inheritDoc} - */ + /** {@inheritDoc} */ @Override public boolean 
setPartitionColumnStatistics(SetPartitionsStatsRequest request) throws NoSuchObjectException, InvalidObjectException, MetaException, TException, - InvalidInputException { + InvalidInputException { return client.set_aggr_stats_for(request); } @@ -1714,47 +1739,42 @@ public void flushCache() { } } - /** - * {@inheritDoc} - */ + /** {@inheritDoc} */ @Override public List getTableColumnStatistics( - String dbName, String tableName, - List colNames) throws NoSuchObjectException, MetaException, TException, - InvalidInputException, InvalidObjectException { - return client.get_table_statistics_req( - new TableStatsRequest(dbName, tableName, colNames)).getTableStats(); + String dbName, String tableName, List colNames) + throws NoSuchObjectException, MetaException, TException, InvalidInputException, + InvalidObjectException { + return client + .get_table_statistics_req(new TableStatsRequest(dbName, tableName, colNames)) + .getTableStats(); } - /** - * {@inheritDoc} - */ + /** {@inheritDoc} */ @Override public Map> getPartitionColumnStatistics( String dbName, String tableName, List partNames, List colNames) throws NoSuchObjectException, MetaException, TException { - return client.get_partitions_statistics_req( - new PartitionsStatsRequest(dbName, tableName, colNames, partNames)).getPartStats(); + return client + .get_partitions_statistics_req( + new PartitionsStatsRequest(dbName, tableName, colNames, partNames)) + .getPartStats(); } - /** - * {@inheritDoc} - */ + /** {@inheritDoc} */ @Override public boolean deletePartitionColumnStatistics( - String dbName, String tableName, String partName, - String colName) throws NoSuchObjectException, InvalidObjectException, MetaException, - TException, InvalidInputException { + String dbName, String tableName, String partName, String colName) + throws NoSuchObjectException, InvalidObjectException, MetaException, TException, + InvalidInputException { return client.delete_partition_column_statistics(dbName, tableName, partName, colName); } - /** - * {@inheritDoc} - */ + /** {@inheritDoc} */ @Override public boolean deleteTableColumnStatistics(String dbName, String tableName, String colName) throws NoSuchObjectException, InvalidObjectException, MetaException, TException, - InvalidInputException { + InvalidInputException { return client.delete_table_column_statistics(dbName, tableName, colName); } @@ -1765,13 +1785,13 @@ public boolean deleteTableColumnStatistics(String dbName, String tableName, Stri * @throws UnknownDBException * @throws MetaException * @throws TException - * @see org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.Iface#get_schema(java.lang.String, - * java.lang.String) + * @see + * org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore.Iface#get_schema(java.lang.String, + * java.lang.String) */ @Override public List getSchema(String db, String tableName) - throws MetaException, TException, UnknownTableException, - UnknownDBException { + throws MetaException, TException, UnknownTableException, UnknownDBException { EnvironmentContext envCxt = null; String addedJars = conf.getVar(ConfVars.HIVEADDEDJARS); if (org.apache.commons.lang.StringUtils.isNotBlank(addedJars)) { @@ -1796,26 +1816,29 @@ public Partition appendPartitionByName(String dbName, String tableName, String p } public Partition appendPartitionByName( - String dbName, String tableName, String partName, - EnvironmentContext envContext) throws InvalidObjectException, AlreadyExistsException, - MetaException, TException { - Partition p = 
client.append_partition_by_name_with_environment_context(dbName, tableName, - partName, envContext); + String dbName, String tableName, String partName, EnvironmentContext envContext) + throws InvalidObjectException, AlreadyExistsException, MetaException, TException { + Partition p = + client.append_partition_by_name_with_environment_context( + dbName, tableName, partName, envContext); return fastpath ? p : deepCopy(p); } public boolean dropPartitionByName( - String dbName, String tableName, String partName, - boolean deleteData) throws NoSuchObjectException, MetaException, TException { + String dbName, String tableName, String partName, boolean deleteData) + throws NoSuchObjectException, MetaException, TException { return dropPartitionByName(dbName, tableName, partName, deleteData, null); } public boolean dropPartitionByName( - String dbName, String tableName, String partName, - boolean deleteData, EnvironmentContext envContext) throws NoSuchObjectException, - MetaException, TException { - return client.drop_partition_by_name_with_environment_context(dbName, tableName, partName, - deleteData, envContext); + String dbName, + String tableName, + String partName, + boolean deleteData, + EnvironmentContext envContext) + throws NoSuchObjectException, MetaException, TException { + return client.drop_partition_by_name_with_environment_context( + dbName, tableName, partName, deleteData, envContext); } private HiveMetaHook getHook(Table tbl) throws MetaException { @@ -1907,8 +1930,7 @@ private List deepCopyPartitions(List partitions) { return deepCopyPartitions(partitions, null); } - private List deepCopyPartitions( - Collection src, List dest) { + private List deepCopyPartitions(Collection src, List dest) { if (src == null) { return dest; } @@ -1944,18 +1966,20 @@ protected List deepCopyFieldSchemas(List schemas) { } @Override - public boolean dropIndex( - String dbName, String tblName, String name, - boolean deleteData) throws NoSuchObjectException, MetaException, - TException { + public boolean dropIndex(String dbName, String tblName, String name, boolean deleteData) + throws NoSuchObjectException, MetaException, TException { return client.drop_index_by_name(dbName, tblName, name, deleteData); } @Override public boolean grant_role( - String roleName, String userName, - PrincipalType principalType, String grantor, PrincipalType grantorType, - boolean grantOption) throws MetaException, TException { + String roleName, + String userName, + PrincipalType principalType, + String grantor, + PrincipalType grantorType, + boolean grantOption) + throws MetaException, TException { GrantRevokeRoleRequest req = new GrantRevokeRoleRequest(); req.setRequestType(GrantRevokeType.GRANT); req.setRoleName(roleName); @@ -1972,8 +1996,7 @@ public boolean grant_role( } @Override - public boolean create_role(Role role) - throws MetaException, TException { + public boolean create_role(Role role) throws MetaException, TException { return client.create_role(role); } @@ -1983,9 +2006,8 @@ public boolean drop_role(String roleName) throws MetaException, TException { } @Override - public List list_roles( - String principalName, - PrincipalType principalType) throws MetaException, TException { + public List list_roles(String principalName, PrincipalType principalType) + throws MetaException, TException { return client.list_roles(principalName, principalType); } @@ -2007,8 +2029,7 @@ public GetRoleGrantsForPrincipalResponse get_role_grants_for_principal( } @Override - public boolean grant_privileges(PrivilegeBag privileges) - 
throws MetaException, TException { + public boolean grant_privileges(PrivilegeBag privileges) throws MetaException, TException { GrantRevokePrivilegeRequest req = new GrantRevokePrivilegeRequest(); req.setRequestType(GrantRevokeType.GRANT); req.setPrivileges(privileges); @@ -2021,8 +2042,8 @@ public boolean grant_privileges(PrivilegeBag privileges) @Override public boolean revoke_role( - String roleName, String userName, - PrincipalType principalType, boolean grantOption) throws MetaException, TException { + String roleName, String userName, PrincipalType principalType, boolean grantOption) + throws MetaException, TException { GrantRevokeRoleRequest req = new GrantRevokeRoleRequest(); req.setRequestType(GrantRevokeType.REVOKE); req.setRoleName(roleName); @@ -2037,8 +2058,8 @@ public boolean revoke_role( } @Override - public boolean revoke_privileges(PrivilegeBag privileges, boolean grantOption) throws MetaException, - TException { + public boolean revoke_privileges(PrivilegeBag privileges, boolean grantOption) + throws MetaException, TException { GrantRevokePrivilegeRequest req = new GrantRevokePrivilegeRequest(); req.setRequestType(GrantRevokeType.REVOKE); req.setPrivileges(privileges); @@ -2052,31 +2073,29 @@ public boolean revoke_privileges(PrivilegeBag privileges, boolean grantOption) t @Override public PrincipalPrivilegeSet get_privilege_set( - HiveObjectRef hiveObject, - String userName, List groupNames) throws MetaException, - TException { + HiveObjectRef hiveObject, String userName, List groupNames) + throws MetaException, TException { return client.get_privilege_set(hiveObject, userName, groupNames); } @Override public List list_privileges( - String principalName, - PrincipalType principalType, HiveObjectRef hiveObject) + String principalName, PrincipalType principalType, HiveObjectRef hiveObject) throws MetaException, TException { return client.list_privileges(principalName, principalType, hiveObject); } - public String getDelegationToken(String renewerKerberosPrincipalName) throws - MetaException, TException, IOException { - //a convenience method that makes the intended owner for the delegation - //token request the current user + public String getDelegationToken(String renewerKerberosPrincipalName) + throws MetaException, TException, IOException { + // a convenience method that makes the intended owner for the delegation + // token request the current user String owner = conf.getUser(); return getDelegationToken(owner, renewerKerberosPrincipalName); } @Override - public String getDelegationToken(String owner, String renewerKerberosPrincipalName) throws - MetaException, TException { + public String getDelegationToken(String owner, String renewerKerberosPrincipalName) + throws MetaException, TException { // This is expected to be a no-op, so we will return null when we use local metastore. 
if (localMetaStore) { return null; @@ -2176,8 +2195,7 @@ public void rollbackTxn(long txnid) throws NoSuchTxnException, TException { } @Override - public void commitTxn(long txnid) - throws NoSuchTxnException, TxnAbortedException, TException { + public void commitTxn(long txnid) throws NoSuchTxnException, TxnAbortedException, TException { client.commit_txn(new CommitTxnRequest(txnid)); } @@ -2199,14 +2217,12 @@ public LockResponse lock(LockRequest request) @Override public LockResponse checkLock(long lockid) - throws NoSuchTxnException, TxnAbortedException, NoSuchLockException, - TException { + throws NoSuchTxnException, TxnAbortedException, NoSuchLockException, TException { return client.check_lock(new CheckLockRequest(lockid)); } @Override - public void unlock(long lockid) - throws NoSuchLockException, TxnOpenException, TException { + public void unlock(long lockid) throws NoSuchLockException, TxnOpenException, TException { client.unlock(new UnlockRequest(lockid)); } @@ -2223,8 +2239,7 @@ public ShowLocksResponse showLocks(ShowLocksRequest request) throws TException { @Override public void heartbeat(long txnid, long lockid) - throws NoSuchLockException, NoSuchTxnException, TxnAbortedException, - TException { + throws NoSuchLockException, NoSuchTxnException, TxnAbortedException, TException { HeartbeatRequest hb = new HeartbeatRequest(); hb.setLockid(lockid); hb.setTxnid(txnid); @@ -2258,8 +2273,12 @@ public void compact(String dbname, String tableName, String partitionName, Compa @Override public void compact( - String dbname, String tableName, String partitionName, CompactionType type, - Map tblproperties) throws TException { + String dbname, + String tableName, + String partitionName, + CompactionType type, + Map tblproperties) + throws TException { CompactionRequest cr = new CompactionRequest(); if (dbname == null) { cr.setDbname(DEFAULT_DATABASE_NAME); @@ -2283,15 +2302,18 @@ public ShowCompactResponse showCompactions() throws TException { @Deprecated @Override public void addDynamicPartitions( - long txnId, String dbName, String tableName, - List partNames) throws TException { + long txnId, String dbName, String tableName, List partNames) throws TException { client.add_dynamic_partitions(new AddDynamicPartitions(txnId, dbName, tableName, partNames)); } @Override public void addDynamicPartitions( - long txnId, String dbName, String tableName, - List partNames, DataOperationType operationType) throws TException { + long txnId, + String dbName, + String tableName, + List partNames, + DataOperationType operationType) + throws TException { AddDynamicPartitions adp = new AddDynamicPartitions(txnId, dbName, tableName, partNames); adp.setOperationType(operationType); client.add_dynamic_partitions(adp); @@ -2300,8 +2322,7 @@ public void addDynamicPartitions( @InterfaceAudience.LimitedPrivate({"HCatalog"}) @Override public NotificationEventResponse getNextNotification( - long lastEventId, int maxEvents, - NotificationFilter filter) throws TException { + long lastEventId, int maxEvents, NotificationFilter filter) throws TException { NotificationEventRequest rqst = new NotificationEventRequest(lastEventId); rqst.setMaxEvents(maxEvents); NotificationEventResponse rsp = client.get_next_notification(rqst); @@ -2334,19 +2355,18 @@ public FireEventResponse fireListenerEvent(FireEventRequest rqst) throws TExcept } /** - * Creates a synchronized wrapper for any {@link IMetaStoreClient}. - * This may be used by multi-threaded applications until we have - * fixed all reentrancy bugs. 
+ * Creates a synchronized wrapper for any {@link IMetaStoreClient}. This may be used by + * multi-threaded applications until we have fixed all reentrancy bugs. * * @param client unsynchronized client * @return synchronized client */ - public static IMetaStoreClient newSynchronizedClient( - IMetaStoreClient client) { - return (IMetaStoreClient) Proxy.newProxyInstance( - HiveMetaStoreClient.class.getClassLoader(), - new Class[] {IMetaStoreClient.class}, - new SynchronizedHandler(client)); + public static IMetaStoreClient newSynchronizedClient(IMetaStoreClient client) { + return (IMetaStoreClient) + Proxy.newProxyInstance( + HiveMetaStoreClient.class.getClassLoader(), + new Class[] {IMetaStoreClient.class}, + new SynchronizedHandler(client)); } private static class SynchronizedHandler implements InvocationHandler { @@ -2357,8 +2377,7 @@ private static class SynchronizedHandler implements InvocationHandler { } @Override - public synchronized Object invoke(Object proxy, Method method, Object[] args) - throws Throwable { + public synchronized Object invoke(Object proxy, Method method, Object[] args) throws Throwable { try { return method.invoke(client, args); } catch (InvocationTargetException e) { @@ -2369,13 +2388,9 @@ public synchronized Object invoke(Object proxy, Method method, Object[] args) @Override public void markPartitionForEvent( - String dbName, - String tblName, - Map partKVs, - PartitionEventType eventType) + String dbName, String tblName, Map partKVs, PartitionEventType eventType) throws MetaException, TException, NoSuchObjectException, UnknownDBException, - UnknownTableException, - InvalidPartitionException, UnknownPartitionException { + UnknownTableException, InvalidPartitionException, UnknownPartitionException { assert dbName != null; assert tblName != null; assert partKVs != null; @@ -2384,12 +2399,9 @@ public void markPartitionForEvent( @Override public boolean isPartitionMarkedForEvent( - String dbName, - String tblName, - Map partKVs, - PartitionEventType eventType) - throws MetaException, NoSuchObjectException, UnknownTableException, UnknownDBException, TException, - InvalidPartitionException, UnknownPartitionException { + String dbName, String tblName, Map partKVs, PartitionEventType eventType) + throws MetaException, NoSuchObjectException, UnknownTableException, UnknownDBException, + TException, InvalidPartitionException, UnknownPartitionException { assert dbName != null; assert tblName != null; assert partKVs != null; @@ -2397,8 +2409,8 @@ public boolean isPartitionMarkedForEvent( } @Override - public void createFunction(Function func) throws InvalidObjectException, - MetaException, TException { + public void createFunction(Function func) + throws InvalidObjectException, MetaException, TException { client.create_function(func); } @@ -2410,49 +2422,46 @@ public void alterFunction(String dbName, String funcName, Function newFunction) @Override public void dropFunction(String dbName, String funcName) - throws MetaException, NoSuchObjectException, InvalidObjectException, - InvalidInputException, TException { + throws MetaException, NoSuchObjectException, InvalidObjectException, InvalidInputException, + TException { client.drop_function(dbName, funcName); } @Override - public Function getFunction(String dbName, String funcName) - throws MetaException, TException { + public Function getFunction(String dbName, String funcName) throws MetaException, TException { Function f = client.get_function(dbName, funcName); return fastpath ? 
f : deepCopy(f); } @Override - public List getFunctions(String dbName, String pattern) - throws MetaException, TException { + public List getFunctions(String dbName, String pattern) throws MetaException, TException { return client.get_functions(dbName, pattern); } @Override - public GetAllFunctionsResponse getAllFunctions() - throws MetaException, TException { + public GetAllFunctionsResponse getAllFunctions() throws MetaException, TException { return client.get_all_functions(); } protected void create_table_with_environment_context(Table tbl, EnvironmentContext envContext) - throws AlreadyExistsException, InvalidObjectException, - MetaException, NoSuchObjectException, TException { + throws AlreadyExistsException, InvalidObjectException, MetaException, NoSuchObjectException, + TException { client.create_table_with_environment_context(tbl, envContext); } protected void drop_table_with_environment_context( - String dbname, String name, - boolean deleteData, EnvironmentContext envContext) throws MetaException, TException, - NoSuchObjectException, UnsupportedOperationException { + String dbname, String name, boolean deleteData, EnvironmentContext envContext) + throws MetaException, TException, NoSuchObjectException, UnsupportedOperationException { client.drop_table_with_environment_context(dbname, name, deleteData, envContext); } @Override public AggrStats getAggrColStatsFor( - String dbName, String tblName, - List colNames, List partNames) throws NoSuchObjectException, MetaException, TException { + String dbName, String tblName, List colNames, List partNames) + throws NoSuchObjectException, MetaException, TException { if (colNames.isEmpty() || partNames.isEmpty()) { - LOG.debug("Columns is empty or partNames is empty : Short-circuiting stats eval on client side."); + LOG.debug( + "Columns is empty or partNames is empty : Short-circuiting stats eval on client side."); return new AggrStats(new ArrayList(), 0); // Nothing to aggregate } PartitionsStatsRequest req = new PartitionsStatsRequest(dbName, tblName, colNames, partNames); @@ -2460,8 +2469,8 @@ public AggrStats getAggrColStatsFor( } @Override - public Iterable> getFileMetadata( - final List fileIds) throws TException { + public Iterable> getFileMetadata(final List fileIds) + throws TException { return new MetastoreMapIterable() { private int listIndex = 0; @@ -2503,8 +2512,8 @@ protected Map fetchNextBatch() throws TException { } int endIndex = Math.min(listIndex + fileMetadataBatchSize, fileIds.size()); List subList = fileIds.subList(listIndex, endIndex); - GetFileMetadataByExprResult resp = sendGetFileMetadataBySargReq( - sarg, subList, doGetFooters); + GetFileMetadataByExprResult resp = + sendGetFileMetadataBySargReq(sarg, subList, doGetFooters); if (!resp.isIsSupported()) { return null; } diff --git a/trino/src/main/java/org/apache/hadoop/util/VersionInfo.java b/trino/src/main/java/org/apache/hadoop/util/VersionInfo.java index a613e802ce..0ecc125611 100644 --- a/trino/src/main/java/org/apache/hadoop/util/VersionInfo.java +++ b/trino/src/main/java/org/apache/hadoop/util/VersionInfo.java @@ -28,9 +28,7 @@ import java.io.InputStream; import java.util.Properties; -/** - * Copy from hadoop-common to avoid testing hive metastore checking version - */ +/** Copy from hadoop-common to avoid testing hive metastore checking version */ @InterfaceAudience.Public @InterfaceStability.Stable public class VersionInfo { @@ -43,15 +41,14 @@ protected VersionInfo(String component) { String versionInfoFile = component + "-version-info.properties"; 
InputStream is = null; try { - is = Thread.currentThread().getContextClassLoader() - .getResourceAsStream(versionInfoFile); + is = Thread.currentThread().getContextClassLoader().getResourceAsStream(versionInfoFile); if (is == null) { throw new IOException("Resource not found"); } info.load(is); } catch (IOException ex) { - LoggerFactory.getLogger(getClass()).warn("Could not read '" + - versionInfoFile + "', " + ex.toString(), ex); + LoggerFactory.getLogger(getClass()) + .warn("Could not read '" + versionInfoFile + "', " + ex.toString(), ex); } finally { IOUtils.closeStream(is); } @@ -86,10 +83,13 @@ protected String _getSrcChecksum() { } protected String _getBuildVersion() { - return _getVersion() + - " from " + _getRevision() + - " by " + _getUser() + - " source checksum " + _getSrcChecksum(); + return _getVersion() + + " from " + + _getRevision() + + " by " + + _getUser() + + " source checksum " + + _getSrcChecksum(); } protected String _getProtocVersion() { @@ -100,6 +100,7 @@ protected String _getProtocVersion() { /** * Get the Hadoop version. + * * @return the Hadoop version string, eg. "0.6.3-dev" */ public static String getVersion() { @@ -108,6 +109,7 @@ public static String getVersion() { /** * Get the Git commit hash of the repository when compiled. + * * @return the commit hash, eg. "18f64065d5db6208daf50b02c1b5ed4ee3ce547a" */ public static String getRevision() { @@ -116,6 +118,7 @@ public static String getRevision() { /** * Get the branch on which this originated. + * * @return The branch name, e.g. "trunk" or "branches/branch-0.20" */ public static String getBranch() { @@ -124,6 +127,7 @@ public static String getBranch() { /** * The date that Hadoop was compiled. + * * @return the compilation date in unix date format */ public static String getDate() { @@ -132,6 +136,7 @@ public static String getDate() { /** * The user that compiled Hadoop. + * * @return the username of the user */ public static String getUser() { @@ -140,6 +145,7 @@ public static String getUser() { /** * Get the URL for the Hadoop repository. + * * @return the URL of the Hadoop repository */ public static String getUrl() { @@ -148,6 +154,7 @@ public static String getUrl() { /** * Get the checksum of the source files from which Hadoop was built. + * * @return the checksum of the source files */ public static String getSrcChecksum() { @@ -155,8 +162,8 @@ public static String getSrcChecksum() { } /** - * Returns the buildVersion which includes version, - * revision, user and date. + * Returns the buildVersion which includes version, revision, user and date. + * * @return the buildVersion */ public static String getBuildVersion() { @@ -165,6 +172,7 @@ public static String getBuildVersion() { /** * Returns the protoc version used for the build. 
+ * * @return the protoc version */ public static String getProtocVersion() { @@ -178,7 +186,7 @@ public static void main(String[] args) { System.out.println("Compiled by " + getUser() + " on " + getDate()); System.out.println("Compiled with protoc " + getProtocVersion()); System.out.println("From source with checksum " + getSrcChecksum()); - System.out.println("This command was run using " + - ClassUtil.findContainingJar(VersionInfo.class)); + System.out.println( + "This command was run using " + ClassUtil.findContainingJar(VersionInfo.class)); } } diff --git a/trino/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverterUtil.java b/trino/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverterUtil.java index d171b4926b..d474949c28 100644 --- a/trino/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverterUtil.java +++ b/trino/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverterUtil.java @@ -5,13 +5,9 @@ import org.apache.parquet.format.SchemaElement; import org.apache.parquet.schema.LogicalTypeAnnotation; -/** - * Copy from hive-apache package, because include hive-apache will cause class conflict - */ +/** Copy from hive-apache package, because include hive-apache will cause class conflict */ public final class ParquetMetadataConverterUtil { - private ParquetMetadataConverterUtil() { - - } + private ParquetMetadataConverterUtil() {} public static LogicalTypeAnnotation getLogicalTypeAnnotation( ParquetMetadataConverter parquetMetadataConverter, @@ -21,8 +17,7 @@ public static LogicalTypeAnnotation getLogicalTypeAnnotation( } public static LogicalTypeAnnotation getLogicalTypeAnnotation( - ParquetMetadataConverter parquetMetadataConverter, - LogicalType logicalType) { + ParquetMetadataConverter parquetMetadataConverter, LogicalType logicalType) { return parquetMetadataConverter.getLogicalTypeAnnotation(logicalType); } diff --git a/trino/src/main/java/org/apache/parquet/io/ColumnIOUtil.java b/trino/src/main/java/org/apache/parquet/io/ColumnIOUtil.java index 4606c4c717..eb962602c3 100644 --- a/trino/src/main/java/org/apache/parquet/io/ColumnIOUtil.java +++ b/trino/src/main/java/org/apache/parquet/io/ColumnIOUtil.java @@ -14,13 +14,9 @@ package org.apache.parquet.io; -/** - * Copy from hive-apache package, because include hive-apache will cause class conflict - */ +/** Copy from hive-apache package, because include hive-apache will cause class conflict */ public final class ColumnIOUtil { - private ColumnIOUtil() { - - } + private ColumnIOUtil() {} public static int columnDefinitionLevel(ColumnIO column) { return column.getDefinitionLevel(); diff --git a/trino/src/test/java/com/netease/arctic/trino/arctic/ArcticQueryRunner.java b/trino/src/test/java/com/netease/arctic/trino/arctic/ArcticQueryRunner.java index 24dbefa2cd..48ab4a966e 100644 --- a/trino/src/test/java/com/netease/arctic/trino/arctic/ArcticQueryRunner.java +++ b/trino/src/test/java/com/netease/arctic/trino/arctic/ArcticQueryRunner.java @@ -18,6 +18,10 @@ package com.netease.arctic.trino.arctic; +import static io.airlift.testing.Closeables.closeAllSuppress; +import static io.trino.testing.TestingSession.testSessionBuilder; +import static java.util.Objects.requireNonNull; + import com.google.common.collect.ImmutableMap; import com.netease.arctic.trino.ArcticPlugin; import io.airlift.log.Logger; @@ -29,32 +33,23 @@ import java.util.Map; import java.util.Optional; -import static io.airlift.testing.Closeables.closeAllSuppress; -import static 
io.trino.testing.TestingSession.testSessionBuilder; -import static java.util.Objects.requireNonNull; - public final class ArcticQueryRunner { private static final Logger log = Logger.get(ArcticQueryRunner.class); public static final String ARCTIC_CATALOG = "arctic"; - private ArcticQueryRunner() { - } + private ArcticQueryRunner() {} public static Builder builder() { return new Builder(); } - public static class Builder - extends DistributedQueryRunner.Builder { + public static class Builder extends DistributedQueryRunner.Builder { private Optional metastoreDirectory = Optional.empty(); private ImmutableMap.Builder icebergProperties = ImmutableMap.builder(); protected Builder() { - super(testSessionBuilder() - .setCatalog(ARCTIC_CATALOG) - .setSchema("tpch") - .build()); + super(testSessionBuilder().setCatalog(ARCTIC_CATALOG).setSchema("tpch").build()); } public Builder setMetastoreDirectory(File metastoreDirectory) { @@ -63,8 +58,9 @@ public Builder setMetastoreDirectory(File metastoreDirectory) { } public Builder setIcebergProperties(Map icebergProperties) { - this.icebergProperties = ImmutableMap.builder() - .putAll(requireNonNull(icebergProperties, "icebergProperties is null")); + this.icebergProperties = + ImmutableMap.builder() + .putAll(requireNonNull(icebergProperties, "icebergProperties is null")); return self(); } @@ -74,15 +70,15 @@ public Builder addIcebergProperty(String key, String value) { } @Override - public DistributedQueryRunner build() - throws Exception { + public DistributedQueryRunner build() throws Exception { DistributedQueryRunner queryRunner = super.build(); try { queryRunner.installPlugin(new TpchPlugin()); queryRunner.createCatalog("tpch", "tpch"); queryRunner.installPlugin(new ArcticPlugin()); - Map icebergProperties = new HashMap<>(this.icebergProperties.buildOrThrow()); + Map icebergProperties = + new HashMap<>(this.icebergProperties.buildOrThrow()); queryRunner.createCatalog(ARCTIC_CATALOG, "arctic", icebergProperties); return queryRunner; } catch (Exception e) { @@ -92,12 +88,12 @@ public DistributedQueryRunner build() } } - public static void main(String[] args) - throws Exception { + public static void main(String[] args) throws Exception { DistributedQueryRunner queryRunner = null; - queryRunner = ArcticQueryRunner.builder() - .setExtraProperties(ImmutableMap.of("http-server.http.port", "8080")) - .build(); + queryRunner = + ArcticQueryRunner.builder() + .setExtraProperties(ImmutableMap.of("http-server.http.port", "8080")) + .build(); Thread.sleep(10); Logger log = Logger.get(ArcticQueryRunner.class); log.info("======== SERVER STARTED ========"); diff --git a/trino/src/test/java/com/netease/arctic/trino/arctic/HiveTestRecords.java b/trino/src/test/java/com/netease/arctic/trino/arctic/HiveTestRecords.java index fd11a74125..12b7bcdfd9 100644 --- a/trino/src/test/java/com/netease/arctic/trino/arctic/HiveTestRecords.java +++ b/trino/src/test/java/com/netease/arctic/trino/arctic/HiveTestRecords.java @@ -18,17 +18,6 @@ package com.netease.arctic.trino.arctic; -import org.apache.iceberg.data.GenericRecord; -import org.apache.iceberg.data.Record; -import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; -import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; - -import java.math.BigDecimal; -import java.time.LocalDateTime; -import java.time.OffsetDateTime; -import java.time.ZoneOffset; -import java.util.List; - import static com.netease.arctic.trino.arctic.TestHiveTableBaseForTrino.COLUMN_NAME_ARRAY; import static 
com.netease.arctic.trino.arctic.TestHiveTableBaseForTrino.COLUMN_NAME_D; import static com.netease.arctic.trino.arctic.TestHiveTableBaseForTrino.COLUMN_NAME_ID; @@ -42,6 +31,17 @@ import static com.netease.arctic.trino.arctic.TestHiveTableBaseForTrino.HIVE_TABLE_SCHEMA; import static com.netease.arctic.trino.arctic.TestHiveTableBaseForTrino.STRUCT_SUB_SCHEMA; +import org.apache.iceberg.data.GenericRecord; +import org.apache.iceberg.data.Record; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; + +import java.math.BigDecimal; +import java.time.LocalDateTime; +import java.time.OffsetDateTime; +import java.time.ZoneOffset; +import java.util.List; + public class HiveTestRecords { public static List baseRecords() { @@ -50,50 +50,48 @@ public static List baseRecords() { ImmutableList.Builder builder = ImmutableList.builder(); { - ImmutableMap columns = ImmutableMap.builder().put( - COLUMN_NAME_ID, 3 - ).put( - COLUMN_NAME_OP_TIME, LocalDateTime.of(2022, 1, 3, 12, 0, 0) - ).put( - COLUMN_NAME_OP_TIME_WITH_ZONE, OffsetDateTime.of( - LocalDateTime.of(2022, 1, 3, 12, 0, 0), ZoneOffset.UTC) - ) - .put( - COLUMN_NAME_D, new BigDecimal("102") - ).put( - COLUMN_NAME_NAME, "jake" - ).put( - COLUMN_NAME_MAP, ImmutableMap.of("map_key", "map_value") - ).put( - COLUMN_NAME_ARRAY, ImmutableList.of("array_element") - ).put( - COLUMN_NAME_STRUCT, structRecord.copy(COLUMN_NAME_STRUCT_SUB1, "struct_sub1", COLUMN_NAME_STRUCT_SUB2, - "struct_sub2") - ).build(); + ImmutableMap columns = + ImmutableMap.builder() + .put(COLUMN_NAME_ID, 3) + .put(COLUMN_NAME_OP_TIME, LocalDateTime.of(2022, 1, 3, 12, 0, 0)) + .put( + COLUMN_NAME_OP_TIME_WITH_ZONE, + OffsetDateTime.of(LocalDateTime.of(2022, 1, 3, 12, 0, 0), ZoneOffset.UTC)) + .put(COLUMN_NAME_D, new BigDecimal("102")) + .put(COLUMN_NAME_NAME, "jake") + .put(COLUMN_NAME_MAP, ImmutableMap.of("map_key", "map_value")) + .put(COLUMN_NAME_ARRAY, ImmutableList.of("array_element")) + .put( + COLUMN_NAME_STRUCT, + structRecord.copy( + COLUMN_NAME_STRUCT_SUB1, + "struct_sub1", + COLUMN_NAME_STRUCT_SUB2, + "struct_sub2")) + .build(); builder.add(record.copy(columns)); } { - ImmutableMap columns = ImmutableMap.builder().put( - COLUMN_NAME_ID, 4 - ).put( - COLUMN_NAME_OP_TIME, LocalDateTime.of(2022, 1, 4, 12, 0, 0) - ).put( - COLUMN_NAME_OP_TIME_WITH_ZONE, OffsetDateTime.of( - LocalDateTime.of(2022, 1, 4, 12, 0, 0), ZoneOffset.UTC) - ) - .put( - COLUMN_NAME_D, new BigDecimal("103") - ).put( - COLUMN_NAME_NAME, "sam" - ).put( - COLUMN_NAME_MAP, ImmutableMap.of("map_key", "map_value") - ).put( - COLUMN_NAME_ARRAY, ImmutableList.of("array_element") - ).put( - COLUMN_NAME_STRUCT, structRecord.copy(COLUMN_NAME_STRUCT_SUB1, "struct_sub1", COLUMN_NAME_STRUCT_SUB2, - "struct_sub2") - ).build(); + ImmutableMap columns = + ImmutableMap.builder() + .put(COLUMN_NAME_ID, 4) + .put(COLUMN_NAME_OP_TIME, LocalDateTime.of(2022, 1, 4, 12, 0, 0)) + .put( + COLUMN_NAME_OP_TIME_WITH_ZONE, + OffsetDateTime.of(LocalDateTime.of(2022, 1, 4, 12, 0, 0), ZoneOffset.UTC)) + .put(COLUMN_NAME_D, new BigDecimal("103")) + .put(COLUMN_NAME_NAME, "sam") + .put(COLUMN_NAME_MAP, ImmutableMap.of("map_key", "map_value")) + .put(COLUMN_NAME_ARRAY, ImmutableList.of("array_element")) + .put( + COLUMN_NAME_STRUCT, + structRecord.copy( + COLUMN_NAME_STRUCT_SUB1, + "struct_sub1", + COLUMN_NAME_STRUCT_SUB2, + "struct_sub2")) + .build(); builder.add(record.copy(columns)); } @@ -106,50 +104,48 @@ public static List 
hiveRecords() { ImmutableList.Builder builder = ImmutableList.builder(); { - ImmutableMap columns = ImmutableMap.builder().put( - COLUMN_NAME_ID, 1 - ).put( - COLUMN_NAME_OP_TIME, LocalDateTime.of(2022, 1, 1, 12, 0, 0) - ).put( - COLUMN_NAME_OP_TIME_WITH_ZONE, OffsetDateTime.of( - LocalDateTime.of(2022, 1, 1, 12, 0, 0), ZoneOffset.UTC) - ) - .put( - COLUMN_NAME_D, new BigDecimal("100") - ).put( - COLUMN_NAME_NAME, "john" - ).put( - COLUMN_NAME_MAP, ImmutableMap.of("map_key", "map_value") - ).put( - COLUMN_NAME_ARRAY, ImmutableList.of("array_element") - ).put( - COLUMN_NAME_STRUCT, structRecord.copy(COLUMN_NAME_STRUCT_SUB1, "struct_sub1", COLUMN_NAME_STRUCT_SUB2, - "struct_sub2") - ).build(); + ImmutableMap columns = + ImmutableMap.builder() + .put(COLUMN_NAME_ID, 1) + .put(COLUMN_NAME_OP_TIME, LocalDateTime.of(2022, 1, 1, 12, 0, 0)) + .put( + COLUMN_NAME_OP_TIME_WITH_ZONE, + OffsetDateTime.of(LocalDateTime.of(2022, 1, 1, 12, 0, 0), ZoneOffset.UTC)) + .put(COLUMN_NAME_D, new BigDecimal("100")) + .put(COLUMN_NAME_NAME, "john") + .put(COLUMN_NAME_MAP, ImmutableMap.of("map_key", "map_value")) + .put(COLUMN_NAME_ARRAY, ImmutableList.of("array_element")) + .put( + COLUMN_NAME_STRUCT, + structRecord.copy( + COLUMN_NAME_STRUCT_SUB1, + "struct_sub1", + COLUMN_NAME_STRUCT_SUB2, + "struct_sub2")) + .build(); builder.add(record.copy(columns)); } { - ImmutableMap columns = ImmutableMap.builder().put( - COLUMN_NAME_ID, 2 - ).put( - COLUMN_NAME_OP_TIME, LocalDateTime.of(2022, 1, 2, 12, 0, 0) - ).put( - COLUMN_NAME_OP_TIME_WITH_ZONE, OffsetDateTime.of( - LocalDateTime.of(2022, 1, 2, 12, 0, 0), ZoneOffset.UTC) - ) - .put( - COLUMN_NAME_D, new BigDecimal("101") - ).put( - COLUMN_NAME_NAME, "lily" - ).put( - COLUMN_NAME_MAP, ImmutableMap.of("map_key", "map_value") - ).put( - COLUMN_NAME_ARRAY, ImmutableList.of("array_element") - ).put( - COLUMN_NAME_STRUCT, structRecord.copy(COLUMN_NAME_STRUCT_SUB1, "struct_sub1", COLUMN_NAME_STRUCT_SUB2, - "struct_sub2") - ).build(); + ImmutableMap columns = + ImmutableMap.builder() + .put(COLUMN_NAME_ID, 2) + .put(COLUMN_NAME_OP_TIME, LocalDateTime.of(2022, 1, 2, 12, 0, 0)) + .put( + COLUMN_NAME_OP_TIME_WITH_ZONE, + OffsetDateTime.of(LocalDateTime.of(2022, 1, 2, 12, 0, 0), ZoneOffset.UTC)) + .put(COLUMN_NAME_D, new BigDecimal("101")) + .put(COLUMN_NAME_NAME, "lily") + .put(COLUMN_NAME_MAP, ImmutableMap.of("map_key", "map_value")) + .put(COLUMN_NAME_ARRAY, ImmutableList.of("array_element")) + .put( + COLUMN_NAME_STRUCT, + structRecord.copy( + COLUMN_NAME_STRUCT_SUB1, + "struct_sub1", + COLUMN_NAME_STRUCT_SUB2, + "struct_sub2")) + .build(); builder.add(record.copy(columns)); } @@ -162,50 +158,48 @@ public static List changeInsertRecords() { ImmutableList.Builder builder = ImmutableList.builder(); { - ImmutableMap columns = ImmutableMap.builder().put( - COLUMN_NAME_ID, 5 - ).put( - COLUMN_NAME_OP_TIME, LocalDateTime.of(2022, 1, 1, 12, 0, 0) - ).put( - COLUMN_NAME_OP_TIME_WITH_ZONE, OffsetDateTime.of( - LocalDateTime.of(2022, 1, 1, 12, 0, 0), ZoneOffset.UTC) - ) - .put( - COLUMN_NAME_D, new BigDecimal("104") - ).put( - COLUMN_NAME_NAME, "mary" - ).put( - COLUMN_NAME_MAP, ImmutableMap.of("map_key", "map_value") - ).put( - COLUMN_NAME_ARRAY, ImmutableList.of("array_element") - ).put( - COLUMN_NAME_STRUCT, structRecord.copy(COLUMN_NAME_STRUCT_SUB1, "struct_sub1", COLUMN_NAME_STRUCT_SUB2, - "struct_sub2") - ).build(); + ImmutableMap columns = + ImmutableMap.builder() + .put(COLUMN_NAME_ID, 5) + .put(COLUMN_NAME_OP_TIME, LocalDateTime.of(2022, 1, 1, 12, 0, 0)) + .put( + 
COLUMN_NAME_OP_TIME_WITH_ZONE, + OffsetDateTime.of(LocalDateTime.of(2022, 1, 1, 12, 0, 0), ZoneOffset.UTC)) + .put(COLUMN_NAME_D, new BigDecimal("104")) + .put(COLUMN_NAME_NAME, "mary") + .put(COLUMN_NAME_MAP, ImmutableMap.of("map_key", "map_value")) + .put(COLUMN_NAME_ARRAY, ImmutableList.of("array_element")) + .put( + COLUMN_NAME_STRUCT, + structRecord.copy( + COLUMN_NAME_STRUCT_SUB1, + "struct_sub1", + COLUMN_NAME_STRUCT_SUB2, + "struct_sub2")) + .build(); builder.add(record.copy(columns)); } { - ImmutableMap columns = ImmutableMap.builder().put( - COLUMN_NAME_ID, 6 - ).put( - COLUMN_NAME_OP_TIME, LocalDateTime.of(2022, 1, 1, 12, 0, 0) - ).put( - COLUMN_NAME_OP_TIME_WITH_ZONE, OffsetDateTime.of( - LocalDateTime.of(2022, 1, 1, 12, 0, 0), ZoneOffset.UTC) - ) - .put( - COLUMN_NAME_D, new BigDecimal("105") - ).put( - COLUMN_NAME_NAME, "mack" - ).put( - COLUMN_NAME_MAP, ImmutableMap.of("map_key", "map_value") - ).put( - COLUMN_NAME_ARRAY, ImmutableList.of("array_element") - ).put( - COLUMN_NAME_STRUCT, structRecord.copy(COLUMN_NAME_STRUCT_SUB1, "struct_sub1", COLUMN_NAME_STRUCT_SUB2, - "struct_sub2") - ).build(); + ImmutableMap columns = + ImmutableMap.builder() + .put(COLUMN_NAME_ID, 6) + .put(COLUMN_NAME_OP_TIME, LocalDateTime.of(2022, 1, 1, 12, 0, 0)) + .put( + COLUMN_NAME_OP_TIME_WITH_ZONE, + OffsetDateTime.of(LocalDateTime.of(2022, 1, 1, 12, 0, 0), ZoneOffset.UTC)) + .put(COLUMN_NAME_D, new BigDecimal("105")) + .put(COLUMN_NAME_NAME, "mack") + .put(COLUMN_NAME_MAP, ImmutableMap.of("map_key", "map_value")) + .put(COLUMN_NAME_ARRAY, ImmutableList.of("array_element")) + .put( + COLUMN_NAME_STRUCT, + structRecord.copy( + COLUMN_NAME_STRUCT_SUB1, + "struct_sub1", + COLUMN_NAME_STRUCT_SUB2, + "struct_sub2")) + .build(); builder.add(record.copy(columns)); } @@ -218,74 +212,71 @@ public static List changeDeleteRecords() { ImmutableList.Builder builder = ImmutableList.builder(); { - ImmutableMap columns = ImmutableMap.builder().put( - COLUMN_NAME_ID, 5 - ).put( - COLUMN_NAME_OP_TIME, LocalDateTime.of(2022, 1, 1, 12, 0, 0) - ).put( - COLUMN_NAME_OP_TIME_WITH_ZONE, OffsetDateTime.of( - LocalDateTime.of(2022, 1, 1, 12, 0, 0), ZoneOffset.UTC) - ) - .put( - COLUMN_NAME_D, new BigDecimal("104") - ).put( - COLUMN_NAME_NAME, "mary" - ).put( - COLUMN_NAME_MAP, ImmutableMap.of("map_key", "map_value") - ).put( - COLUMN_NAME_ARRAY, ImmutableList.of("array_element") - ).put( - COLUMN_NAME_STRUCT, structRecord.copy(COLUMN_NAME_STRUCT_SUB1, "struct_sub1", COLUMN_NAME_STRUCT_SUB2, - "struct_sub2") - ).build(); + ImmutableMap columns = + ImmutableMap.builder() + .put(COLUMN_NAME_ID, 5) + .put(COLUMN_NAME_OP_TIME, LocalDateTime.of(2022, 1, 1, 12, 0, 0)) + .put( + COLUMN_NAME_OP_TIME_WITH_ZONE, + OffsetDateTime.of(LocalDateTime.of(2022, 1, 1, 12, 0, 0), ZoneOffset.UTC)) + .put(COLUMN_NAME_D, new BigDecimal("104")) + .put(COLUMN_NAME_NAME, "mary") + .put(COLUMN_NAME_MAP, ImmutableMap.of("map_key", "map_value")) + .put(COLUMN_NAME_ARRAY, ImmutableList.of("array_element")) + .put( + COLUMN_NAME_STRUCT, + structRecord.copy( + COLUMN_NAME_STRUCT_SUB1, + "struct_sub1", + COLUMN_NAME_STRUCT_SUB2, + "struct_sub2")) + .build(); builder.add(record.copy(columns)); } { - ImmutableMap columns = ImmutableMap.builder().put( - COLUMN_NAME_ID, 1 - ).put( - COLUMN_NAME_OP_TIME, LocalDateTime.of(2022, 1, 1, 12, 0, 0) - ).put( - COLUMN_NAME_OP_TIME_WITH_ZONE, OffsetDateTime.of( - LocalDateTime.of(2022, 1, 1, 12, 0, 0), ZoneOffset.UTC) - ) - .put( - COLUMN_NAME_D, new BigDecimal("100") - ).put( - COLUMN_NAME_NAME, "john" - ).put( - 
COLUMN_NAME_MAP, ImmutableMap.of("map_key", "map_value") - ).put( - COLUMN_NAME_ARRAY, ImmutableList.of("array_element") - ).put( - COLUMN_NAME_STRUCT, structRecord.copy(COLUMN_NAME_STRUCT_SUB1, "struct_sub1", COLUMN_NAME_STRUCT_SUB2, - "struct_sub2") - ).build(); + ImmutableMap columns = + ImmutableMap.builder() + .put(COLUMN_NAME_ID, 1) + .put(COLUMN_NAME_OP_TIME, LocalDateTime.of(2022, 1, 1, 12, 0, 0)) + .put( + COLUMN_NAME_OP_TIME_WITH_ZONE, + OffsetDateTime.of(LocalDateTime.of(2022, 1, 1, 12, 0, 0), ZoneOffset.UTC)) + .put(COLUMN_NAME_D, new BigDecimal("100")) + .put(COLUMN_NAME_NAME, "john") + .put(COLUMN_NAME_MAP, ImmutableMap.of("map_key", "map_value")) + .put(COLUMN_NAME_ARRAY, ImmutableList.of("array_element")) + .put( + COLUMN_NAME_STRUCT, + structRecord.copy( + COLUMN_NAME_STRUCT_SUB1, + "struct_sub1", + COLUMN_NAME_STRUCT_SUB2, + "struct_sub2")) + .build(); builder.add(record.copy(columns)); } { - ImmutableMap columns = ImmutableMap.builder().put( - COLUMN_NAME_ID, 3 - ).put( - COLUMN_NAME_OP_TIME, LocalDateTime.of(2022, 1, 3, 12, 0, 0) - ).put( - COLUMN_NAME_OP_TIME_WITH_ZONE, OffsetDateTime.of( - LocalDateTime.of(2022, 1, 3, 12, 0, 0), ZoneOffset.UTC) - ) - .put( - COLUMN_NAME_D, new BigDecimal("102") - ).put( - COLUMN_NAME_NAME, "jake" - ).put( - COLUMN_NAME_MAP, ImmutableMap.of("map_key", "map_value") - ).put( - COLUMN_NAME_ARRAY, ImmutableList.of("array_element") - ).put( - COLUMN_NAME_STRUCT, structRecord.copy(COLUMN_NAME_STRUCT_SUB1, "struct_sub1", COLUMN_NAME_STRUCT_SUB2, - "struct_sub2") - ).build(); + ImmutableMap columns = + ImmutableMap.builder() + .put(COLUMN_NAME_ID, 3) + .put(COLUMN_NAME_OP_TIME, LocalDateTime.of(2022, 1, 3, 12, 0, 0)) + .put( + COLUMN_NAME_OP_TIME_WITH_ZONE, + OffsetDateTime.of(LocalDateTime.of(2022, 1, 3, 12, 0, 0), ZoneOffset.UTC)) + .put(COLUMN_NAME_D, new BigDecimal("102")) + .put(COLUMN_NAME_NAME, "jake") + .put(COLUMN_NAME_MAP, ImmutableMap.of("map_key", "map_value")) + .put(COLUMN_NAME_ARRAY, ImmutableList.of("array_element")) + .put( + COLUMN_NAME_STRUCT, + structRecord.copy( + COLUMN_NAME_STRUCT_SUB1, + "struct_sub1", + COLUMN_NAME_STRUCT_SUB2, + "struct_sub2")) + .build(); builder.add(record.copy(columns)); } diff --git a/trino/src/test/java/com/netease/arctic/trino/arctic/TableTestBaseForTrino.java b/trino/src/test/java/com/netease/arctic/trino/arctic/TableTestBaseForTrino.java index b70811709f..9a71a44a5a 100644 --- a/trino/src/test/java/com/netease/arctic/trino/arctic/TableTestBaseForTrino.java +++ b/trino/src/test/java/com/netease/arctic/trino/arctic/TableTestBaseForTrino.java @@ -18,6 +18,9 @@ package com.netease.arctic.trino.arctic; +import static com.netease.arctic.ams.api.MockArcticMetastoreServer.TEST_CATALOG_NAME; +import static com.netease.arctic.ams.api.MockArcticMetastoreServer.TEST_DB_NAME; + import com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.netease.arctic.ams.api.CatalogMeta; @@ -63,9 +66,6 @@ import java.util.List; import java.util.Set; -import static com.netease.arctic.ams.api.MockArcticMetastoreServer.TEST_CATALOG_NAME; -import static com.netease.arctic.ams.api.MockArcticMetastoreServer.TEST_DB_NAME; - public abstract class TableTestBaseForTrino extends AbstractTestQueryFramework { protected static TemporaryFolder tmp = new TemporaryFolder(); @@ -78,38 +78,39 @@ public abstract class TableTestBaseForTrino extends AbstractTestQueryFramework { TableIdentifier.of(TEST_CATALOG_NAME, TEST_DB_NAME, "test_table"); protected static final TableIdentifier PK_TABLE_ID = 
TableIdentifier.of(TEST_CATALOG_NAME, TEST_DB_NAME, "test_pk_table"); - protected static final Schema TABLE_SCHEMA = new Schema( - Types.NestedField.required(1, "id", Types.IntegerType.get()), - Types.NestedField.required(2, "name$name", Types.StringType.get()), - Types.NestedField.required(3, "op_time", Types.TimestampType.withoutZone()) - ); + protected static final Schema TABLE_SCHEMA = + new Schema( + Types.NestedField.required(1, "id", Types.IntegerType.get()), + Types.NestedField.required(2, "name$name", Types.StringType.get()), + Types.NestedField.required(3, "op_time", Types.TimestampType.withoutZone())); protected static final Record RECORD = GenericRecord.create(TABLE_SCHEMA); - protected static final Schema POS_DELETE_SCHEMA = new Schema( - MetadataColumns.DELETE_FILE_PATH, - MetadataColumns.DELETE_FILE_POS - ); - protected static final PartitionSpec SPEC = PartitionSpec.builderFor(TABLE_SCHEMA) - .day("op_time").build(); - protected static final PrimaryKeySpec PRIMARY_KEY_SPEC = PrimaryKeySpec.builderFor(TABLE_SCHEMA) - .addColumn("id").build(); - protected static final DataFile FILE_A = DataFiles.builder(SPEC) - .withPath("/path/to/data-a.parquet") - .withFileSizeInBytes(0) - .withPartitionPath("op_time_day=2022-01-01") // easy way to set partition data for now - .withRecordCount(2) // needs at least one record or else metrics will filter it out - .build(); - protected static final DataFile FILE_B = DataFiles.builder(SPEC) - .withPath("/path/to/data-b.parquet") - .withFileSizeInBytes(0) - .withPartitionPath("op_time_day=2022-01-02") // easy way to set partition data for now - .withRecordCount(2) // needs at least one record or else metrics will filter it out - .build(); - protected static final DataFile FILE_C = DataFiles.builder(SPEC) - .withPath("/path/to/data-b.parquet") - .withFileSizeInBytes(0) - .withPartitionPath("op_time_day=2022-01-03") // easy way to set partition data for now - .withRecordCount(2) // needs at least one record or else metrics will filter it out - .build(); + protected static final Schema POS_DELETE_SCHEMA = + new Schema(MetadataColumns.DELETE_FILE_PATH, MetadataColumns.DELETE_FILE_POS); + protected static final PartitionSpec SPEC = + PartitionSpec.builderFor(TABLE_SCHEMA).day("op_time").build(); + protected static final PrimaryKeySpec PRIMARY_KEY_SPEC = + PrimaryKeySpec.builderFor(TABLE_SCHEMA).addColumn("id").build(); + protected static final DataFile FILE_A = + DataFiles.builder(SPEC) + .withPath("/path/to/data-a.parquet") + .withFileSizeInBytes(0) + .withPartitionPath("op_time_day=2022-01-01") // easy way to set partition data for now + .withRecordCount(2) // needs at least one record or else metrics will filter it out + .build(); + protected static final DataFile FILE_B = + DataFiles.builder(SPEC) + .withPath("/path/to/data-b.parquet") + .withFileSizeInBytes(0) + .withPartitionPath("op_time_day=2022-01-02") // easy way to set partition data for now + .withRecordCount(2) // needs at least one record or else metrics will filter it out + .build(); + protected static final DataFile FILE_C = + DataFiles.builder(SPEC) + .withPath("/path/to/data-b.parquet") + .withFileSizeInBytes(0) + .withPartitionPath("op_time_day=2022-01-03") // easy way to set partition data for now + .withRecordCount(2) // needs at least one record or else metrics will filter it out + .build(); protected ArcticCatalog testCatalog; protected UnkeyedTable testTable; @@ -119,18 +120,22 @@ protected void setupTables() throws Exception { testCatalog = 
CatalogLoader.load(AMS.getUrl(CatalogTestHelper.TEST_CATALOG_NAME)); File tableDir = tmp.newFolder(); - testTable = testCatalog - .newTableBuilder(TABLE_ID, TABLE_SCHEMA) - .withProperty(TableProperties.LOCATION, tableDir.getPath() + "/table") - .withPartitionSpec(SPEC) - .create().asUnkeyedTable(); + testTable = + testCatalog + .newTableBuilder(TABLE_ID, TABLE_SCHEMA) + .withProperty(TableProperties.LOCATION, tableDir.getPath() + "/table") + .withPartitionSpec(SPEC) + .create() + .asUnkeyedTable(); - testKeyedTable = testCatalog - .newTableBuilder(PK_TABLE_ID, TABLE_SCHEMA) - .withProperty(TableProperties.LOCATION, tableDir.getPath() + "/pk_table") - .withPartitionSpec(SPEC) - .withPrimaryKeySpec(PRIMARY_KEY_SPEC) - .create().asKeyedTable(); + testKeyedTable = + testCatalog + .newTableBuilder(PK_TABLE_ID, TABLE_SCHEMA) + .withProperty(TableProperties.LOCATION, tableDir.getPath() + "/pk_table") + .withPartitionSpec(SPEC) + .withPrimaryKeySpec(PRIMARY_KEY_SPEC) + .create() + .asKeyedTable(); this.before(); } @@ -163,15 +168,16 @@ protected void clearTable() { protected List writeBase(TableIdentifier identifier, List records) { KeyedTable table = testCatalog.loadTable(identifier).asKeyedTable(); long txId = table.beginTransaction(""); - try (GenericBaseTaskWriter writer = GenericTaskWriters.builderFor(table) - .withTransactionId(txId).buildBaseWriter()) { - records.forEach(d -> { - try { - writer.write(d); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); + try (GenericBaseTaskWriter writer = + GenericTaskWriters.builderFor(table).withTransactionId(txId).buildBaseWriter()) { + records.forEach( + d -> { + try { + writer.write(d); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); WriteResult result = writer.complete(); AppendFiles appendFiles = table.baseTable().newAppend(); Arrays.stream(result.dataFiles()).forEach(appendFiles::appendFile); @@ -182,18 +188,19 @@ protected List writeBase(TableIdentifier identifier, List reco } } - protected List writeChange(TableIdentifier identifier, ChangeAction action, List records) { + protected List writeChange( + TableIdentifier identifier, ChangeAction action, List records) { KeyedTable table = testCatalog.loadTable(identifier).asKeyedTable(); - try (GenericChangeTaskWriter writer = GenericTaskWriters.builderFor(table) - .withChangeAction(action) - .buildChangeWriter()) { - records.forEach(d -> { - try { - writer.write(d); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); + try (GenericChangeTaskWriter writer = + GenericTaskWriters.builderFor(table).withChangeAction(action).buildChangeWriter()) { + records.forEach( + d -> { + try { + writer.write(d); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); WriteResult result = writer.complete(); AppendFiles appendFiles = table.changeTable().newAppend(); @@ -206,26 +213,31 @@ protected List writeChange(TableIdentifier identifier, ChangeAction ac } protected static List readKeyedTable(KeyedTable keyedTable) { - GenericKeyedDataReader reader = new GenericKeyedDataReader( - keyedTable.io(), - keyedTable.schema(), - keyedTable.schema(), - keyedTable.primaryKeySpec(), - null, - true, - IdentityPartitionConverters::convertConstant - ); + GenericKeyedDataReader reader = + new GenericKeyedDataReader( + keyedTable.io(), + keyedTable.schema(), + keyedTable.schema(), + keyedTable.primaryKeySpec(), + null, + true, + IdentityPartitionConverters::convertConstant); List result = Lists.newArrayList(); try (CloseableIterable combinedScanTasks = 
keyedTable.newScan().planTasks()) { - combinedScanTasks.forEach(combinedTask -> combinedTask.tasks().forEach(scTask -> { - try (CloseableIterator records = reader.readData(scTask)) { - while (records.hasNext()) { - result.add(records.next()); - } - } catch (IOException e) { - throw new RuntimeException(e); - } - })); + combinedScanTasks.forEach( + combinedTask -> + combinedTask + .tasks() + .forEach( + scTask -> { + try (CloseableIterator records = reader.readData(scTask)) { + while (records.hasNext()) { + result.add(records.next()); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + })); } catch (IOException e) { throw new RuntimeException(e); } @@ -252,7 +264,8 @@ public static LocalDateTime quickDate(int day) { return LocalDateTime.of(2020, 1, day, 0, 0); } - protected StructLike partitionData(Schema tableSchema, PartitionSpec spec, Object... partitionValues) { + protected StructLike partitionData( + Schema tableSchema, PartitionSpec spec, Object... partitionValues) { GenericRecord record = GenericRecord.create(tableSchema); int index = 0; Set partitionField = Sets.newHashSet(); @@ -277,17 +290,18 @@ protected StructLike partitionData(Schema tableSchema, PartitionSpec spec, Objec return pd; } - - protected static List writeBaseNoCommit(KeyedTable table, long txId, List records) { - try (GenericBaseTaskWriter writer = GenericTaskWriters.builderFor(table) - .withTransactionId(txId).buildBaseWriter()) { - records.forEach(d -> { - try { - writer.write(d); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); + protected static List writeBaseNoCommit( + KeyedTable table, long txId, List records) { + try (GenericBaseTaskWriter writer = + GenericTaskWriters.builderFor(table).withTransactionId(txId).buildBaseWriter()) { + records.forEach( + d -> { + try { + writer.write(d); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); WriteResult result = writer.complete(); return Arrays.asList(result.dataFiles()); } catch (IOException e) { diff --git a/trino/src/test/java/com/netease/arctic/trino/arctic/TableTestBaseWithInitDataForTrino.java b/trino/src/test/java/com/netease/arctic/trino/arctic/TableTestBaseWithInitDataForTrino.java index 4196746623..a8c35792b1 100644 --- a/trino/src/test/java/com/netease/arctic/trino/arctic/TableTestBaseWithInitDataForTrino.java +++ b/trino/src/test/java/com/netease/arctic/trino/arctic/TableTestBaseWithInitDataForTrino.java @@ -43,14 +43,22 @@ protected List baseRecords() { GenericRecord record = GenericRecord.create(TABLE_SCHEMA); ImmutableList.Builder builder = ImmutableList.builder(); - builder.add(record.copy(ImmutableMap.of("id", 1, "name$name", "john", "op_time", - LocalDateTime.of(2022, 1, 1, 12, 0, 0)))); - builder.add(record.copy(ImmutableMap.of("id", 2, "name$name", "lily", "op_time", - LocalDateTime.of(2022, 1, 2, 12, 0, 0)))); - builder.add(record.copy(ImmutableMap.of("id", 3, "name$name", "jake", "op_time", - LocalDateTime.of(2022, 1, 3, 12, 0, 0)))); - builder.add(record.copy(ImmutableMap.of("id", 4, "name$name", "sam", "op_time", - LocalDateTime.of(2022, 1, 4, 12, 0, 0)))); + builder.add( + record.copy( + ImmutableMap.of( + "id", 1, "name$name", "john", "op_time", LocalDateTime.of(2022, 1, 1, 12, 0, 0)))); + builder.add( + record.copy( + ImmutableMap.of( + "id", 2, "name$name", "lily", "op_time", LocalDateTime.of(2022, 1, 2, 12, 0, 0)))); + builder.add( + record.copy( + ImmutableMap.of( + "id", 3, "name$name", "jake", "op_time", LocalDateTime.of(2022, 1, 3, 12, 0, 0)))); + builder.add( + 
record.copy( + ImmutableMap.of( + "id", 4, "name$name", "sam", "op_time", LocalDateTime.of(2022, 1, 4, 12, 0, 0)))); return builder.build(); } @@ -59,8 +67,10 @@ protected List changeInsertRecords() { GenericRecord record = GenericRecord.create(TABLE_SCHEMA); ImmutableList.Builder builder = ImmutableList.builder(); - builder.add(record.copy(ImmutableMap.of("id", 5, "name$name", "mary", "op_time", - LocalDateTime.of(2022, 1, 1, 12, 0, 0)))); + builder.add( + record.copy( + ImmutableMap.of( + "id", 5, "name$name", "mary", "op_time", LocalDateTime.of(2022, 1, 1, 12, 0, 0)))); return builder.build(); } @@ -68,16 +78,20 @@ protected List changeSparkInsertRecords() { GenericRecord record = GenericRecord.create(TABLE_SCHEMA); ImmutableList.Builder builder = ImmutableList.builder(); - builder.add(record.copy(ImmutableMap.of("id", 6, "name$name", "mack", "op_time", - LocalDateTime.of(2022, 1, 1, 12, 0, 0)))); + builder.add( + record.copy( + ImmutableMap.of( + "id", 6, "name$name", "mack", "op_time", LocalDateTime.of(2022, 1, 1, 12, 0, 0)))); return builder.build(); } protected List changeDeleteRecords() { GenericRecord record = GenericRecord.create(TABLE_SCHEMA); ImmutableList.Builder builder = ImmutableList.builder(); - builder.add(record.copy(ImmutableMap.of("id", 5, "name$name", "mary", "op_time", - LocalDateTime.of(2022, 1, 1, 12, 0, 0)))); + builder.add( + record.copy( + ImmutableMap.of( + "id", 5, "name$name", "mary", "op_time", LocalDateTime.of(2022, 1, 1, 12, 0, 0)))); return builder.build(); } @@ -87,75 +101,82 @@ protected List changeDeleteRecords() { protected void initData() throws IOException { long currentSequenceNumber = testKeyedTable.beginTransaction(null); - //write base + // write base { - GenericBaseTaskWriter writer = GenericTaskWriters.builderFor(testKeyedTable) - .withTransactionId(currentSequenceNumber).buildBaseWriter(); + GenericBaseTaskWriter writer = + GenericTaskWriters.builderFor(testKeyedTable) + .withTransactionId(currentSequenceNumber) + .buildBaseWriter(); for (Record record : baseRecords()) { writer.write(record); } WriteResult result = writer.complete(); AppendFiles baseAppend = testKeyedTable.baseTable().newAppend(); - dataFileForPositionDelete = Arrays.stream(result.dataFiles()) - .filter(s -> s.path().toString().contains("op_time_day=2022-01-04")).findAny().get(); + dataFileForPositionDelete = + Arrays.stream(result.dataFiles()) + .filter(s -> s.path().toString().contains("op_time_day=2022-01-04")) + .findAny() + .get(); Arrays.stream(result.dataFiles()).forEach(baseAppend::appendFile); baseAppend.commit(); } // write position delete { - SortedPosDeleteWriter writer = GenericTaskWriters.builderFor(testKeyedTable) - .withTransactionId(currentSequenceNumber).buildBasePosDeleteWriter(3, 3, dataFileForPositionDelete.partition()); + SortedPosDeleteWriter writer = + GenericTaskWriters.builderFor(testKeyedTable) + .withTransactionId(currentSequenceNumber) + .buildBasePosDeleteWriter(3, 3, dataFileForPositionDelete.partition()); writer.delete(dataFileForPositionDelete.path().toString(), 0); DeleteFile posDeleteFiles = writer.complete().stream().findAny().get(); this.deleteFileOfPositionDelete = posDeleteFiles; testKeyedTable.baseTable().newRowDelta().addDeletes(posDeleteFiles).commit(); } - //write change insert + // write change insert { - GenericChangeTaskWriter writer = GenericTaskWriters.builderFor(testKeyedTable) - .buildChangeWriter(); + GenericChangeTaskWriter writer = + GenericTaskWriters.builderFor(testKeyedTable).buildChangeWriter(); for (Record record : 
changeInsertRecords()) { writer.write(record); } WriteResult result = writer.complete(); AppendFiles changeAppend = testKeyedTable.changeTable().newAppend(); - Arrays.stream(result.dataFiles()) - .forEach(changeAppend::appendFile); + Arrays.stream(result.dataFiles()).forEach(changeAppend::appendFile); changeAppend.commit(); } - //begin spark insert + // begin spark insert currentSequenceNumber = testKeyedTable.beginTransaction(null); - //write change delete + // write change delete { - GenericChangeTaskWriter writer = GenericTaskWriters.builderFor(testKeyedTable) - .withChangeAction(ChangeAction.DELETE).buildChangeWriter(); + GenericChangeTaskWriter writer = + GenericTaskWriters.builderFor(testKeyedTable) + .withChangeAction(ChangeAction.DELETE) + .buildChangeWriter(); for (Record record : changeDeleteRecords()) { writer.write(record); } WriteResult result = writer.complete(); AppendFiles changeAppend = testKeyedTable.changeTable().newAppend(); - Arrays.stream(result.dataFiles()) - .forEach(changeAppend::appendFile); + Arrays.stream(result.dataFiles()).forEach(changeAppend::appendFile); changeAppend.commit(); } - //spark insert + // spark insert { - GenericChangeTaskWriter writer = GenericTaskWriters.builderFor(testKeyedTable) - .withTransactionId(currentSequenceNumber) - .buildChangeWriter(); + GenericChangeTaskWriter writer = + GenericTaskWriters.builderFor(testKeyedTable) + .withTransactionId(currentSequenceNumber) + .buildChangeWriter(); for (Record record : changeSparkInsertRecords()) { writer.write(record); } WriteResult result = writer.complete(); AppendFiles changeAppend = testKeyedTable.changeTable().newAppend(); - Arrays.stream(result.dataFiles()) - .forEach(changeAppend::appendFile); + Arrays.stream(result.dataFiles()).forEach(changeAppend::appendFile); changeAppend.commit(); } } diff --git a/trino/src/test/java/com/netease/arctic/trino/arctic/TestBaseArcticPrimaryTable.java b/trino/src/test/java/com/netease/arctic/trino/arctic/TestBaseArcticPrimaryTable.java index 6100bffaaf..8f07234b4b 100644 --- a/trino/src/test/java/com/netease/arctic/trino/arctic/TestBaseArcticPrimaryTable.java +++ b/trino/src/test/java/com/netease/arctic/trino/arctic/TestBaseArcticPrimaryTable.java @@ -18,6 +18,9 @@ package com.netease.arctic.trino.arctic; +import static com.netease.arctic.ams.api.MockArcticMetastoreServer.TEST_CATALOG_NAME; +import static org.assertj.core.api.Assertions.assertThat; + import com.google.common.collect.ImmutableMap; import com.netease.arctic.TestedCatalogs; import com.netease.arctic.ams.api.TableFormat; @@ -25,8 +28,6 @@ import io.trino.testing.QueryRunner; import org.testng.annotations.AfterClass; import org.testng.annotations.Test; -import static com.netease.arctic.ams.api.MockArcticMetastoreServer.TEST_CATALOG_NAME; -import static org.assertj.core.api.Assertions.assertThat; public class TestBaseArcticPrimaryTable extends TableTestBaseWithInitDataForTrino { @@ -41,12 +42,12 @@ protected QueryRunner createQueryRunner() throws Exception { setupTables(); initData(); return ArcticQueryRunner.builder() - .setIcebergProperties(ImmutableMap.of( - "arctic.url", - String.format("thrift://localhost:%s/%s", AMS.port(), TEST_CATALOG_NAME), - "arctic.enable-split-task-by-delete-ratio", - "true" - )) + .setIcebergProperties( + ImmutableMap.of( + "arctic.url", + String.format("thrift://localhost:%s/%s", AMS.port(), TEST_CATALOG_NAME), + "arctic.enable-split-task-by-delete-ratio", + "true")) .build(); } @@ -54,11 +55,12 @@ protected QueryRunner createQueryRunner() throws Exception { 
public void testStats() { assertThat(query("SHOW STATS FOR " + PK_TABLE_FULL_NAME)) .skippingTypesCheck() - .matches("VALUES " + - "('id', NULL, NULL, 0e0, NULL, '1', '6'), " + - "('name$name', 4805e-1, NULL, 0e0, NULL, NULL, NULL), " + - "('op_time', NULL, NULL, 0e0, NULL, '2022-01-01 12:00:00.000000', '2022-01-04 12:00:00.000000'), " + - "(NULL, NULL, NULL, NULL, 7e0, NULL, NULL)"); + .matches( + "VALUES " + + "('id', NULL, NULL, 0e0, NULL, '1', '6'), " + + "('name$name', 4805e-1, NULL, 0e0, NULL, NULL, NULL), " + + "('op_time', NULL, NULL, 0e0, NULL, '2022-01-01 12:00:00.000000', '2022-01-04 12:00:00.000000'), " + + "(NULL, NULL, NULL, NULL, 7e0, NULL, NULL)"); } @Test @@ -70,10 +72,10 @@ public void tableMOR() throws InterruptedException { public void tableMORWithProject() throws InterruptedException { assertQuery( "select op_time, \"name$name\" from " + PK_TABLE_FULL_NAME, - "VALUES (TIMESTAMP '2022-01-01 12:00:00', 'john'), " + - "(TIMESTAMP'2022-01-02 12:00:00', 'lily'), " + - "(TIMESTAMP'2022-01-03 12:00:00', 'jake'), " + - "(TIMESTAMP'2022-01-01 12:00:00', 'mack')"); + "VALUES (TIMESTAMP '2022-01-01 12:00:00', 'john'), " + + "(TIMESTAMP'2022-01-02 12:00:00', 'lily'), " + + "(TIMESTAMP'2022-01-03 12:00:00', 'jake'), " + + "(TIMESTAMP'2022-01-01 12:00:00', 'mack')"); } @Test @@ -83,39 +85,42 @@ public void baseQuery() { @Test public void baseQueryWhenTableNameContainCatalogAndDataBase() { - assertQuery("select id from " + "arctic.test_db.\"arctic.test_db.test_pk_table#base\"", "VALUES 1, 2, 3"); + assertQuery( + "select id from " + "arctic.test_db.\"arctic.test_db.test_pk_table#base\"", + "VALUES 1, 2, 3"); } @Test public void baseQueryWhenTableNameContainDataBase() { - assertQuery("select id from " + "arctic.test_db.\"test_db.test_pk_table#base\"", "VALUES 1, 2, 3"); + assertQuery( + "select id from " + "arctic.test_db.\"test_db.test_pk_table#base\"", "VALUES 1, 2, 3"); } @Test public void changeQuery() { assertQuery( "select * from " + "arctic.test_db.\"test_pk_table#change\"", - "VALUES (6,'mack',TIMESTAMP '2022-01-01 12:00:00.000000' ,3,1,'INSERT')," + - "(5,'mary',TIMESTAMP '2022-01-01 12:00:00.000000',2,1,'INSERT')," + - "(5,'mary',TIMESTAMP '2022-01-01 12:00:00.000000',4,1,'DELETE')"); + "VALUES (6,'mack',TIMESTAMP '2022-01-01 12:00:00.000000' ,3,1,'INSERT')," + + "(5,'mary',TIMESTAMP '2022-01-01 12:00:00.000000',2,1,'INSERT')," + + "(5,'mary',TIMESTAMP '2022-01-01 12:00:00.000000',4,1,'DELETE')"); } @Test public void changeQueryWhenTableNameContainCatalogAndDataBase() { assertQuery( "select * from " + "arctic.test_db.\"arctic.test_db.test_pk_table#change\"", - "VALUES (6,'mack',TIMESTAMP '2022-01-01 12:00:00.000000' ,3,1,'INSERT')," + - "(5,'mary',TIMESTAMP '2022-01-01 12:00:00.000000',2,1,'INSERT')," + - "(5,'mary',TIMESTAMP '2022-01-01 12:00:00.000000',4,1,'DELETE')"); + "VALUES (6,'mack',TIMESTAMP '2022-01-01 12:00:00.000000' ,3,1,'INSERT')," + + "(5,'mary',TIMESTAMP '2022-01-01 12:00:00.000000',2,1,'INSERT')," + + "(5,'mary',TIMESTAMP '2022-01-01 12:00:00.000000',4,1,'DELETE')"); } @Test public void changeQueryWhenTableNameContainDataBase() { assertQuery( "select * from " + "arctic.test_db.\"test_db.test_pk_table#change\"", - "VALUES (6,'mack',TIMESTAMP '2022-01-01 12:00:00.000000' ,3,1,'INSERT')," + - "(5,'mary',TIMESTAMP '2022-01-01 12:00:00.000000',2,1,'INSERT')," + - "(5,'mary',TIMESTAMP '2022-01-01 12:00:00.000000',4,1,'DELETE')"); + "VALUES (6,'mack',TIMESTAMP '2022-01-01 12:00:00.000000' ,3,1,'INSERT')," + + "(5,'mary',TIMESTAMP '2022-01-01 
12:00:00.000000',2,1,'INSERT')," + + "(5,'mary',TIMESTAMP '2022-01-01 12:00:00.000000',4,1,'DELETE')"); } @AfterClass(alwaysRun = true) diff --git a/trino/src/test/java/com/netease/arctic/trino/arctic/TestHiveTable.java b/trino/src/test/java/com/netease/arctic/trino/arctic/TestHiveTable.java index 620f59633b..559194d331 100644 --- a/trino/src/test/java/com/netease/arctic/trino/arctic/TestHiveTable.java +++ b/trino/src/test/java/com/netease/arctic/trino/arctic/TestHiveTable.java @@ -18,6 +18,12 @@ package com.netease.arctic.trino.arctic; +import static com.netease.arctic.ams.api.MockArcticMetastoreServer.TEST_CATALOG_NAME; +import static com.netease.arctic.table.TableProperties.BASE_FILE_FORMAT; +import static com.netease.arctic.table.TableProperties.CHANGE_FILE_FORMAT; +import static com.netease.arctic.table.TableProperties.DEFAULT_FILE_FORMAT; +import static org.assertj.core.api.Assertions.assertThat; + import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.netease.arctic.ams.api.MockArcticMetastoreServer; @@ -57,30 +63,41 @@ import java.util.StringJoiner; import java.util.stream.Collectors; -import static com.netease.arctic.ams.api.MockArcticMetastoreServer.TEST_CATALOG_NAME; -import static com.netease.arctic.table.TableProperties.BASE_FILE_FORMAT; -import static com.netease.arctic.table.TableProperties.CHANGE_FILE_FORMAT; -import static com.netease.arctic.table.TableProperties.DEFAULT_FILE_FORMAT; -import static org.assertj.core.api.Assertions.assertThat; - public class TestHiveTable extends TestHiveTableBaseForTrino { - private final String TEST_HIVE_TABLE_FULL_NAME = "arctic." + HIVE_TABLE_ID.getDatabase() + "." + HIVE_TABLE_ID.getTableName(); + private final String TEST_HIVE_TABLE_FULL_NAME = + "arctic." + HIVE_TABLE_ID.getDatabase() + "." + HIVE_TABLE_ID.getTableName(); private final String TEST_HIVE_PK_TABLE_FULL_NAME = "arctic." + HIVE_PK_TABLE_ID.getDatabase() + "." + HIVE_PK_TABLE_ID.getTableName(); private final String TEST_HIVE_PK_TABLE_FULL_NAME_BASE = - "arctic." + HIVE_PK_TABLE_ID.getDatabase() + "." + "\"" + HIVE_PK_TABLE_ID.getTableName() + "#base\""; + "arctic." + + HIVE_PK_TABLE_ID.getDatabase() + + "." + + "\"" + + HIVE_PK_TABLE_ID.getTableName() + + "#base\""; private final String TEST_UN_PARTITION_HIVE_TABLE_FULL_NAME = - "arctic." + UN_PARTITION_HIVE_TABLE_ID.getDatabase() + "." + UN_PARTITION_HIVE_TABLE_ID.getTableName(); + "arctic." + + UN_PARTITION_HIVE_TABLE_ID.getDatabase() + + "." + + UN_PARTITION_HIVE_TABLE_ID.getTableName(); private final String TEST_UN_PARTITION_HIVE_PK_TABLE_FULL_NAME = - "arctic." + UN_PARTITION_HIVE_PK_TABLE_ID.getDatabase() + "." + UN_PARTITION_HIVE_PK_TABLE_ID.getTableName(); + "arctic." + + UN_PARTITION_HIVE_PK_TABLE_ID.getDatabase() + + "." + + UN_PARTITION_HIVE_PK_TABLE_ID.getTableName(); private final String TEST_UN_PARTITION_HIVE_PK_TABLE_FULL_NAME_BASE = - "arctic." + UN_PARTITION_HIVE_PK_TABLE_ID.getDatabase() + "." + "\"" + UN_PARTITION_HIVE_PK_TABLE_ID.getTableName() + "#base\""; + "arctic." + + UN_PARTITION_HIVE_PK_TABLE_ID.getDatabase() + + "." 
+ + "\"" + + UN_PARTITION_HIVE_PK_TABLE_ID.getTableName() + + "#base\""; private static final TableIdentifier HIVE_PK_TABLE_ORC_ID = TableIdentifier.of(TEST_CATALOG_NAME, HIVE_DB_NAME, "test_pk_hive_table_orc"); @@ -102,20 +119,26 @@ public class TestHiveTable extends TestHiveTableBaseForTrino { @Override protected void setupTables() throws Exception { super.setupTables(); - //added for test parquet and orc metrics - testKeyedHiveTableOrc = (KeyedHiveTable) hiveCatalog - .newTableBuilder(HIVE_PK_TABLE_ORC_ID, HIVE_TABLE_SCHEMA) - .withProperty(TableProperties.LOCATION, warehousePath() + "/pk_table_orc") - .withPartitionSpec(HIVE_SPEC) - .withPrimaryKeySpec(PRIMARY_KEY_SPEC) - .create().asKeyedTable(); - - testKeyedHiveTableParquet = (KeyedHiveTable) hiveCatalog - .newTableBuilder(HIVE_PK_TABLE_PARQUET_ID, HIVE_TABLE_SCHEMA) - .withProperty(TableProperties.LOCATION, warehousePath() + "/pk_table_parquet") - .withPartitionSpec(HIVE_SPEC) - .withPrimaryKeySpec(PRIMARY_KEY_SPEC) - .create().asKeyedTable(); + // added for test parquet and orc metrics + testKeyedHiveTableOrc = + (KeyedHiveTable) + hiveCatalog + .newTableBuilder(HIVE_PK_TABLE_ORC_ID, HIVE_TABLE_SCHEMA) + .withProperty(TableProperties.LOCATION, warehousePath() + "/pk_table_orc") + .withPartitionSpec(HIVE_SPEC) + .withPrimaryKeySpec(PRIMARY_KEY_SPEC) + .create() + .asKeyedTable(); + + testKeyedHiveTableParquet = + (KeyedHiveTable) + hiveCatalog + .newTableBuilder(HIVE_PK_TABLE_PARQUET_ID, HIVE_TABLE_SCHEMA) + .withProperty(TableProperties.LOCATION, warehousePath() + "/pk_table_parquet") + .withPartitionSpec(HIVE_SPEC) + .withPrimaryKeySpec(PRIMARY_KEY_SPEC) + .create() + .asKeyedTable(); } @Override @@ -126,8 +149,10 @@ protected QueryRunner createQueryRunner() throws Exception { initData(); return ArcticQueryRunner.builder() .setExtraProperties(ImmutableMap.of("http-server.http.port", "8080")) - .setIcebergProperties(ImmutableMap.of("arctic.url", - String.format("thrift://localhost:%s/%s", AMS.port(), TEST_CATALOG_NAME))) + .setIcebergProperties( + ImmutableMap.of( + "arctic.url", + String.format("thrift://localhost:%s/%s", AMS.port(), TEST_CATALOG_NAME))) .build(); } @@ -138,102 +163,156 @@ private void initData() throws IOException { write(testKeyedHiveTable, ChangeLocationKind.INSTANT, HiveTestRecords.changeInsertRecords()); write(testKeyedHiveTable, BaseLocationKind.INSTANT, HiveTestRecords.baseRecords()); write(testKeyedHiveTable, HiveLocationKind.INSTANT, HiveTestRecords.hiveRecords()); - write(testKeyedHiveTable, ChangeLocationKind.INSTANT, HiveTestRecords.changeDeleteRecords(), ChangeAction.DELETE); + write( + testKeyedHiveTable, + ChangeLocationKind.INSTANT, + HiveTestRecords.changeDeleteRecords(), + ChangeAction.DELETE); write(testUnPartitionHiveTable, BaseLocationKind.INSTANT, HiveTestRecords.baseRecords()); write(testUnPartitionHiveTable, HiveLocationKind.INSTANT, HiveTestRecords.hiveRecords()); - write(testUnPartitionKeyedHiveTable, ChangeLocationKind.INSTANT, HiveTestRecords.changeInsertRecords()); + write( + testUnPartitionKeyedHiveTable, + ChangeLocationKind.INSTANT, + HiveTestRecords.changeInsertRecords()); write(testUnPartitionKeyedHiveTable, BaseLocationKind.INSTANT, HiveTestRecords.baseRecords()); write(testUnPartitionKeyedHiveTable, HiveLocationKind.INSTANT, HiveTestRecords.hiveRecords()); - write(testUnPartitionKeyedHiveTable, ChangeLocationKind.INSTANT, HiveTestRecords.changeDeleteRecords(), + write( + testUnPartitionKeyedHiveTable, + ChangeLocationKind.INSTANT, + HiveTestRecords.changeDeleteRecords(), 
ChangeAction.DELETE); - write(testKeyedHiveTableOrc, ChangeLocationKind.INSTANT, HiveTestRecords.changeInsertRecords(), FileFormat.ORC); - write(testKeyedHiveTableOrc, BaseLocationKind.INSTANT, HiveTestRecords.baseRecords(), FileFormat.ORC); - write(testKeyedHiveTableOrc, HiveLocationKind.INSTANT, HiveTestRecords.hiveRecords(), FileFormat.ORC); - write(testKeyedHiveTableOrc, ChangeLocationKind.INSTANT, HiveTestRecords.changeDeleteRecords(), - ChangeAction.DELETE, FileFormat.ORC); - - write(testKeyedHiveTableParquet, ChangeLocationKind.INSTANT, HiveTestRecords.changeInsertRecords(), + write( + testKeyedHiveTableOrc, + ChangeLocationKind.INSTANT, + HiveTestRecords.changeInsertRecords(), + FileFormat.ORC); + write( + testKeyedHiveTableOrc, + BaseLocationKind.INSTANT, + HiveTestRecords.baseRecords(), + FileFormat.ORC); + write( + testKeyedHiveTableOrc, + HiveLocationKind.INSTANT, + HiveTestRecords.hiveRecords(), + FileFormat.ORC); + write( + testKeyedHiveTableOrc, + ChangeLocationKind.INSTANT, + HiveTestRecords.changeDeleteRecords(), + ChangeAction.DELETE, + FileFormat.ORC); + + write( + testKeyedHiveTableParquet, + ChangeLocationKind.INSTANT, + HiveTestRecords.changeInsertRecords(), + FileFormat.PARQUET); + write( + testKeyedHiveTableParquet, + BaseLocationKind.INSTANT, + HiveTestRecords.baseRecords(), + FileFormat.PARQUET); + write( + testKeyedHiveTableParquet, + HiveLocationKind.INSTANT, + HiveTestRecords.hiveRecords(), + FileFormat.PARQUET); + write( + testKeyedHiveTableParquet, + ChangeLocationKind.INSTANT, + HiveTestRecords.changeDeleteRecords(), + ChangeAction.DELETE, FileFormat.PARQUET); - write(testKeyedHiveTableParquet, BaseLocationKind.INSTANT, HiveTestRecords.baseRecords(), FileFormat.PARQUET); - write(testKeyedHiveTableParquet, HiveLocationKind.INSTANT, HiveTestRecords.hiveRecords(), FileFormat.PARQUET); - write(testKeyedHiveTableParquet, ChangeLocationKind.INSTANT, HiveTestRecords.changeDeleteRecords(), - ChangeAction.DELETE, FileFormat.PARQUET); } @Test public void testHiveTableMOR() throws InterruptedException { - assertCommon("select id, name, op_time, \"d$d\", map_name, array_name, struct_name from " - + TEST_HIVE_TABLE_FULL_NAME, ImmutableList.of(v1, v2, v3, v4)); + assertCommon( + "select id, name, op_time, \"d$d\", map_name, array_name, struct_name from " + + TEST_HIVE_TABLE_FULL_NAME, + ImmutableList.of(v1, v2, v3, v4)); } @Test public void testKeyedHiveTableMOR() { - assertCommon("select id, name, op_time, \"d$d\", map_name, array_name, struct_name from " + TEST_HIVE_PK_TABLE_FULL_NAME, + assertCommon( + "select id, name, op_time, \"d$d\", map_name, array_name, struct_name from " + + TEST_HIVE_PK_TABLE_FULL_NAME, ImmutableList.of(v2, v4, v6)); } @Test public void testKeyedHiveTableBase() { - assertCommon("select id, name, op_time, \"d$d\", map_name, array_name, struct_name from " - + TEST_HIVE_PK_TABLE_FULL_NAME_BASE, ImmutableList.of(v1, v2, v3, v4)); + assertCommon( + "select id, name, op_time, \"d$d\", map_name, array_name, struct_name from " + + TEST_HIVE_PK_TABLE_FULL_NAME_BASE, + ImmutableList.of(v1, v2, v3, v4)); } @Test public void testNoPartitionHiveTableMOR() { - assertCommon("select id, name, op_time, \"d$d\", map_name, array_name, struct_name from " - + TEST_HIVE_PK_TABLE_FULL_NAME_BASE, ImmutableList.of(v1, v2, v3, v4)); + assertCommon( + "select id, name, op_time, \"d$d\", map_name, array_name, struct_name from " + + TEST_HIVE_PK_TABLE_FULL_NAME_BASE, + ImmutableList.of(v1, v2, v3, v4)); } @Test public void testNoPartitionKeyedHiveTableMOR() { - 
assertCommon("select id, name, op_time, \"d$d\", map_name, array_name, struct_name from " - + TEST_UN_PARTITION_HIVE_PK_TABLE_FULL_NAME, ImmutableList.of(v2, v4, v6)); + assertCommon( + "select id, name, op_time, \"d$d\", map_name, array_name, struct_name from " + + TEST_UN_PARTITION_HIVE_PK_TABLE_FULL_NAME, + ImmutableList.of(v2, v4, v6)); } @Test public void testNoPartitionKeyedHiveTableBase() { - assertCommon("select id, name, op_time, \"d$d\", map_name, array_name, struct_name from " - + TEST_UN_PARTITION_HIVE_PK_TABLE_FULL_NAME_BASE, ImmutableList.of(v1, v2, v3, v4)); + assertCommon( + "select id, name, op_time, \"d$d\", map_name, array_name, struct_name from " + + TEST_UN_PARTITION_HIVE_PK_TABLE_FULL_NAME_BASE, + ImmutableList.of(v1, v2, v3, v4)); } @Test public void testParquetStats() { assertThat(query("SHOW STATS FOR " + TEST_HIVE_PK_TABLE_PARQUET_FULL_NAME)) .skippingTypesCheck() - .matches("VALUES " + - "('id', NULL, NULL, 0e0, NULL, '1', '6'), " + - "('op_time', NULL, NULL, 0e0, NULL, '2022-01-01 12:00:00.000000', '2022-01-04 12:00:00.000000'), " + - "('op_time_with_zone', NULL, NULL, 0e0, NULL,'2022-01-01 12:00:00.000 UTC', '2022-01-04 12:00:00.000 UTC'), " + - "('d$d', NULL, NULL, 0e0, NULL, '100.0', '105.0'), " + - "('map_name', NULL, NULL, NULL, NULL, NULL, NULL), " + - "('array_name', NULL, NULL, NULL, NULL, NULL, NULL), " + - "('struct_name', NULL, NULL, NULL, NULL, NULL, NULL), " + - "('name', 618e0, NULL, 0e0, NULL, NULL, NULL), " + - "(NULL, NULL, NULL, NULL, 9e0, NULL, NULL)"); + .matches( + "VALUES " + + "('id', NULL, NULL, 0e0, NULL, '1', '6'), " + + "('op_time', NULL, NULL, 0e0, NULL, '2022-01-01 12:00:00.000000', '2022-01-04 12:00:00.000000'), " + + "('op_time_with_zone', NULL, NULL, 0e0, NULL,'2022-01-01 12:00:00.000 UTC', '2022-01-04 12:00:00.000 UTC'), " + + "('d$d', NULL, NULL, 0e0, NULL, '100.0', '105.0'), " + + "('map_name', NULL, NULL, NULL, NULL, NULL, NULL), " + + "('array_name', NULL, NULL, NULL, NULL, NULL, NULL), " + + "('struct_name', NULL, NULL, NULL, NULL, NULL, NULL), " + + "('name', 618e0, NULL, 0e0, NULL, NULL, NULL), " + + "(NULL, NULL, NULL, NULL, 9e0, NULL, NULL)"); } @Test public void testOrcStats() { assertThat(query("SHOW STATS FOR " + TEST_HIVE_PK_TABLE_ORC_FULL_NAME)) .skippingTypesCheck() - .matches("VALUES " + - "('id', NULL, NULL, 0e0, NULL, '1', '6'), " + - "('op_time', NULL, NULL, 0e0, NULL, '2022-01-01 12:00:00.000000'," - + " '2022-01-04 12:00:00.000000'), " + - "('op_time_with_zone', NULL, NULL, 0e0, NULL, " - + "'2022-01-01 12:00:00.000 UTC', '2022-01-04 12:00:00.000 UTC'), " + - "('d$d', NULL, NULL, 0e0, NULL, '100.0', '105.0'), " + - "('map_name', 27e0, NULL, 0e0, NULL, NULL, NULL), " + - "('array_name', 27e0, NULL, 0e0, NULL, NULL, NULL), " + - "('struct_name', 0e0, NULL, 0e0, NULL, NULL, NULL), " + - "('name', 156e0, NULL, 0e0, NULL, NULL, NULL), " + - "(NULL, NULL, NULL, NULL, 9e0, NULL, NULL)"); + .matches( + "VALUES " + + "('id', NULL, NULL, 0e0, NULL, '1', '6'), " + + "('op_time', NULL, NULL, 0e0, NULL, '2022-01-01 12:00:00.000000'," + + " '2022-01-04 12:00:00.000000'), " + + "('op_time_with_zone', NULL, NULL, 0e0, NULL, " + + "'2022-01-01 12:00:00.000 UTC', '2022-01-04 12:00:00.000 UTC'), " + + "('d$d', NULL, NULL, 0e0, NULL, '100.0', '105.0'), " + + "('map_name', 27e0, NULL, 0e0, NULL, NULL, NULL), " + + "('array_name', 27e0, NULL, 0e0, NULL, NULL, NULL), " + + "('struct_name', 0e0, NULL, 0e0, NULL, NULL, NULL), " + + "('name', 156e0, NULL, 0e0, NULL, NULL, NULL), " + + "(NULL, NULL, NULL, NULL, 9e0, NULL, NULL)"); } - 
private void assertCommon(String query, List> values) { QueryAssertions.QueryAssert queryAssert = assertThat(query(query)); StringJoiner stringJoiner = new StringJoiner(",", "VALUES", ""); @@ -249,29 +328,35 @@ public void clear() { stopMetastore(); } - private void write(ArcticTable table, LocationKind locationKind, List records) throws - IOException { + private void write(ArcticTable table, LocationKind locationKind, List records) + throws IOException { write(table, locationKind, records, ChangeAction.INSERT, null); } - private void write(ArcticTable table, LocationKind locationKind, List records, - ChangeAction changeAction) throws - IOException { + private void write( + ArcticTable table, LocationKind locationKind, List records, ChangeAction changeAction) + throws IOException { write(table, locationKind, records, changeAction, null); } - private void write(ArcticTable table, LocationKind locationKind, List records, FileFormat fileFormat) throws - IOException { + private void write( + ArcticTable table, LocationKind locationKind, List records, FileFormat fileFormat) + throws IOException { write(table, locationKind, records, ChangeAction.INSERT, fileFormat); } - private void write(ArcticTable table, LocationKind locationKind, List records, - ChangeAction changeAction, FileFormat fileFormat) throws IOException { + private void write( + ArcticTable table, + LocationKind locationKind, + List records, + ChangeAction changeAction, + FileFormat fileFormat) + throws IOException { List> writers; if (fileFormat == null) { writers = genWriters(table, locationKind, changeAction, FileFormat.PARQUET, FileFormat.ORC); } else { - writers = genWriters(table,locationKind, changeAction, fileFormat); + writers = genWriters(table, locationKind, changeAction, fileFormat); } for (int i = 0; i < records.size(); i++) { @@ -297,8 +382,11 @@ private void write(ArcticTable table, LocationKind locationKind, List re } } - private List> genWriters(ArcticTable table,LocationKind locationKind, - ChangeAction changeAction, FileFormat... fileFormat) { + private List> genWriters( + ArcticTable table, + LocationKind locationKind, + ChangeAction changeAction, + FileFormat... fileFormat) { List> result = Lists.newArrayList(); for (FileFormat format : fileFormat) { UpdateProperties updateProperties = table.updateProperties(); @@ -306,10 +394,12 @@ private List> genWriters(ArcticTable table,LocationKind locat updateProperties.set(CHANGE_FILE_FORMAT, format.name()); updateProperties.set(DEFAULT_FILE_FORMAT, format.name()); updateProperties.commit(); - AdaptHiveGenericTaskWriterBuilder builder = AdaptHiveGenericTaskWriterBuilder - .builderFor(table); + AdaptHiveGenericTaskWriterBuilder builder = + AdaptHiveGenericTaskWriterBuilder.builderFor(table); TaskWriter writer = - builder.withChangeAction(changeAction).withTransactionId(table.isKeyedTable() ? txid++ : null) + builder + .withChangeAction(changeAction) + .withTransactionId(table.isKeyedTable() ? 
txid++ : null) .buildWriter(locationKind); result.add(writer); } @@ -317,11 +407,13 @@ private List> genWriters(ArcticTable table,LocationKind locat } private CloseableIterable readParquet(Schema schema, String path) { - AdaptHiveParquet.ReadBuilder builder = AdaptHiveParquet.read( - Files.localInput(new File(path))) - .project(schema) - .createReaderFunc(fileSchema -> AdaptHiveGenericParquetReaders.buildReader(schema, fileSchema, new HashMap<>())) - .caseSensitive(false); + AdaptHiveParquet.ReadBuilder builder = + AdaptHiveParquet.read(Files.localInput(new File(path))) + .project(schema) + .createReaderFunc( + fileSchema -> + AdaptHiveGenericParquetReaders.buildReader(schema, fileSchema, new HashMap<>())) + .caseSensitive(false); CloseableIterable iterable = builder.build(); return iterable; @@ -331,58 +423,58 @@ private String base(String table) { return "\"" + table + "#base\""; } - List v1 = ImmutableList.of( - "1", - "varchar 'john'", - "TIMESTAMP'2022-01-01 12:00:00.000000'", - "CAST(100 AS decimal(10,0))", - "map(ARRAY[varchar 'map_key'],ARRAY[varchar 'map_value'])", - "ARRAY[varchar 'array_element']", - "CAST(ROW(varchar 'struct_sub1', varchar 'struct_sub2') " + - "AS ROW(struct_name_sub_1 varchar, struct_name_sub_2 varchar))" - ); - - List v2 = ImmutableList.of( - "2", - "varchar 'lily'", - "TIMESTAMP'2022-01-02 12:00:00.000000'", - "CAST(101 AS decimal(10,0))", - "map(ARRAY[varchar 'map_key'],ARRAY[varchar 'map_value'])", - "ARRAY[varchar 'array_element']", - "CAST(ROW(varchar 'struct_sub1', varchar 'struct_sub2') " + - "AS ROW(struct_name_sub_1 varchar, struct_name_sub_2 varchar))" - ); - - List v3 = ImmutableList.of( - "3", - "varchar 'jake'", - "TIMESTAMP'2022-01-03 12:00:00.000000'", - "CAST(102 AS decimal(10,0))", - "map(ARRAY[varchar 'map_key'],ARRAY[varchar 'map_value'])", - "ARRAY[varchar 'array_element']", - "CAST(ROW(varchar 'struct_sub1', varchar 'struct_sub2') " + - "AS ROW(struct_name_sub_1 varchar, struct_name_sub_2 varchar))" - ); - - List v4 = ImmutableList.of( - "4", - "varchar 'sam'", - "TIMESTAMP'2022-01-04 12:00:00.000000'", - "CAST(103 AS decimal(10,0))", - "map(ARRAY[varchar 'map_key'],ARRAY[varchar 'map_value'])", - "ARRAY[varchar 'array_element']", - "CAST(ROW(varchar 'struct_sub1', varchar 'struct_sub2') " + - "AS ROW(struct_name_sub_1 varchar, struct_name_sub_2 varchar))" - ); - - List v6 = ImmutableList.of( - "6", - "varchar 'mack'", - "TIMESTAMP'2022-01-01 12:00:00.000000'", - "CAST(105 AS decimal(10,0))", - "map(ARRAY[varchar 'map_key'],ARRAY[varchar 'map_value'])", - "ARRAY[varchar 'array_element']", - "CAST(ROW(varchar 'struct_sub1', varchar 'struct_sub2') " + - "AS ROW(struct_name_sub_1 varchar, struct_name_sub_2 varchar))" - ); + List v1 = + ImmutableList.of( + "1", + "varchar 'john'", + "TIMESTAMP'2022-01-01 12:00:00.000000'", + "CAST(100 AS decimal(10,0))", + "map(ARRAY[varchar 'map_key'],ARRAY[varchar 'map_value'])", + "ARRAY[varchar 'array_element']", + "CAST(ROW(varchar 'struct_sub1', varchar 'struct_sub2') " + + "AS ROW(struct_name_sub_1 varchar, struct_name_sub_2 varchar))"); + + List v2 = + ImmutableList.of( + "2", + "varchar 'lily'", + "TIMESTAMP'2022-01-02 12:00:00.000000'", + "CAST(101 AS decimal(10,0))", + "map(ARRAY[varchar 'map_key'],ARRAY[varchar 'map_value'])", + "ARRAY[varchar 'array_element']", + "CAST(ROW(varchar 'struct_sub1', varchar 'struct_sub2') " + + "AS ROW(struct_name_sub_1 varchar, struct_name_sub_2 varchar))"); + + List v3 = + ImmutableList.of( + "3", + "varchar 'jake'", + "TIMESTAMP'2022-01-03 12:00:00.000000'", + 
"CAST(102 AS decimal(10,0))", + "map(ARRAY[varchar 'map_key'],ARRAY[varchar 'map_value'])", + "ARRAY[varchar 'array_element']", + "CAST(ROW(varchar 'struct_sub1', varchar 'struct_sub2') " + + "AS ROW(struct_name_sub_1 varchar, struct_name_sub_2 varchar))"); + + List v4 = + ImmutableList.of( + "4", + "varchar 'sam'", + "TIMESTAMP'2022-01-04 12:00:00.000000'", + "CAST(103 AS decimal(10,0))", + "map(ARRAY[varchar 'map_key'],ARRAY[varchar 'map_value'])", + "ARRAY[varchar 'array_element']", + "CAST(ROW(varchar 'struct_sub1', varchar 'struct_sub2') " + + "AS ROW(struct_name_sub_1 varchar, struct_name_sub_2 varchar))"); + + List v6 = + ImmutableList.of( + "6", + "varchar 'mack'", + "TIMESTAMP'2022-01-01 12:00:00.000000'", + "CAST(105 AS decimal(10,0))", + "map(ARRAY[varchar 'map_key'],ARRAY[varchar 'map_value'])", + "ARRAY[varchar 'array_element']", + "CAST(ROW(varchar 'struct_sub1', varchar 'struct_sub2') " + + "AS ROW(struct_name_sub_1 varchar, struct_name_sub_2 varchar))"); } diff --git a/trino/src/test/java/com/netease/arctic/trino/arctic/TestHiveTableBaseForTrino.java b/trino/src/test/java/com/netease/arctic/trino/arctic/TestHiveTableBaseForTrino.java index 493fc662cc..e2bc519e54 100644 --- a/trino/src/test/java/com/netease/arctic/trino/arctic/TestHiveTableBaseForTrino.java +++ b/trino/src/test/java/com/netease/arctic/trino/arctic/TestHiveTableBaseForTrino.java @@ -18,6 +18,8 @@ package com.netease.arctic.trino.arctic; +import static com.netease.arctic.ams.api.MockArcticMetastoreServer.TEST_CATALOG_NAME; + import com.netease.arctic.ams.api.TableFormat; import com.netease.arctic.catalog.CatalogLoader; import com.netease.arctic.hive.HMSMockServer; @@ -43,20 +45,20 @@ import org.apache.thrift.TException; import org.junit.Assert; import org.junit.rules.TemporaryFolder; + import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; -import static com.netease.arctic.ams.api.MockArcticMetastoreServer.TEST_CATALOG_NAME; - public abstract class TestHiveTableBaseForTrino extends TableTestBaseForTrino { protected static final String HIVE_DB_NAME = "hivedb"; protected static final String HIVE_CATALOG_NAME = "hive_catalog"; protected static final AtomicInteger testCount = new AtomicInteger(0); private static final TemporaryFolder tempFolder = new TemporaryFolder(); + @ManageTestResources.Suppress(because = "no need") protected static HMSMockServer hms; @@ -81,23 +83,28 @@ public abstract class TestHiveTableBaseForTrino extends TableTestBaseForTrino { public static final String COLUMN_NAME_STRUCT_SUB1 = "struct_name_sub_1"; public static final String COLUMN_NAME_STRUCT_SUB2 = "struct_name_sub_2"; - public static final Schema STRUCT_SUB_SCHEMA = new Schema( - Types.NestedField.required(12, COLUMN_NAME_STRUCT_SUB1, Types.StringType.get()), - Types.NestedField.required(13, COLUMN_NAME_STRUCT_SUB2, Types.StringType.get()) - ); + public static final Schema STRUCT_SUB_SCHEMA = + new Schema( + Types.NestedField.required(12, COLUMN_NAME_STRUCT_SUB1, Types.StringType.get()), + Types.NestedField.required(13, COLUMN_NAME_STRUCT_SUB2, Types.StringType.get())); private static int i = 0; - public static final Schema HIVE_TABLE_SCHEMA = new Schema( - Types.NestedField.required(++i, COLUMN_NAME_ID, Types.IntegerType.get()), - Types.NestedField.required(++i, COLUMN_NAME_OP_TIME, Types.TimestampType.withoutZone()), - Types.NestedField.required(++i, COLUMN_NAME_OP_TIME_WITH_ZONE, Types.TimestampType.withZone()), - 
Types.NestedField.required(++i, COLUMN_NAME_D, Types.DecimalType.of(10, 0)), - Types.NestedField.required(++i, COLUMN_NAME_MAP, Types.MapType.ofOptional(++i, ++i, Types.StringType.get(), - Types.StringType.get())), - Types.NestedField.required(++i, COLUMN_NAME_ARRAY, Types.ListType.ofOptional(++i, Types.StringType.get())), - Types.NestedField.required(++i, COLUMN_NAME_STRUCT, Types.StructType.of(STRUCT_SUB_SCHEMA.columns())), - Types.NestedField.required(++i, COLUMN_NAME_NAME, Types.StringType.get()) - ); + public static final Schema HIVE_TABLE_SCHEMA = + new Schema( + Types.NestedField.required(++i, COLUMN_NAME_ID, Types.IntegerType.get()), + Types.NestedField.required(++i, COLUMN_NAME_OP_TIME, Types.TimestampType.withoutZone()), + Types.NestedField.required( + ++i, COLUMN_NAME_OP_TIME_WITH_ZONE, Types.TimestampType.withZone()), + Types.NestedField.required(++i, COLUMN_NAME_D, Types.DecimalType.of(10, 0)), + Types.NestedField.required( + ++i, + COLUMN_NAME_MAP, + Types.MapType.ofOptional(++i, ++i, Types.StringType.get(), Types.StringType.get())), + Types.NestedField.required( + ++i, COLUMN_NAME_ARRAY, Types.ListType.ofOptional(++i, Types.StringType.get())), + Types.NestedField.required( + ++i, COLUMN_NAME_STRUCT, Types.StructType.of(STRUCT_SUB_SCHEMA.columns())), + Types.NestedField.required(++i, COLUMN_NAME_NAME, Types.StringType.get())); protected static final PartitionSpec HIVE_SPEC = PartitionSpec.builderFor(HIVE_TABLE_SCHEMA).identity(COLUMN_NAME_NAME).build(); @@ -136,29 +143,41 @@ protected static void stopMetastore() { protected void setupTables() throws Exception { hiveCatalog = (ArcticHiveCatalog) CatalogLoader.load(AMS.getUrl(TEST_CATALOG_NAME)); - testHiveTable = (UnkeyedHiveTable) hiveCatalog - .newTableBuilder(HIVE_TABLE_ID, HIVE_TABLE_SCHEMA) - .withProperty(TableProperties.LOCATION, warehousePath() + "/table") - .withPartitionSpec(HIVE_SPEC) - .create().asUnkeyedTable(); - - testUnPartitionHiveTable = (UnkeyedHiveTable) hiveCatalog - .newTableBuilder(UN_PARTITION_HIVE_TABLE_ID, HIVE_TABLE_SCHEMA) - .withProperty(TableProperties.LOCATION, warehousePath() + "/un_partition_table") - .create().asUnkeyedTable(); - - testKeyedHiveTable = (KeyedHiveTable) hiveCatalog - .newTableBuilder(HIVE_PK_TABLE_ID, HIVE_TABLE_SCHEMA) - .withProperty(TableProperties.LOCATION, warehousePath() + "/pk_table") - .withPartitionSpec(HIVE_SPEC) - .withPrimaryKeySpec(PRIMARY_KEY_SPEC) - .create().asKeyedTable(); - - testUnPartitionKeyedHiveTable = (KeyedHiveTable) hiveCatalog - .newTableBuilder(UN_PARTITION_HIVE_PK_TABLE_ID, HIVE_TABLE_SCHEMA) - .withProperty(TableProperties.LOCATION, warehousePath() + "/un_partition_pk_table") - .withPrimaryKeySpec(PRIMARY_KEY_SPEC) - .create().asKeyedTable(); + testHiveTable = + (UnkeyedHiveTable) + hiveCatalog + .newTableBuilder(HIVE_TABLE_ID, HIVE_TABLE_SCHEMA) + .withProperty(TableProperties.LOCATION, warehousePath() + "/table") + .withPartitionSpec(HIVE_SPEC) + .create() + .asUnkeyedTable(); + + testUnPartitionHiveTable = + (UnkeyedHiveTable) + hiveCatalog + .newTableBuilder(UN_PARTITION_HIVE_TABLE_ID, HIVE_TABLE_SCHEMA) + .withProperty(TableProperties.LOCATION, warehousePath() + "/un_partition_table") + .create() + .asUnkeyedTable(); + + testKeyedHiveTable = + (KeyedHiveTable) + hiveCatalog + .newTableBuilder(HIVE_PK_TABLE_ID, HIVE_TABLE_SCHEMA) + .withProperty(TableProperties.LOCATION, warehousePath() + "/pk_table") + .withPartitionSpec(HIVE_SPEC) + .withPrimaryKeySpec(PRIMARY_KEY_SPEC) + .create() + .asKeyedTable(); + + testUnPartitionKeyedHiveTable = + 
(KeyedHiveTable) + hiveCatalog + .newTableBuilder(UN_PARTITION_HIVE_PK_TABLE_ID, HIVE_TABLE_SCHEMA) + .withProperty(TableProperties.LOCATION, warehousePath() + "/un_partition_pk_table") + .withPrimaryKeySpec(PRIMARY_KEY_SPEC) + .create() + .asKeyedTable(); } protected void clearTable() { @@ -172,7 +191,9 @@ protected void clearTable() { AMS.handler().getTableCommitMetas().remove(HIVE_PK_TABLE_ID.buildTableIdentifier()); hiveCatalog.dropTable(UN_PARTITION_HIVE_PK_TABLE_ID, true); - AMS.handler().getTableCommitMetas().remove(UN_PARTITION_HIVE_PK_TABLE_ID.buildTableIdentifier()); + AMS.handler() + .getTableCommitMetas() + .remove(UN_PARTITION_HIVE_PK_TABLE_ID.buildTableIdentifier()); AMS = null; } @@ -188,10 +209,11 @@ public DataFileBuilder(ArcticTable table) throws TException { } public DataFile build(String valuePath, String path) { - DataFiles.Builder builder = DataFiles.builder(table.spec()) - .withPath(hiveTable.getSd().getLocation() + path) - .withFileSizeInBytes(0) - .withRecordCount(2); + DataFiles.Builder builder = + DataFiles.builder(table.spec()) + .withPath(hiveTable.getSd().getLocation() + path) + .withFileSizeInBytes(0) + .withRecordCount(2); if (!StringUtils.isEmpty(valuePath)) { builder = builder.withPartitionPath(valuePath); @@ -200,9 +222,9 @@ public DataFile build(String valuePath, String path) { } public List buildList(List> partValueFiles) { - return partValueFiles.stream().map( - kv -> this.build(kv.getKey(), kv.getValue()) - ).collect(Collectors.toList()); + return partValueFiles.stream() + .map(kv -> this.build(kv.getKey(), kv.getValue())) + .collect(Collectors.toList()); } } @@ -223,38 +245,40 @@ public static String getPartitionPath(List values, PartitionSpec spec) { * @param table * @throws TException */ - public static void assertHivePartitionLocations(Map partitionLocations, ArcticTable table) - throws TException { + public static void assertHivePartitionLocations( + Map partitionLocations, ArcticTable table) throws TException { TableIdentifier identifier = table.id(); final String database = identifier.getDatabase(); final String tableName = identifier.getTableName(); - List partitions = hms.getClient().listPartitions( - database, - tableName, - (short) -1); + List partitions = hms.getClient().listPartitions(database, tableName, (short) -1); System.out.println("> assert hive partition location as expected"); - System.out.printf("HiveTable[%s.%s] partition count: %d \n", database, tableName, partitions.size()); + System.out.printf( + "HiveTable[%s.%s] partition count: %d \n", database, tableName, partitions.size()); for (Partition p : partitions) { System.out.printf( - "HiveTablePartition[%s.%s %s] location:%s \n", database, tableName, - Joiner.on("/").join(p.getValues()), p.getSd().getLocation()); + "HiveTablePartition[%s.%s %s] location:%s \n", + database, tableName, Joiner.on("/").join(p.getValues()), p.getSd().getLocation()); } - Assert.assertEquals("expect " + partitionLocations.size() + " partition after first rewrite partition", - partitionLocations.size(), partitions.size()); + Assert.assertEquals( + "expect " + partitionLocations.size() + " partition after first rewrite partition", + partitionLocations.size(), + partitions.size()); for (Partition p : partitions) { String valuePath = getPartitionPath(p.getValues(), table.spec()); Assert.assertTrue( - "partition " + valuePath + " is not expected", - partitionLocations.containsKey(valuePath)); + "partition " + valuePath + " is not expected", partitionLocations.containsKey(valuePath)); String locationExpect 
= partitionLocations.get(valuePath); String actualLocation = p.getSd().getLocation(); Assert.assertTrue( - "partition location is not expected, expect " + actualLocation + " end-with " + locationExpect, + "partition location is not expected, expect " + + actualLocation + + " end-with " + + locationExpect, actualLocation.contains(locationExpect)); } } diff --git a/trino/src/test/java/com/netease/arctic/trino/iceberg/ArcticQueryRunner.java b/trino/src/test/java/com/netease/arctic/trino/iceberg/ArcticQueryRunner.java index 9ddf998a37..ccc8d279f8 100644 --- a/trino/src/test/java/com/netease/arctic/trino/iceberg/ArcticQueryRunner.java +++ b/trino/src/test/java/com/netease/arctic/trino/iceberg/ArcticQueryRunner.java @@ -18,6 +18,11 @@ package com.netease.arctic.trino.iceberg; +import static com.google.common.base.Preconditions.checkState; +import static io.airlift.testing.Closeables.closeAllSuppress; +import static io.trino.testing.TestingSession.testSessionBuilder; +import static java.util.Objects.requireNonNull; + import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import io.airlift.log.Logger; @@ -30,41 +35,29 @@ import java.util.Map; import java.util.Optional; -import static com.google.common.base.Preconditions.checkState; -import static io.airlift.testing.Closeables.closeAllSuppress; -import static io.trino.testing.TestingSession.testSessionBuilder; -import static java.util.Objects.requireNonNull; - public final class ArcticQueryRunner { private static final Logger log = Logger.get(ArcticQueryRunner.class); public static final String ARCTIC_CATALOG = "arctic"; - private ArcticQueryRunner() { - } + private ArcticQueryRunner() {} public static DistributedQueryRunner createIcebergQueryRunner(TpchTable... tables) throws Exception { - return builder() - .setInitialTables(tables) - .build(); + return builder().setInitialTables(tables).build(); } public static Builder builder() { return new Builder(); } - public static class Builder - extends DistributedQueryRunner.Builder { + public static class Builder extends DistributedQueryRunner.Builder { private Optional metastoreDirectory = Optional.empty(); private ImmutableMap.Builder icebergProperties = ImmutableMap.builder(); private Optional schemaInitializer = Optional.empty(); protected Builder() { - super(testSessionBuilder() - .setCatalog(ARCTIC_CATALOG) - .setSchema("tpch") - .build()); + super(testSessionBuilder().setCatalog(ARCTIC_CATALOG).setSchema("tpch").build()); } public Builder setMetastoreDirectory(File metastoreDirectory) { @@ -73,8 +66,9 @@ public Builder setMetastoreDirectory(File metastoreDirectory) { } public Builder setIcebergProperties(Map icebergProperties) { - this.icebergProperties = ImmutableMap.builder() - .putAll(requireNonNull(icebergProperties, "icebergProperties is null")); + this.icebergProperties = + ImmutableMap.builder() + .putAll(requireNonNull(icebergProperties, "icebergProperties is null")); return self(); } @@ -94,23 +88,30 @@ public Builder setInitialTables(Iterable> initialTables) { public Builder setSchemaInitializer(SchemaInitializer schemaInitializer) { checkState(this.schemaInitializer.isEmpty(), "schemaInitializer is already set"); - this.schemaInitializer = Optional.of(requireNonNull(schemaInitializer, "schemaInitializer is null")); + this.schemaInitializer = + Optional.of(requireNonNull(schemaInitializer, "schemaInitializer is null")); amendSession(sessionBuilder -> sessionBuilder.setSchema(schemaInitializer.getSchemaName())); return self(); } @Override - public 
DistributedQueryRunner build() - throws Exception { + public DistributedQueryRunner build() throws Exception { DistributedQueryRunner queryRunner = super.build(); try { queryRunner.installPlugin(new TpchPlugin()); queryRunner.createCatalog("tpch", "tpch"); queryRunner.installPlugin(new TestArcticPlugin()); - Map icebergProperties = new HashMap<>(this.icebergProperties.buildOrThrow()); - icebergProperties.put("arctic.url", queryRunner.getCoordinator() - .getBaseDataDir().resolve("arctic").toAbsolutePath().toString()); + Map icebergProperties = + new HashMap<>(this.icebergProperties.buildOrThrow()); + icebergProperties.put( + "arctic.url", + queryRunner + .getCoordinator() + .getBaseDataDir() + .resolve("arctic") + .toAbsolutePath() + .toString()); queryRunner.createCatalog(ARCTIC_CATALOG, "arctic", icebergProperties); schemaInitializer.orElse(SchemaInitializer.builder().build()).accept(queryRunner); @@ -122,13 +123,13 @@ public DistributedQueryRunner build() } } - public static void main(String[] args) - throws Exception { + public static void main(String[] args) throws Exception { DistributedQueryRunner queryRunner = null; - queryRunner = ArcticQueryRunner.builder() - .setExtraProperties(ImmutableMap.of("http-server.http.port", "8080")) - .setInitialTables(TpchTable.getTables()) - .build(); + queryRunner = + ArcticQueryRunner.builder() + .setExtraProperties(ImmutableMap.of("http-server.http.port", "8080")) + .setInitialTables(TpchTable.getTables()) + .build(); Thread.sleep(10); Logger log = Logger.get(ArcticQueryRunner.class); log.info("======== SERVER STARTED ========"); diff --git a/trino/src/test/java/com/netease/arctic/trino/iceberg/ArcticQueryRunnerForClient.java b/trino/src/test/java/com/netease/arctic/trino/iceberg/ArcticQueryRunnerForClient.java index ef85de7056..624a031330 100644 --- a/trino/src/test/java/com/netease/arctic/trino/iceberg/ArcticQueryRunnerForClient.java +++ b/trino/src/test/java/com/netease/arctic/trino/iceberg/ArcticQueryRunnerForClient.java @@ -18,6 +18,8 @@ package com.netease.arctic.trino.iceberg; +import static io.trino.testing.TestingSession.testSessionBuilder; + import com.google.common.collect.ImmutableMap; import com.netease.arctic.trino.ArcticPlugin; import io.airlift.log.Logger; @@ -28,48 +30,41 @@ import java.nio.file.Path; import java.util.Map; -import static io.trino.testing.TestingSession.testSessionBuilder; - public final class ArcticQueryRunnerForClient { private static final Logger log = Logger.get(ArcticQueryRunnerForClient.class); public static final String ARCTIC_CATALOG = "arctic"; - private ArcticQueryRunnerForClient() { - } + private ArcticQueryRunnerForClient() {} - public static DistributedQueryRunner createIcebergQueryRunner(Map extraProperties, String url) throws Exception { + public static DistributedQueryRunner createIcebergQueryRunner( + Map extraProperties, String url) throws Exception { return createIcebergQueryRunner(extraProperties, false, url); } public static DistributedQueryRunner createIcebergQueryRunner( - Map extraProperties, - boolean createTpchTables, String url) - throws Exception { - Session session = testSessionBuilder() - .setCatalog(ARCTIC_CATALOG) - .build(); + Map extraProperties, boolean createTpchTables, String url) throws Exception { + Session session = testSessionBuilder().setCatalog(ARCTIC_CATALOG).build(); - DistributedQueryRunner queryRunner = DistributedQueryRunner.builder(session) - .setExtraProperties(extraProperties) - .setNodeCount(1) - .build(); + DistributedQueryRunner queryRunner = + 
DistributedQueryRunner.builder(session) + .setExtraProperties(extraProperties) + .setNodeCount(1) + .build(); Path dataDir = queryRunner.getCoordinator().getBaseDataDir().resolve("arctic_data"); Path catalogDir = dataDir.getParent().resolve("catalog"); queryRunner.installPlugin(new ArcticPlugin()); - Map icebergProperties = ImmutableMap.builder() - .put("arctic.url", url) - .build(); + Map icebergProperties = + ImmutableMap.builder().put("arctic.url", url).build(); queryRunner.createCatalog(ARCTIC_CATALOG, "arctic", icebergProperties); return queryRunner; } - public static void main(String[] args) - throws Exception { + public static void main(String[] args) throws Exception { Logging.initialize(); Map properties = ImmutableMap.of("http-server.http.port", "8080"); String url = args[0]; diff --git a/trino/src/test/java/com/netease/arctic/trino/iceberg/BaseConnectorTest.java b/trino/src/test/java/com/netease/arctic/trino/iceberg/BaseConnectorTest.java index 6723d496fb..e71b8eeffa 100644 --- a/trino/src/test/java/com/netease/arctic/trino/iceberg/BaseConnectorTest.java +++ b/trino/src/test/java/com/netease/arctic/trino/iceberg/BaseConnectorTest.java @@ -18,57 +18,6 @@ package com.netease.arctic.trino.iceberg; -import com.google.common.base.Stopwatch; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; -import com.google.common.util.concurrent.UncheckedTimeoutException; -import io.airlift.units.Duration; -import io.trino.Session; -import io.trino.connector.CatalogName; -import io.trino.cost.StatsAndCosts; -import io.trino.dispatcher.DispatchManager; -import io.trino.execution.QueryInfo; -import io.trino.execution.QueryManager; -import io.trino.metadata.FunctionManager; -import io.trino.metadata.Metadata; -import io.trino.metadata.QualifiedObjectName; -import io.trino.server.BasicQueryInfo; -import io.trino.sql.planner.OptimizerConfig.JoinDistributionType; -import io.trino.sql.planner.Plan; -import io.trino.sql.planner.plan.LimitNode; -import io.trino.testing.AbstractTestQueries; -import io.trino.testing.DistributedQueryRunner; -import io.trino.testing.LocalQueryRunner; -import io.trino.testing.MaterializedResult; -import io.trino.testing.MaterializedResultWithQueryId; -import io.trino.testing.MaterializedRow; -import io.trino.testing.TestingConnectorBehavior; -import io.trino.testing.sql.TestTable; -import org.intellij.lang.annotations.Language; -import org.testng.Assert; -import org.testng.SkipException; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.lang.reflect.Method; -import java.util.ArrayDeque; -import java.util.ArrayList; -import java.util.Deque; -import java.util.List; -import java.util.Optional; -import java.util.concurrent.Callable; -import java.util.concurrent.CompletionService; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.ExecutorCompletionService; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Future; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.function.Consumer; -import java.util.function.Predicate; -import java.util.function.Supplier; -import java.util.stream.Stream; - import static com.google.common.base.Preconditions.checkState; import static com.google.common.base.Verify.verifyNotNull; import static com.google.common.collect.Iterables.getOnlyElement; @@ -131,17 +80,66 @@ import static org.testng.Assert.assertTrue; import static org.testng.Assert.fail; 
-/** - * Generic test for connectors. - */ -public abstract class BaseConnectorTest - extends AbstractTestQueries { +import com.google.common.base.Stopwatch; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.google.common.util.concurrent.UncheckedTimeoutException; +import io.airlift.units.Duration; +import io.trino.Session; +import io.trino.connector.CatalogName; +import io.trino.cost.StatsAndCosts; +import io.trino.dispatcher.DispatchManager; +import io.trino.execution.QueryInfo; +import io.trino.execution.QueryManager; +import io.trino.metadata.FunctionManager; +import io.trino.metadata.Metadata; +import io.trino.metadata.QualifiedObjectName; +import io.trino.server.BasicQueryInfo; +import io.trino.sql.planner.OptimizerConfig.JoinDistributionType; +import io.trino.sql.planner.Plan; +import io.trino.sql.planner.plan.LimitNode; +import io.trino.testing.AbstractTestQueries; +import io.trino.testing.DistributedQueryRunner; +import io.trino.testing.LocalQueryRunner; +import io.trino.testing.MaterializedResult; +import io.trino.testing.MaterializedResultWithQueryId; +import io.trino.testing.MaterializedRow; +import io.trino.testing.TestingConnectorBehavior; +import io.trino.testing.sql.TestTable; +import org.intellij.lang.annotations.Language; +import org.testng.Assert; +import org.testng.SkipException; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.lang.reflect.Method; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Deque; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.Callable; +import java.util.concurrent.CompletionService; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorCompletionService; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Consumer; +import java.util.function.Predicate; +import java.util.function.Supplier; +import java.util.stream.Stream; + +/** Generic test for connectors. */ +public abstract class BaseConnectorTest extends AbstractTestQueries { private Method method; protected BaseConnectorTest() { try { - this.method = TestingConnectorBehavior.class.getDeclaredMethod("hasBehaviorByDefault", Predicate.class); + this.method = + TestingConnectorBehavior.class.getDeclaredMethod("hasBehaviorByDefault", Predicate.class); method.setAccessible(true); } catch (NoSuchMethodException e) { e.printStackTrace(); @@ -150,13 +148,16 @@ protected BaseConnectorTest() { protected boolean hasBehavior(TestingConnectorBehavior connectorBehavior) { try { - return (boolean) method.invoke(connectorBehavior, new Predicate() { - - @Override - public boolean test(TestingConnectorBehavior testingConnectorBehavior) { - return hasBehavior(testingConnectorBehavior); - } - }); + return (boolean) + method.invoke( + connectorBehavior, + new Predicate() { + + @Override + public boolean test(TestingConnectorBehavior testingConnectorBehavior) { + return hasBehavior(testingConnectorBehavior); + } + }); } catch (Exception e) { throw new RuntimeException(e); } @@ -166,17 +167,17 @@ public boolean test(TestingConnectorBehavior testingConnectorBehavior) { @Override public void ensureTestNamingConvention() { // Enforce a naming convention to make code navigation easier. 
- assertThat(getClass().getName()) - .endsWith("ConnectorTest"); + assertThat(getClass().getName()).endsWith("ConnectorTest"); } /** - * Ensure the tests are run with {@link DistributedQueryRunner}. E.g. {@link LocalQueryRunner} takes some - * shortcuts, not exercising certain aspects. + * Ensure the tests are run with {@link DistributedQueryRunner}. E.g. {@link LocalQueryRunner} + * takes some shortcuts, not exercising certain aspects. */ @Test public void ensureDistributedQueryRunner() { - assertThat(getQueryRunner().getNodeCount()).as("query runner node count") + assertThat(getQueryRunner().getNodeCount()) + .as("query runner node count") .isGreaterThanOrEqualTo(3); } @@ -184,14 +185,16 @@ public void ensureDistributedQueryRunner() { public void testShowCreateSchema() { String schemaName = getSession().getSchema().orElseThrow(); assertThat((String) computeScalar("SHOW CREATE SCHEMA " + schemaName)) - .isEqualTo(format("CREATE SCHEMA %s.%s", getSession().getCatalog().orElseThrow(), schemaName)); + .isEqualTo( + format("CREATE SCHEMA %s.%s", getSession().getCatalog().orElseThrow(), schemaName)); } @Test public void testCreateSchema() { String schemaName = "test_schema_create_" + randomNameSuffix(); if (!hasBehavior(SUPPORTS_CREATE_SCHEMA)) { - assertQueryFails(createSchemaSql(schemaName), "This connector does not support creating schemas"); + assertQueryFails( + createSchemaSql(schemaName), "This connector does not support creating schemas"); return; } assertThat(computeActual("SHOW SCHEMAS").getOnlyColumnAsSet()).doesNotContain(schemaName); @@ -202,16 +205,21 @@ public void testCreateSchema() { // verify SHOW CREATE SCHEMA works assertThat((String) computeScalar("SHOW CREATE SCHEMA " + schemaName)) - .startsWith(format("CREATE SCHEMA %s.%s", getSession().getCatalog().orElseThrow(), schemaName)); + .startsWith( + format("CREATE SCHEMA %s.%s", getSession().getCatalog().orElseThrow(), schemaName)); // try to create duplicate schema - assertQueryFails(createSchemaSql(schemaName), format("line 1:1: Schema '.*\\.%s' already exists", schemaName)); + assertQueryFails( + createSchemaSql(schemaName), + format("line 1:1: Schema '.*\\.%s' already exists", schemaName)); // cleanup assertUpdate("DROP SCHEMA " + schemaName); // verify DROP SCHEMA for non-existing schema - assertQueryFails("DROP SCHEMA " + schemaName, format("line 1:1: Schema '.*\\.%s' does not exist", schemaName)); + assertQueryFails( + "DROP SCHEMA " + schemaName, + format("line 1:1: Schema '.*\\.%s' does not exist", schemaName)); } @Test @@ -225,7 +233,8 @@ public void testDropNonEmptySchemaWithTable() { try { assertUpdate("CREATE SCHEMA " + schemaName); assertUpdate("CREATE TABLE " + schemaName + ".t(x int)"); - assertQueryFails("DROP SCHEMA " + schemaName, ".*Cannot drop non-empty schema '\\Q" + schemaName + "\\E'"); + assertQueryFails( + "DROP SCHEMA " + schemaName, ".*Cannot drop non-empty schema '\\Q" + schemaName + "\\E'"); } finally { assertUpdate("DROP TABLE IF EXISTS " + schemaName + ".t"); assertUpdate("DROP SCHEMA IF EXISTS " + schemaName); @@ -247,7 +256,8 @@ public void testDropNonEmptySchemaWithView() { assertUpdate("CREATE SCHEMA " + schemaName); assertUpdate("CREATE VIEW " + schemaName + ".v_t AS SELECT 123 x"); - assertQueryFails("DROP SCHEMA " + schemaName, ".*Cannot drop non-empty schema '\\Q" + schemaName + "\\E'"); + assertQueryFails( + "DROP SCHEMA " + schemaName, ".*Cannot drop non-empty schema '\\Q" + schemaName + "\\E'"); } finally { assertUpdate("DROP VIEW IF EXISTS " + schemaName + ".v_t"); 
assertUpdate("DROP SCHEMA IF EXISTS " + schemaName); @@ -269,7 +279,8 @@ public void testDropNonEmptySchemaWithMaterializedView() { assertUpdate("CREATE SCHEMA " + schemaName); assertUpdate("CREATE MATERIALIZED VIEW " + schemaName + ".mv_t AS SELECT 123 x"); - assertQueryFails("DROP SCHEMA " + schemaName, ".*Cannot drop non-empty schema '\\Q" + schemaName + "\\E'"); + assertQueryFails( + "DROP SCHEMA " + schemaName, ".*Cannot drop non-empty schema '\\Q" + schemaName + "\\E'"); } finally { assertUpdate("DROP MATERIALIZED VIEW IF EXISTS " + schemaName + ".mv_t"); assertUpdate("DROP SCHEMA IF EXISTS " + schemaName); @@ -281,19 +292,21 @@ public void testColumnsInReverseOrder() { assertQuery("SELECT shippriority, clerk, totalprice FROM orders"); } - // Test char and varchar comparisons. Currently, unless such comparison is unwrapped in the engine, it's not pushed down into the connector, + // Test char and varchar comparisons. Currently, unless such comparison is unwrapped in the + // engine, it's not pushed down into the connector, // but this can change with expression-based predicate pushdown. @Test public void testCharVarcharComparison() { skipTestUnless(hasBehavior(SUPPORTS_CREATE_TABLE)); - try (TestTable table = new TestTable( - getQueryRunner()::execute, - "test_char_varchar", - "(k, v) AS VALUES" + - " (-1, CAST(NULL AS char(3))), " + - " (3, CAST(' ' AS char(3)))," + - " (6, CAST('x ' AS char(3)))")) { + try (TestTable table = + new TestTable( + getQueryRunner()::execute, + "test_char_varchar", + "(k, v) AS VALUES" + + " (-1, CAST(NULL AS char(3))), " + + " (3, CAST(' ' AS char(3)))," + + " (6, CAST('x ' AS char(3)))")) { // varchar of length shorter than column's length assertQuery( "SELECT k, v FROM " + table.getName() + " WHERE v = CAST(' ' AS varchar(2))", @@ -314,33 +327,37 @@ public void testCharVarcharComparison() { } } - // Test varchar and char comparisons. Currently, unless such comparison is unwrapped in the engine, it's not pushed down into the connector, + // Test varchar and char comparisons. Currently, unless such comparison is unwrapped in the + // engine, it's not pushed down into the connector, // but this can change with expression-based predicate pushdown. 
@Test public void testVarcharCharComparison() { skipTestUnless(hasBehavior(SUPPORTS_CREATE_TABLE)); - try (TestTable table = new TestTable( - getQueryRunner()::execute, - "test_varchar_char", - "(k, v) AS VALUES" + - " (-1, CAST(NULL AS varchar(3))), " + - " (0, CAST('' AS varchar(3)))," + - " (1, CAST(' ' AS varchar(3))), " + - " (2, CAST(' ' AS varchar(3))), " + - " (3, CAST(' ' AS varchar(3)))," + - " (4, CAST('x' AS varchar(3)))," + - " (5, CAST('x ' AS varchar(3)))," + - " (6, CAST('x ' AS varchar(3)))")) { + try (TestTable table = + new TestTable( + getQueryRunner()::execute, + "test_varchar_char", + "(k, v) AS VALUES" + + " (-1, CAST(NULL AS varchar(3))), " + + " (0, CAST('' AS varchar(3)))," + + " (1, CAST(' ' AS varchar(3))), " + + " (2, CAST(' ' AS varchar(3))), " + + " (3, CAST(' ' AS varchar(3)))," + + " (4, CAST('x' AS varchar(3)))," + + " (5, CAST('x ' AS varchar(3)))," + + " (6, CAST('x ' AS varchar(3)))")) { assertQuery( "SELECT k, v FROM " + table.getName() + " WHERE v = CAST(' ' AS char(2))", - // The 3-spaces value is included because both sides of the comparison are coerced to char(3) + // The 3-spaces value is included because both sides of the comparison are coerced to + // char(3) "VALUES (0, ''), (1, ' '), (2, ' '), (3, ' ')"); // value that's not all-spaces assertQuery( "SELECT k, v FROM " + table.getName() + " WHERE v = CAST('x ' AS char(2))", - // The 3-spaces value is included because both sides of the comparison are coerced to char(3) + // The 3-spaces value is included because both sides of the comparison are coerced to + // char(3) "VALUES (4, 'x'), (5, 'x '), (6, 'x ')"); } } @@ -362,14 +379,14 @@ public void testAggregation() { // TODO support aggregation pushdown with GROUPING SETS assertQuery( "SELECT regionkey, nationkey FROM nation GROUP BY GROUPING SETS ((regionkey), (nationkey))", - "SELECT NULL, nationkey FROM nation " + - "UNION ALL SELECT DISTINCT regionkey, NULL FROM nation"); + "SELECT NULL, nationkey FROM nation " + + "UNION ALL SELECT DISTINCT regionkey, NULL FROM nation"); assertQuery( "SELECT regionkey, nationkey, count(*) FROM nation GROUP BY GROUPING SETS ((), (regionkey), (nationkey), (regionkey, nationkey))", - "SELECT NULL, NULL, count(*) FROM nation " + - "UNION ALL SELECT NULL, nationkey, 1 FROM nation " + - "UNION ALL SELECT regionkey, NULL, count(*) FROM nation GROUP BY regionkey " + - "UNION ALL SELECT regionkey, nationkey, 1 FROM nation"); + "SELECT NULL, NULL, count(*) FROM nation " + + "UNION ALL SELECT NULL, nationkey, 1 FROM nation " + + "UNION ALL SELECT regionkey, NULL, count(*) FROM nation GROUP BY regionkey " + + "UNION ALL SELECT regionkey, nationkey, 1 FROM nation"); assertQuery("SELECT count(regionkey) FROM nation"); assertQuery("SELECT count(DISTINCT regionkey) FROM nation"); @@ -377,15 +394,15 @@ public void testAggregation() { assertQuery("SELECT min(regionkey), max(regionkey) FROM nation"); assertQuery("SELECT min(DISTINCT regionkey), max(DISTINCT regionkey) FROM nation"); - assertQuery("SELECT regionkey, min(regionkey), min(name), max(regionkey), max(name) FROM nation GROUP BY regionkey"); + assertQuery( + "SELECT regionkey, min(regionkey), min(name), max(regionkey), max(name) FROM nation GROUP BY regionkey"); assertQuery("SELECT sum(regionkey) FROM nation"); assertQuery("SELECT sum(DISTINCT regionkey) FROM nation"); assertQuery("SELECT regionkey, sum(regionkey) FROM nation GROUP BY regionkey"); assertQuery( - "SELECT avg(nationkey) FROM nation", - "SELECT avg(CAST(nationkey AS double)) FROM nation"); + "SELECT 
avg(nationkey) FROM nation", "SELECT avg(CAST(nationkey AS double)) FROM nation"); assertQuery( "SELECT avg(DISTINCT nationkey) FROM nation", "SELECT avg(DISTINCT CAST(nationkey AS double)) FROM nation"); @@ -410,10 +427,14 @@ public void testInListPredicate() { assertQueryReturnsEmptyResult("SELECT * FROM orders WHERE orderkey IN (10, 11, 20, 21)"); // filtered column is selected - assertQuery("SELECT custkey, orderkey FROM orders WHERE orderkey IN (7, 10, 32, 33)", "VALUES (392, 7), (1301, 32), (670, 33)"); + assertQuery( + "SELECT custkey, orderkey FROM orders WHERE orderkey IN (7, 10, 32, 33)", + "VALUES (392, 7), (1301, 32), (670, 33)"); // filtered column is not selected - assertQuery("SELECT custkey FROM orders WHERE orderkey IN (7, 10, 32, 33)", "VALUES (392), (1301), (670)"); + assertQuery( + "SELECT custkey FROM orders WHERE orderkey IN (7, 10, 32, 33)", + "VALUES (392), (1301), (670)"); } @Test @@ -422,10 +443,13 @@ public void testIsNullPredicate() { assertQueryReturnsEmptyResult("SELECT * FROM orders WHERE orderkey = 10 OR orderkey IS NULL"); // filtered column is selected - assertQuery("SELECT custkey, orderkey FROM orders WHERE orderkey = 32 OR orderkey IS NULL", "VALUES (1301, 32)"); + assertQuery( + "SELECT custkey, orderkey FROM orders WHERE orderkey = 32 OR orderkey IS NULL", + "VALUES (1301, 32)"); // filtered column is not selected - assertQuery("SELECT custkey FROM orders WHERE orderkey = 32 OR orderkey IS NULL", "VALUES (1301)"); + assertQuery( + "SELECT custkey FROM orders WHERE orderkey = 32 OR orderkey IS NULL", "VALUES (1301)"); } @Test @@ -446,31 +470,37 @@ public void testLikePredicate() { @Test public void testMultipleRangesPredicate() { // List columns explicitly. Some connectors do not maintain column ordering. - assertQuery("" + - "SELECT orderkey, custkey, orderstatus, totalprice, orderdate, orderpriority, clerk, shippriority, comment " + - "FROM orders " + - "WHERE orderkey BETWEEN 10 AND 50"); + assertQuery( + "" + + "SELECT orderkey, custkey, orderstatus, totalprice, orderdate, orderpriority, clerk, shippriority, comment " + + "FROM orders " + + "WHERE orderkey BETWEEN 10 AND 50"); } @Test public void testRangePredicate() { // List columns explicitly. Some connectors do not maintain column ordering. - assertQuery("" + - "SELECT orderkey, custkey, orderstatus, totalprice, orderdate, orderpriority, clerk, shippriority, comment " + - "FROM orders " + - "WHERE orderkey BETWEEN 10 AND 50"); + assertQuery( + "" + + "SELECT orderkey, custkey, orderstatus, totalprice, orderdate, orderpriority, clerk, shippriority, comment " + + "FROM orders " + + "WHERE orderkey BETWEEN 10 AND 50"); } @Test public void testDateYearOfEraPredicate() { - // Verify the predicate of '-1996-09-14' doesn't match '1997-09-14'. Both values return same formatted string when we use 'yyyy-MM-dd' in DateTimeFormatter - assertQuery("SELECT orderdate FROM orders WHERE orderdate = DATE '1997-09-14'", "VALUES DATE '1997-09-14'"); + // Verify the predicate of '-1996-09-14' doesn't match '1997-09-14'. 
Both values return same + // formatted string when we use 'yyyy-MM-dd' in DateTimeFormatter + assertQuery( + "SELECT orderdate FROM orders WHERE orderdate = DATE '1997-09-14'", + "VALUES DATE '1997-09-14'"); assertQueryReturnsEmptyResult("SELECT * FROM orders WHERE orderdate = DATE '-1996-09-14'"); } @Test public void testPredicateReflectedInExplain() { - // Even if the predicate is pushed down into the table scan, it should still be reflected in EXPLAIN (via ConnectorTableHandle.toString) + // Even if the predicate is pushed down into the table scan, it should still be reflected in + // EXPLAIN (via ConnectorTableHandle.toString) assertExplain( "EXPLAIN SELECT name FROM nation WHERE nationkey = 42", "(predicate|filterPredicate|constraint).{0,10}(nationkey|NATIONKEY)"); @@ -478,10 +508,13 @@ public void testPredicateReflectedInExplain() { @Test public void testSortItemsReflectedInExplain() { - // Even if the sort items are pushed down into the table scan, it should still be reflected in EXPLAIN (via ConnectorTableHandle.toString) - @Language("RegExp") String expectedPattern = hasBehavior(SUPPORTS_TOPN_PUSHDOWN) - ? "sortOrder=\\[(?i:nationkey):.* DESC NULLS LAST] limit=5" - : "\\[count = 5, orderBy = \\[(?i:nationkey) DESC NULLS LAST]]"; + // Even if the sort items are pushed down into the table scan, it should still be reflected in + // EXPLAIN (via ConnectorTableHandle.toString) + @Language("RegExp") + String expectedPattern = + hasBehavior(SUPPORTS_TOPN_PUSHDOWN) + ? "sortOrder=\\[(?i:nationkey):.* DESC NULLS LAST] limit=5" + : "\\[count = 5, orderBy = \\[(?i:nationkey) DESC NULLS LAST]]"; assertExplain( "EXPLAIN SELECT name FROM nation ORDER BY nationkey DESC NULLS LAST LIMIT 5", @@ -491,7 +524,9 @@ public void testSortItemsReflectedInExplain() { @Test public void testConcurrentScans() { String unionMultipleTimes = join(" UNION ALL ", nCopies(25, "SELECT * FROM orders")); - assertQuery("SELECT sum(if(rand() >= 0, orderkey)) FROM (" + unionMultipleTimes + ")", "VALUES 11246812500"); + assertQuery( + "SELECT sum(if(rand() >= 0, orderkey)) FROM (" + unionMultipleTimes + ")", + "VALUES 11246812500"); } @Test @@ -501,11 +536,12 @@ public void testSelectAll() { @Test public void testSelectInTransaction() { - inTransaction(session -> { - assertQuery(session, "SELECT nationkey, name, regionkey FROM nation"); - assertQuery(session, "SELECT regionkey, name FROM region"); - assertQuery(session, "SELECT nationkey, name, regionkey FROM nation"); - }); + inTransaction( + session -> { + assertQuery(session, "SELECT nationkey, name, regionkey FROM nation"); + assertQuery(session, "SELECT regionkey, name FROM region"); + assertQuery(session, "SELECT nationkey, name, regionkey FROM nation"); + }); } /** @@ -515,68 +551,78 @@ public void testSelectInTransaction() { public void testJoinWithEmptySides(JoinDistributionType joinDistributionType) { Session session = noJoinReordering(joinDistributionType); // empty build side - assertQuery(session, "SELECT count(*) FROM nation JOIN region ON nation.regionkey = region.regionkey AND region.name = ''", "VALUES 0"); - assertQuery(session, "SELECT count(*) FROM nation JOIN region ON nation.regionkey = region.regionkey AND region.regionkey < 0", "VALUES 0"); + assertQuery( + session, + "SELECT count(*) FROM nation JOIN region ON nation.regionkey = region.regionkey AND region.name = ''", + "VALUES 0"); + assertQuery( + session, + "SELECT count(*) FROM nation JOIN region ON nation.regionkey = region.regionkey AND region.regionkey < 0", + "VALUES 0"); // empty probe 
side - assertQuery(session, "SELECT count(*) FROM region JOIN nation ON nation.regionkey = region.regionkey AND region.name = ''", "VALUES 0"); - assertQuery(session, "SELECT count(*) FROM nation JOIN region ON nation.regionkey = region.regionkey AND region.regionkey < 0", "VALUES 0"); + assertQuery( + session, + "SELECT count(*) FROM region JOIN nation ON nation.regionkey = region.regionkey AND region.name = ''", + "VALUES 0"); + assertQuery( + session, + "SELECT count(*) FROM nation JOIN region ON nation.regionkey = region.regionkey AND region.regionkey < 0", + "VALUES 0"); } @DataProvider public Object[][] joinDistributionTypes() { - return Stream.of(JoinDistributionType.values()) - .collect(toDataProvider()); + return Stream.of(JoinDistributionType.values()).collect(toDataProvider()); } - /** - * Test interactions between optimizer (including CBO) and connector metadata APIs. - */ + /** Test interactions between optimizer (including CBO) and connector metadata APIs. */ @Test public void testJoin() { - Session session = Session.builder(getSession()) - .setSystemProperty(IGNORE_STATS_CALCULATOR_FAILURES, "false") - .build(); + Session session = + Session.builder(getSession()) + .setSystemProperty(IGNORE_STATS_CALCULATOR_FAILURES, "false") + .build(); // 2 inner joins, eligible for join reodering assertQuery( session, - "SELECT c.name, n.name, r.name " + - "FROM nation n " + - "JOIN customer c ON c.nationkey = n.nationkey " + - "JOIN region r ON n.regionkey = r.regionkey"); + "SELECT c.name, n.name, r.name " + + "FROM nation n " + + "JOIN customer c ON c.nationkey = n.nationkey " + + "JOIN region r ON n.regionkey = r.regionkey"); // 2 inner joins, eligible for join reodering, where one table has a filter assertQuery( session, - "SELECT c.name, n.name, r.name " + - "FROM nation n " + - "JOIN customer c ON c.nationkey = n.nationkey " + - "JOIN region r ON n.regionkey = r.regionkey " + - "WHERE n.name = 'ARGENTINA'"); + "SELECT c.name, n.name, r.name " + + "FROM nation n " + + "JOIN customer c ON c.nationkey = n.nationkey " + + "JOIN region r ON n.regionkey = r.regionkey " + + "WHERE n.name = 'ARGENTINA'"); // 2 inner joins, eligible for join reodering, on top of aggregation assertQuery( session, - "SELECT c.name, n.name, n.count, r.name " + - "FROM (SELECT name, regionkey, nationkey, count(*) count FROM nation GROUP BY name, regionkey, nationkey) n " + - "JOIN customer c ON c.nationkey = n.nationkey " + - "JOIN region r ON n.regionkey = r.regionkey"); + "SELECT c.name, n.name, n.count, r.name " + + "FROM (SELECT name, regionkey, nationkey, count(*) count FROM nation GROUP BY name, regionkey, nationkey) n " + + "JOIN customer c ON c.nationkey = n.nationkey " + + "JOIN region r ON n.regionkey = r.regionkey"); } @Test public void testDescribeTable() { - MaterializedResult - expectedColumns = MaterializedResult.resultBuilder(getSession(), VARCHAR, VARCHAR, VARCHAR, VARCHAR) - .row("orderkey", "bigint", "", "") - .row("custkey", "bigint", "", "") - .row("orderstatus", "varchar(1)", "", "") - .row("totalprice", "double", "", "") - .row("orderdate", "date", "", "") - .row("orderpriority", "varchar(15)", "", "") - .row("clerk", "varchar(15)", "", "") - .row("shippriority", "integer", "", "") - .row("comment", "varchar(79)", "", "") - .build(); + MaterializedResult expectedColumns = + MaterializedResult.resultBuilder(getSession(), VARCHAR, VARCHAR, VARCHAR, VARCHAR) + .row("orderkey", "bigint", "", "") + .row("custkey", "bigint", "", "") + .row("orderstatus", "varchar(1)", "", "") + 
.row("totalprice", "double", "", "") + .row("orderdate", "date", "", "") + .row("orderpriority", "varchar(15)", "", "") + .row("clerk", "varchar(15)", "", "") + .row("shippriority", "integer", "", "") + .row("comment", "varchar(79)", "", "") + .build(); MaterializedResult actualColumns = computeActual("DESCRIBE orders"); assertEquals(actualColumns, expectedColumns); } @@ -584,11 +630,14 @@ public void testDescribeTable() { @Test public void testView() { if (!hasBehavior(SUPPORTS_CREATE_VIEW)) { - assertQueryFails("CREATE VIEW nation_v AS SELECT * FROM nation", "This connector does not support creating views"); + assertQueryFails( + "CREATE VIEW nation_v AS SELECT * FROM nation", + "This connector does not support creating views"); return; } - @Language("SQL") String query = "SELECT orderkey, orderstatus, (totalprice / 2) half FROM orders"; + @Language("SQL") + String query = "SELECT orderkey, orderstatus, (totalprice / 2) half FROM orders"; String catalogName = getSession().getCatalog().orElseThrow(); String schemaName = getSession().getSchema().orElseThrow(); @@ -603,12 +652,18 @@ public void testView() { // verify comment MaterializedResult materializedRows = computeActual("SHOW CREATE VIEW " + testViewWithComment); assertThat((String) materializedRows.getOnlyValue()).contains("COMMENT 'orders'"); - assertThat(query( - "SELECT table_name, comment FROM system.metadata.table_comments " + - "WHERE catalog_name = '" + catalogName + "' AND " + - "schema_name = '" + schemaName + "'")) + assertThat( + query( + "SELECT table_name, comment FROM system.metadata.table_comments " + + "WHERE catalog_name = '" + + catalogName + + "' AND " + + "schema_name = '" + + schemaName + + "'")) .skippingTypesCheck() - .containsAll("VALUES ('" + testView + "', null), ('" + testViewWithComment + "', 'orders')"); + .containsAll( + "VALUES ('" + testView + "', null), ('" + testViewWithComment + "', 'orders')"); // reading assertQuery("SELECT * FROM " + testView, query); @@ -626,31 +681,43 @@ public void testView() { assertUpdate("DROP VIEW " + testViewWithComment); // information_schema.views without table_name filter - assertThat(query( - "SELECT table_name, regexp_replace(view_definition, '\\s', '') FROM information_schema.views " + - "WHERE table_schema = '" + schemaName + "'")) + assertThat( + query( + "SELECT table_name, regexp_replace(view_definition, '\\s', '') FROM information_schema.views " + + "WHERE table_schema = '" + + schemaName + + "'")) .skippingTypesCheck() .containsAll("VALUES ('" + testView + "', '" + query.replaceAll("\\s", "") + "')"); // information_schema.views with table_name filter assertQuery( - "SELECT table_name, regexp_replace(view_definition, '\\s', '') FROM information_schema.views " + - "WHERE table_schema = '" + schemaName + "' and table_name = '" + testView + "'", + "SELECT table_name, regexp_replace(view_definition, '\\s', '') FROM information_schema.views " + + "WHERE table_schema = '" + + schemaName + + "' and table_name = '" + + testView + + "'", "VALUES ('" + testView + "', '" + query.replaceAll("\\s", "") + "')"); // table listing - assertThat(query("SHOW TABLES")) - .skippingTypesCheck() - .containsAll("VALUES '" + testView + "'"); + assertThat(query("SHOW TABLES")).skippingTypesCheck().containsAll("VALUES '" + testView + "'"); // information_schema.tables without table_name filter - assertThat(query( - "SELECT table_name, table_type FROM information_schema.tables " + - "WHERE table_schema = '" + schemaName + "'")) + assertThat( + query( + "SELECT table_name, table_type FROM 
information_schema.tables " + + "WHERE table_schema = '" + + schemaName + + "'")) .skippingTypesCheck() .containsAll("VALUES ('" + testView + "', 'VIEW')"); // information_schema.tables with table_name filter assertQuery( - "SELECT table_name, table_type FROM information_schema.tables " + - "WHERE table_schema = '" + schemaName + "' and table_name = '" + testView + "'", + "SELECT table_name, table_type FROM information_schema.tables " + + "WHERE table_schema = '" + + schemaName + + "' and table_name = '" + + testView + + "'", "VALUES ('" + testView + "', 'VIEW')"); // system.jdbc.tables without filter @@ -660,11 +727,17 @@ public void testView() { // system.jdbc.tables with table prefix filter assertQuery( - "SELECT table_schem, table_name, table_type " + - "FROM system.jdbc.tables " + - "WHERE table_cat = '" + catalogName + "' AND " + - "table_schem = '" + schemaName + "' AND " + - "table_name = '" + testView + "'", + "SELECT table_schem, table_name, table_type " + + "FROM system.jdbc.tables " + + "WHERE table_cat = '" + + catalogName + + "' AND " + + "table_schem = '" + + schemaName + + "' AND " + + "table_name = '" + + testView + + "'", "VALUES ('" + schemaName + "', '" + testView + "', 'VIEW')"); // column listing @@ -679,27 +752,43 @@ public void testView() { .matches("VALUES 'orderkey', 'orderstatus', 'half'"); // information_schema.columns without table_name filter - assertThat(query( - "SELECT table_name, column_name " + - "FROM information_schema.columns " + - "WHERE table_schema = '" + schemaName + "'")) + assertThat( + query( + "SELECT table_name, column_name " + + "FROM information_schema.columns " + + "WHERE table_schema = '" + + schemaName + + "'")) .skippingTypesCheck() .containsAll( - "SELECT * FROM (VALUES '" + testView + "') " + - "CROSS JOIN UNNEST(ARRAY['orderkey', 'orderstatus', 'half'])"); + "SELECT * FROM (VALUES '" + + testView + + "') " + + "CROSS JOIN UNNEST(ARRAY['orderkey', 'orderstatus', 'half'])"); // information_schema.columns with table_name filter - assertThat(query( - "SELECT table_name, column_name " + - "FROM information_schema.columns " + - "WHERE table_schema = '" + schemaName + "' and table_name = '" + testView + "'")) + assertThat( + query( + "SELECT table_name, column_name " + + "FROM information_schema.columns " + + "WHERE table_schema = '" + + schemaName + + "' and table_name = '" + + testView + + "'")) .skippingTypesCheck() .containsAll( - "SELECT * FROM (VALUES '" + testView + "') " + - "CROSS JOIN UNNEST(ARRAY['orderkey', 'orderstatus', 'half'])"); + "SELECT * FROM (VALUES '" + + testView + + "') " + + "CROSS JOIN UNNEST(ARRAY['orderkey', 'orderstatus', 'half'])"); // view-specific listings - assertThat(query("SELECT table_name FROM information_schema.views WHERE table_schema = '" + schemaName + "'")) + assertThat( + query( + "SELECT table_name FROM information_schema.views WHERE table_schema = '" + + schemaName + + "'")) .skippingTypesCheck() .containsAll("VALUES '" + testView + "'"); @@ -707,28 +796,46 @@ public void testView() { assertThat(query("SELECT table_schem, table_name, column_name FROM system.jdbc.columns")) .skippingTypesCheck() .containsAll( - "SELECT * FROM (VALUES ('" + schemaName + "', '" + testView + "')) " + - "CROSS JOIN UNNEST(ARRAY['orderkey', 'orderstatus', 'half'])"); + "SELECT * FROM (VALUES ('" + + schemaName + + "', '" + + testView + + "')) " + + "CROSS JOIN UNNEST(ARRAY['orderkey', 'orderstatus', 'half'])"); // system.jdbc.columns with schema filter - assertThat(query( - "SELECT table_schem, table_name, column_name " 
+ - "FROM system.jdbc.columns " + - "WHERE table_schem LIKE '%" + schemaName + "%'")) + assertThat( + query( + "SELECT table_schem, table_name, column_name " + + "FROM system.jdbc.columns " + + "WHERE table_schem LIKE '%" + + schemaName + + "%'")) .skippingTypesCheck() .containsAll( - "SELECT * FROM (VALUES ('" + schemaName + "', '" + testView + "')) " + - "CROSS JOIN UNNEST(ARRAY['orderkey', 'orderstatus', 'half'])"); + "SELECT * FROM (VALUES ('" + + schemaName + + "', '" + + testView + + "')) " + + "CROSS JOIN UNNEST(ARRAY['orderkey', 'orderstatus', 'half'])"); // system.jdbc.columns with table filter - assertThat(query( - "SELECT table_schem, table_name, column_name " + - "FROM system.jdbc.columns " + - "WHERE table_name LIKE '%" + testView + "%'")) + assertThat( + query( + "SELECT table_schem, table_name, column_name " + + "FROM system.jdbc.columns " + + "WHERE table_name LIKE '%" + + testView + + "%'")) .skippingTypesCheck() .containsAll( - "SELECT * FROM (VALUES ('" + schemaName + "', '" + testView + "')) " + - "CROSS JOIN UNNEST(ARRAY['orderkey', 'orderstatus', 'half'])"); + "SELECT * FROM (VALUES ('" + + schemaName + + "', '" + + testView + + "')) " + + "CROSS JOIN UNNEST(ARRAY['orderkey', 'orderstatus', 'half'])"); assertUpdate("DROP VIEW " + testView); } @@ -752,41 +859,55 @@ public void testViewCaseSensitivity() { @Test public void testMaterializedView() { if (!hasBehavior(SUPPORTS_CREATE_MATERIALIZED_VIEW)) { - assertQueryFails("CREATE MATERIALIZED VIEW nation_mv AS SELECT * FROM nation", "This connector does not support creating materialized views"); + assertQueryFails( + "CREATE MATERIALIZED VIEW nation_mv AS SELECT * FROM nation", + "This connector does not support creating materialized views"); return; } - QualifiedObjectName view = new QualifiedObjectName( - getSession().getCatalog().orElseThrow(), - getSession().getSchema().orElseThrow(), - "test_materialized_view_" + randomNameSuffix()); - QualifiedObjectName otherView = new QualifiedObjectName( - getSession().getCatalog().orElseThrow(), - "other_schema", - "test_materialized_view_" + randomNameSuffix()); - QualifiedObjectName viewWithComment = new QualifiedObjectName( - getSession().getCatalog().orElseThrow(), - getSession().getSchema().orElseThrow(), - "test_materialized_view_with_comment_" + randomNameSuffix()); + QualifiedObjectName view = + new QualifiedObjectName( + getSession().getCatalog().orElseThrow(), + getSession().getSchema().orElseThrow(), + "test_materialized_view_" + randomNameSuffix()); + QualifiedObjectName otherView = + new QualifiedObjectName( + getSession().getCatalog().orElseThrow(), + "other_schema", + "test_materialized_view_" + randomNameSuffix()); + QualifiedObjectName viewWithComment = + new QualifiedObjectName( + getSession().getCatalog().orElseThrow(), + getSession().getSchema().orElseThrow(), + "test_materialized_view_with_comment_" + randomNameSuffix()); createTestingMaterializedView(view, Optional.empty()); createTestingMaterializedView(otherView, Optional.of("sarcastic comment")); createTestingMaterializedView(viewWithComment, Optional.of("mv_comment")); // verify comment - MaterializedResult materializedRows = computeActual("SHOW CREATE MATERIALIZED VIEW " + viewWithComment); + MaterializedResult materializedRows = + computeActual("SHOW CREATE MATERIALIZED VIEW " + viewWithComment); assertThat((String) materializedRows.getOnlyValue()).contains("COMMENT 'mv_comment'"); - assertThat(query( - "SELECT table_name, comment FROM system.metadata.table_comments " + - "WHERE catalog_name = '" + 
view.getCatalogName() + "' AND " + - "schema_name = '" + view.getSchemaName() + "'")) + assertThat( + query( + "SELECT table_name, comment FROM system.metadata.table_comments " + + "WHERE catalog_name = '" + + view.getCatalogName() + + "' AND " + + "schema_name = '" + + view.getSchemaName() + + "'")) .skippingTypesCheck() - .containsAll("VALUES ('" + view.getObjectName() + "', null), ('" + viewWithComment.getObjectName() + "', 'mv_comment')"); + .containsAll( + "VALUES ('" + + view.getObjectName() + + "', null), ('" + + viewWithComment.getObjectName() + + "', 'mv_comment')"); // reading - assertThat(query("SELECT * FROM " + view)) - .skippingTypesCheck() - .matches("SELECT * FROM nation"); + assertThat(query("SELECT * FROM " + view)).skippingTypesCheck().matches("SELECT * FROM nation"); assertThat(query("SELECT * FROM " + viewWithComment)) .skippingTypesCheck() .matches("SELECT * FROM nation"); @@ -796,29 +917,46 @@ public void testMaterializedView() { .skippingTypesCheck() .containsAll("VALUES '" + view.getObjectName() + "'"); // information_schema.tables without table_name filter - assertThat(query( - "SELECT table_name, table_type FROM information_schema.tables " + - "WHERE table_schema = '" + view.getSchemaName() + "'")) + assertThat( + query( + "SELECT table_name, table_type FROM information_schema.tables " + + "WHERE table_schema = '" + + view.getSchemaName() + + "'")) .skippingTypesCheck() - .containsAll("VALUES ('" + view.getObjectName() + "', 'BASE TABLE')"); // TODO table_type should probably be "* VIEW" + .containsAll( + "VALUES ('" + + view.getObjectName() + + "', 'BASE TABLE')"); // TODO table_type should probably be "* VIEW" // information_schema.tables with table_name filter assertQuery( - "SELECT table_name, table_type FROM information_schema.tables " + - "WHERE table_schema = '" + view.getSchemaName() + "' and table_name = '" + view.getObjectName() + "'", + "SELECT table_name, table_type FROM information_schema.tables " + + "WHERE table_schema = '" + + view.getSchemaName() + + "' and table_name = '" + + view.getObjectName() + + "'", "VALUES ('" + view.getObjectName() + "', 'BASE TABLE')"); // system.jdbc.tables without filter assertThat(query("SELECT table_schem, table_name, table_type FROM system.jdbc.tables")) .skippingTypesCheck() - .containsAll("VALUES ('" + view.getSchemaName() + "', '" + view.getObjectName() + "', 'TABLE')"); + .containsAll( + "VALUES ('" + view.getSchemaName() + "', '" + view.getObjectName() + "', 'TABLE')"); // system.jdbc.tables with table prefix filter assertQuery( - "SELECT table_schem, table_name, table_type " + - "FROM system.jdbc.tables " + - "WHERE table_cat = '" + view.getCatalogName() + "' AND " + - "table_schem = '" + view.getSchemaName() + "' AND " + - "table_name = '" + view.getObjectName() + "'", + "SELECT table_schem, table_name, table_type " + + "FROM system.jdbc.tables " + + "WHERE table_cat = '" + + view.getCatalogName() + + "' AND " + + "table_schem = '" + + view.getSchemaName() + + "' AND " + + "table_name = '" + + view.getObjectName() + + "'", "VALUES ('" + view.getSchemaName() + "', '" + view.getObjectName() + "', 'TABLE')"); // column listing @@ -833,24 +971,36 @@ public void testMaterializedView() { .matches("VALUES 'nationkey', 'name', 'regionkey', 'comment'"); // information_schema.columns without table_name filter - assertThat(query( - "SELECT table_name, column_name " + - "FROM information_schema.columns " + - "WHERE table_schema = '" + view.getSchemaName() + "'")) + assertThat( + query( + "SELECT table_name, column_name 
" + + "FROM information_schema.columns " + + "WHERE table_schema = '" + + view.getSchemaName() + + "'")) .skippingTypesCheck() .containsAll( - "SELECT * FROM (VALUES '" + view.getObjectName() + "') " + - "CROSS JOIN UNNEST(ARRAY['nationkey', 'name', 'regionkey', 'comment'])"); + "SELECT * FROM (VALUES '" + + view.getObjectName() + + "') " + + "CROSS JOIN UNNEST(ARRAY['nationkey', 'name', 'regionkey', 'comment'])"); // information_schema.columns with table_name filter - assertThat(query( - "SELECT table_name, column_name " + - "FROM information_schema.columns " + - "WHERE table_schema = '" + view.getSchemaName() + "' and table_name = '" + view.getObjectName() + "'")) + assertThat( + query( + "SELECT table_name, column_name " + + "FROM information_schema.columns " + + "WHERE table_schema = '" + + view.getSchemaName() + + "' and table_name = '" + + view.getObjectName() + + "'")) .skippingTypesCheck() .containsAll( - "SELECT * FROM (VALUES '" + view.getObjectName() + "') " + - "CROSS JOIN UNNEST(ARRAY['nationkey', 'name', 'regionkey', 'comment'])"); + "SELECT * FROM (VALUES '" + + view.getObjectName() + + "') " + + "CROSS JOIN UNNEST(ARRAY['nationkey', 'name', 'regionkey', 'comment'])"); // view-specific listings checkInformationSchemaViewsForMaterializedView(view.getSchemaName(), view.getObjectName()); @@ -859,37 +1009,58 @@ public void testMaterializedView() { assertThat(query("SELECT table_schem, table_name, column_name FROM system.jdbc.columns")) .skippingTypesCheck() .containsAll( - "SELECT * FROM (VALUES ('" + view.getSchemaName() + "', '" + view.getObjectName() + "')) " + - "CROSS JOIN UNNEST(ARRAY['nationkey', 'name', 'regionkey', 'comment'])"); + "SELECT * FROM (VALUES ('" + + view.getSchemaName() + + "', '" + + view.getObjectName() + + "')) " + + "CROSS JOIN UNNEST(ARRAY['nationkey', 'name', 'regionkey', 'comment'])"); // system.jdbc.columns with schema filter - assertThat(query( - "SELECT table_schem, table_name, column_name " + - "FROM system.jdbc.columns " + - "WHERE table_schem LIKE '%" + view.getSchemaName() + "%'")) + assertThat( + query( + "SELECT table_schem, table_name, column_name " + + "FROM system.jdbc.columns " + + "WHERE table_schem LIKE '%" + + view.getSchemaName() + + "%'")) .skippingTypesCheck() .containsAll( - "SELECT * FROM (VALUES ('" + view.getSchemaName() + "', '" + view.getObjectName() + "')) " + - "CROSS JOIN UNNEST(ARRAY['nationkey', 'name', 'regionkey', 'comment'])"); + "SELECT * FROM (VALUES ('" + + view.getSchemaName() + + "', '" + + view.getObjectName() + + "')) " + + "CROSS JOIN UNNEST(ARRAY['nationkey', 'name', 'regionkey', 'comment'])"); // system.jdbc.columns with table filter - assertThat(query( - "SELECT table_schem, table_name, column_name " + - "FROM system.jdbc.columns " + - "WHERE table_name LIKE '%" + view.getObjectName() + "%'")) + assertThat( + query( + "SELECT table_schem, table_name, column_name " + + "FROM system.jdbc.columns " + + "WHERE table_name LIKE '%" + + view.getObjectName() + + "%'")) .skippingTypesCheck() .containsAll( - "SELECT * FROM (VALUES ('" + view.getSchemaName() + "', '" + view.getObjectName() + "')) " + - "CROSS JOIN UNNEST(ARRAY['nationkey', 'name', 'regionkey', 'comment'])"); + "SELECT * FROM (VALUES ('" + + view.getSchemaName() + + "', '" + + view.getObjectName() + + "')) " + + "CROSS JOIN UNNEST(ARRAY['nationkey', 'name', 'regionkey', 'comment'])"); // details assertThat(((String) computeScalar("SHOW CREATE MATERIALIZED VIEW " + view.getObjectName()))) - .matches("(?s)" + - "CREATE MATERIALIZED VIEW \\Q" + view + 
"\\E" + - ".* AS\n" + - "SELECT \\*\n" + - "FROM\n" + - " nation"); + .matches( + "(?s)" + + "CREATE MATERIALIZED VIEW \\Q" + + view + + "\\E" + + ".* AS\n" + + "SELECT \\*\n" + + "FROM\n" + + " nation"); // we only want to test filtering materialized views in different schemas, // `viewWithComment` is in the same schema as `view` so it is not needed @@ -900,43 +1071,49 @@ public void testMaterializedView() { .skippingTypesCheck() .containsAll(getTestingMaterializedViewsResultRows(view, otherView)); - assertThat(query( - listMaterializedViewsSql( - "catalog_name = '" + otherView.getCatalogName() + "'", - "schema_name = '" + otherView.getSchemaName() + "'"))) + assertThat( + query( + listMaterializedViewsSql( + "catalog_name = '" + otherView.getCatalogName() + "'", + "schema_name = '" + otherView.getSchemaName() + "'"))) .skippingTypesCheck() .containsAll(getTestingMaterializedViewsResultRow(otherView, "sarcastic comment")); - assertThat(query( - listMaterializedViewsSql( - "catalog_name = '" + view.getCatalogName() + "'", - "schema_name = '" + view.getSchemaName() + "'", - "name = '" + view.getObjectName() + "'"))) + assertThat( + query( + listMaterializedViewsSql( + "catalog_name = '" + view.getCatalogName() + "'", + "schema_name = '" + view.getSchemaName() + "'", + "name = '" + view.getObjectName() + "'"))) .skippingTypesCheck() .containsAll(getTestingMaterializedViewsResultRow(view, "")); - assertThat(query( - listMaterializedViewsSql("schema_name LIKE '%" + view.getSchemaName() + "%'"))) + assertThat(query(listMaterializedViewsSql("schema_name LIKE '%" + view.getSchemaName() + "%'"))) .skippingTypesCheck() .containsAll(getTestingMaterializedViewsResultRow(view, "")); - assertThat(query( - listMaterializedViewsSql("name LIKE '%" + view.getObjectName() + "%'"))) + assertThat(query(listMaterializedViewsSql("name LIKE '%" + view.getObjectName() + "%'"))) .skippingTypesCheck() .containsAll(getTestingMaterializedViewsResultRow(view, "")); // verify write in transaction if (!hasBehavior(SUPPORTS_MULTI_STATEMENT_WRITES)) { - assertThatThrownBy(() -> inTransaction(session -> computeActual(session, "REFRESH MATERIALIZED VIEW " + view))) + assertThatThrownBy( + () -> + inTransaction( + session -> computeActual(session, "REFRESH MATERIALIZED VIEW " + view))) .hasMessageMatching("Catalog only supports writes using autocommit: \\w+"); } assertUpdate("DROP MATERIALIZED VIEW " + view); assertUpdate("DROP MATERIALIZED VIEW " + otherView); - assertQueryReturnsEmptyResult(listMaterializedViewsSql("name = '" + view.getObjectName() + "'")); - assertQueryReturnsEmptyResult(listMaterializedViewsSql("name = '" + otherView.getObjectName() + "'")); - assertQueryReturnsEmptyResult(listMaterializedViewsSql("name = '" + viewWithComment.getObjectName() + "'")); + assertQueryReturnsEmptyResult( + listMaterializedViewsSql("name = '" + view.getObjectName() + "'")); + assertQueryReturnsEmptyResult( + listMaterializedViewsSql("name = '" + otherView.getObjectName() + "'")); + assertQueryReturnsEmptyResult( + listMaterializedViewsSql("name = '" + viewWithComment.getObjectName() + "'")); } @Test @@ -989,21 +1166,25 @@ public void testViewMetadata(String securityClauseInCreate, String securityClaus String viewName = "meta_test_view_" + randomNameSuffix(); - @Language("SQL") String query = "SELECT BIGINT '123' x, 'foo' y"; + @Language("SQL") + String query = "SELECT BIGINT '123' x, 'foo' y"; assertUpdate("CREATE VIEW " + viewName + securityClauseInCreate + " AS " + query); // test INFORMATION_SCHEMA.TABLES - 
MaterializedResult actual = computeActual(format( - "SELECT table_name, table_type FROM information_schema.tables WHERE table_schema = '%s'", - getSession().getSchema().get())); - - MaterializedResult expected = resultBuilder(getSession(), actual.getTypes()) - .row("customer", "BASE TABLE") - .row(viewName, "VIEW") - .row("nation", "BASE TABLE") - .row("orders", "BASE TABLE") - .row("region", "BASE TABLE") - .build(); + MaterializedResult actual = + computeActual( + format( + "SELECT table_name, table_type FROM information_schema.tables WHERE table_schema = '%s'", + getSession().getSchema().get())); + + MaterializedResult expected = + resultBuilder(getSession(), actual.getTypes()) + .row("customer", "BASE TABLE") + .row(viewName, "VIEW") + .row("nation", "BASE TABLE") + .row("orders", "BASE TABLE") + .row("region", "BASE TABLE") + .build(); assertContains(actual, expected); @@ -1019,40 +1200,49 @@ public void testViewMetadata(String securityClauseInCreate, String securityClaus assertContains(actual, expected); // test INFORMATION_SCHEMA.VIEWS - actual = computeActual(format( - "SELECT table_name, view_definition FROM information_schema.views WHERE table_schema = '%s'", - getSession().getSchema().get())); + actual = + computeActual( + format( + "SELECT table_name, view_definition FROM information_schema.views WHERE table_schema = '%s'", + getSession().getSchema().get())); - expected = resultBuilder(getSession(), actual.getTypes()) - .row(viewName, formatSqlText(query)) - .build(); + expected = + resultBuilder(getSession(), actual.getTypes()).row(viewName, formatSqlText(query)).build(); assertContains(actual, expected); // test SHOW COLUMNS actual = computeActual("SHOW COLUMNS FROM " + viewName); - expected = resultBuilder(getSession(), VARCHAR, VARCHAR, VARCHAR, VARCHAR) - .row("x", "bigint", "", "") - .row("y", "varchar(3)", "", "") - .build(); + expected = + resultBuilder(getSession(), VARCHAR, VARCHAR, VARCHAR, VARCHAR) + .row("x", "bigint", "", "") + .row("y", "varchar(3)", "", "") + .build(); assertEquals(actual, expected); // test SHOW CREATE VIEW - String expectedSql = formatSqlText(format( - "CREATE VIEW %s.%s.%s SECURITY %s AS %s", - getSession().getCatalog().get(), - getSession().getSchema().get(), - viewName, - securityClauseInShowCreate, - query)).trim(); + String expectedSql = + formatSqlText( + format( + "CREATE VIEW %s.%s.%s SECURITY %s AS %s", + getSession().getCatalog().get(), + getSession().getSchema().get(), + viewName, + securityClauseInShowCreate, + query)) + .trim(); actual = computeActual("SHOW CREATE VIEW " + viewName); assertEquals(getOnlyElement(actual.getOnlyColumnAsSet()), expectedSql); - actual = computeActual(format("SHOW CREATE VIEW %s.%s.%s", getSession().getCatalog().get(), getSession().getSchema().get(), viewName)); + actual = + computeActual( + format( + "SHOW CREATE VIEW %s.%s.%s", + getSession().getCatalog().get(), getSession().getSchema().get(), viewName)); assertEquals(getOnlyElement(actual.getOnlyColumnAsSet()), expectedSql); @@ -1061,10 +1251,10 @@ public void testViewMetadata(String securityClauseInCreate, String securityClaus @DataProvider public static Object[][] testViewMetadataDataProvider() { - return new Object[][]{ - {"", "DEFINER"}, - {" SECURITY DEFINER", "DEFINER"}, - {" SECURITY INVOKER", "INVOKER"}, + return new Object[][] { + {"", "DEFINER"}, + {" SECURITY DEFINER", "DEFINER"}, + {" SECURITY INVOKER", "INVOKER"}, }; } @@ -1076,18 +1266,17 @@ public void testShowCreateView() { String viewName = "test_show_create_view" + 
randomNameSuffix(); assertUpdate("DROP VIEW IF EXISTS " + viewName); - String ddl = format( - "CREATE VIEW %s.%s.%s SECURITY DEFINER AS\n" + - "SELECT *\n" + - "FROM\n" + - " (\n" + - " VALUES \n" + - " ROW (1, 'one')\n" + - " , ROW (2, 't')\n" + - ") t (col1, col2)", - getSession().getCatalog().get(), - getSession().getSchema().get(), - viewName); + String ddl = + format( + "CREATE VIEW %s.%s.%s SECURITY DEFINER AS\n" + + "SELECT *\n" + + "FROM\n" + + " (\n" + + " VALUES \n" + + " ROW (1, 'one')\n" + + " , ROW (2, 't')\n" + + ") t (col1, col2)", + getSession().getCatalog().get(), getSession().getSchema().get(), viewName); assertUpdate(ddl); Assert.assertEquals(computeActual("SHOW CREATE VIEW " + viewName).getOnlyValue(), ddl); @@ -1100,53 +1289,99 @@ public void testRenameMaterializedView() { skipTestUnless(hasBehavior(SUPPORTS_CREATE_MATERIALIZED_VIEW)); String schema = "rename_mv_test"; - Session session = Session.builder(getSession()) - .setSchema(schema) - .build(); + Session session = Session.builder(getSession()).setSchema(schema).build(); - QualifiedObjectName originalMaterializedView = new QualifiedObjectName( - session.getCatalog().orElseThrow(), - session.getSchema().orElseThrow(), - "test_materialized_view_rename_" + randomNameSuffix()); + QualifiedObjectName originalMaterializedView = + new QualifiedObjectName( + session.getCatalog().orElseThrow(), + session.getSchema().orElseThrow(), + "test_materialized_view_rename_" + randomNameSuffix()); createTestingMaterializedView(originalMaterializedView, Optional.empty()); String renamedMaterializedView = "test_materialized_view_rename_new_" + randomNameSuffix(); if (!hasBehavior(SUPPORTS_RENAME_MATERIALIZED_VIEW)) { - assertQueryFails(session, "ALTER MATERIALIZED VIEW " + originalMaterializedView + " RENAME TO " + renamedMaterializedView, "This connector does not support renaming materialized views"); + assertQueryFails( + session, + "ALTER MATERIALIZED VIEW " + + originalMaterializedView + + " RENAME TO " + + renamedMaterializedView, + "This connector does not support renaming materialized views"); assertUpdate(session, "DROP MATERIALIZED VIEW " + originalMaterializedView); return; } // simple rename - assertUpdate(session, "ALTER MATERIALIZED VIEW " + originalMaterializedView + " RENAME TO " + renamedMaterializedView); + assertUpdate( + session, + "ALTER MATERIALIZED VIEW " + + originalMaterializedView + + " RENAME TO " + + renamedMaterializedView); assertTestingMaterializedViewQuery(schema, renamedMaterializedView); // verify new name in the system.metadata.materialized_views - assertQuery(session, "SELECT catalog_name, schema_name FROM system.metadata.materialized_views WHERE name = '" + renamedMaterializedView + "'", - format("VALUES ('%s', '%s')", originalMaterializedView.getCatalogName(), originalMaterializedView.getSchemaName())); - assertQueryReturnsEmptyResult(session, listMaterializedViewsSql("name = '" + originalMaterializedView.getObjectName() + "'")); + assertQuery( + session, + "SELECT catalog_name, schema_name FROM system.metadata.materialized_views WHERE name = '" + + renamedMaterializedView + + "'", + format( + "VALUES ('%s', '%s')", + originalMaterializedView.getCatalogName(), originalMaterializedView.getSchemaName())); + assertQueryReturnsEmptyResult( + session, + listMaterializedViewsSql("name = '" + originalMaterializedView.getObjectName() + "'")); // rename with IF EXISTS on existing materialized view - String testExistsMaterializedViewName = "test_materialized_view_rename_exists_" + randomNameSuffix(); - 
assertUpdate(session, "ALTER MATERIALIZED VIEW IF EXISTS " + renamedMaterializedView + " RENAME TO " + testExistsMaterializedViewName); + String testExistsMaterializedViewName = + "test_materialized_view_rename_exists_" + randomNameSuffix(); + assertUpdate( + session, + "ALTER MATERIALIZED VIEW IF EXISTS " + + renamedMaterializedView + + " RENAME TO " + + testExistsMaterializedViewName); assertTestingMaterializedViewQuery(schema, testExistsMaterializedViewName); // rename with upper-case, not delimited identifier String uppercaseName = "TEST_MATERIALIZED_VIEW_RENAME_UPPERCASE_" + randomNameSuffix(); - assertUpdate(session, "ALTER MATERIALIZED VIEW " + testExistsMaterializedViewName + " RENAME TO " + uppercaseName); - assertTestingMaterializedViewQuery(schema, uppercaseName.toLowerCase(ENGLISH)); // Ensure select allows for lower-case, not delimited identifier + assertUpdate( + session, + "ALTER MATERIALIZED VIEW " + + testExistsMaterializedViewName + + " RENAME TO " + + uppercaseName); + assertTestingMaterializedViewQuery( + schema, + uppercaseName.toLowerCase( + ENGLISH)); // Ensure select allows for lower-case, not delimited identifier String otherSchema = "rename_mv_other_schema"; assertUpdate(format("CREATE SCHEMA IF NOT EXISTS %s", otherSchema)); if (hasBehavior(SUPPORTS_RENAME_MATERIALIZED_VIEW_ACROSS_SCHEMAS)) { - assertUpdate(session, "ALTER MATERIALIZED VIEW " + uppercaseName + " RENAME TO " + otherSchema + "." + originalMaterializedView.getObjectName()); + assertUpdate( + session, + "ALTER MATERIALIZED VIEW " + + uppercaseName + + " RENAME TO " + + otherSchema + + "." + + originalMaterializedView.getObjectName()); assertTestingMaterializedViewQuery(otherSchema, originalMaterializedView.getObjectName()); - assertUpdate(session, "DROP MATERIALIZED VIEW " + otherSchema + "." + originalMaterializedView.getObjectName()); + assertUpdate( + session, + "DROP MATERIALIZED VIEW " + otherSchema + "." + originalMaterializedView.getObjectName()); } else { assertQueryFails( session, - "ALTER MATERIALIZED VIEW " + uppercaseName + " RENAME TO " + otherSchema + "." + originalMaterializedView.getObjectName(), + "ALTER MATERIALIZED VIEW " + + uppercaseName + + " RENAME TO " + + otherSchema + + "." 
+ + originalMaterializedView.getObjectName(), "Materialized View rename across schemas is not supported"); assertUpdate(session, "DROP MATERIALIZED VIEW " + uppercaseName); } @@ -1156,9 +1391,17 @@ public void testRenameMaterializedView() { assertFalse(getQueryRunner().tableExists(session, testExistsMaterializedViewName)); // rename with IF EXISTS on NOT existing materialized view - assertUpdate(session, "ALTER TABLE IF EXISTS " + originalMaterializedView + " RENAME TO " + renamedMaterializedView); - assertQueryReturnsEmptyResult(session, listMaterializedViewsSql("name = '" + originalMaterializedView.getObjectName() + "'")); - assertQueryReturnsEmptyResult(session, listMaterializedViewsSql("name = '" + renamedMaterializedView + "'")); + assertUpdate( + session, + "ALTER TABLE IF EXISTS " + + originalMaterializedView + + " RENAME TO " + + renamedMaterializedView); + assertQueryReturnsEmptyResult( + session, + listMaterializedViewsSql("name = '" + originalMaterializedView.getObjectName() + "'")); + assertQueryReturnsEmptyResult( + session, listMaterializedViewsSql("name = '" + renamedMaterializedView + "'")); } private void assertTestingMaterializedViewQuery(String schema, String materializedViewName) { @@ -1169,13 +1412,14 @@ private void assertTestingMaterializedViewQuery(String schema, String materializ private void createTestingMaterializedView(QualifiedObjectName view, Optional comment) { assertUpdate(format("CREATE SCHEMA IF NOT EXISTS %s", view.getSchemaName())); - assertUpdate(format( - "CREATE MATERIALIZED VIEW %s %s AS SELECT * FROM nation", - view, - comment.map(c -> format("COMMENT '%s'", c)).orElse(""))); + assertUpdate( + format( + "CREATE MATERIALIZED VIEW %s %s AS SELECT * FROM nation", + view, comment.map(c -> format("COMMENT '%s'", c)).orElse(""))); } - private String getTestingMaterializedViewsResultRow(QualifiedObjectName materializedView, String comment) { + private String getTestingMaterializedViewsResultRow( + QualifiedObjectName materializedView, String comment) { return format( "VALUES ('%s', '%s', '%s', '%s', 'SELECT *\nFROM\n nation\n')", materializedView.getCatalogName(), @@ -1185,13 +1429,11 @@ private String getTestingMaterializedViewsResultRow(QualifiedObjectName material } private String getTestingMaterializedViewsResultRows( - QualifiedObjectName materializedView, - QualifiedObjectName otherMaterializedView) { + QualifiedObjectName materializedView, QualifiedObjectName otherMaterializedView) { String viewDefinitionSql = "SELECT *\nFROM\n nation\n"; return format( - "VALUES ('%s', '%s', '%s', '', '%s')," + - "('%s', '%s', '%s', 'sarcastic comment', '%s')", + "VALUES ('%s', '%s', '%s', '', '%s')," + "('%s', '%s', '%s', 'sarcastic comment', '%s')", materializedView.getCatalogName(), materializedView.getSchemaName(), materializedView.getObjectName(), @@ -1203,14 +1445,16 @@ private String getTestingMaterializedViewsResultRows( } private String listMaterializedViewsSql(String... 
filterClauses) { - StringBuilder sql = new StringBuilder("SELECT" + - " catalog_name," + - " schema_name," + - " name," + - " comment," + - " definition " + - "FROM system.metadata.materialized_views " + - "WHERE true"); + StringBuilder sql = + new StringBuilder( + "SELECT" + + " catalog_name," + + " schema_name," + + " name," + + " comment," + + " definition " + + "FROM system.metadata.materialized_views " + + "WHERE true"); for (String filterClause : filterClauses) { sql.append(" AND ").append(filterClause); @@ -1224,7 +1468,8 @@ public void testViewAndMaterializedViewTogether() { if (!hasBehavior(SUPPORTS_CREATE_MATERIALIZED_VIEW) || !hasBehavior(SUPPORTS_CREATE_VIEW)) { return; } - // Validate that it is possible to have views and materialized views defined at the same time and both are operational + // Validate that it is possible to have views and materialized views defined at the same time + // and both are operational String schemaName = getSession().getSchema().orElseThrow(); @@ -1235,8 +1480,13 @@ public void testViewAndMaterializedViewTogether() { assertUpdate("CREATE MATERIALIZED VIEW " + materializedViewName + " AS SELECT * FROM nation"); // both should be accessible via information_schema.views - // TODO: actually it is not the cased now hence overridable `checkInformationSchemaViewsForMaterializedView` - assertThat(query("SELECT table_name FROM information_schema.views WHERE table_schema = '" + schemaName + "'")) + // TODO: actually it is not the cased now hence overridable + // `checkInformationSchemaViewsForMaterializedView` + assertThat( + query( + "SELECT table_name FROM information_schema.views WHERE table_schema = '" + + schemaName + + "'")) .skippingTypesCheck() .containsAll("VALUES '" + regularViewName + "'"); checkInformationSchemaViewsForMaterializedView(schemaName, materializedViewName); @@ -1250,21 +1500,28 @@ public void testViewAndMaterializedViewTogether() { } // TODO inline when all implementations fixed - protected void checkInformationSchemaViewsForMaterializedView(String schemaName, String viewName) { - assertThat(query("SELECT table_name FROM information_schema.views WHERE table_schema = '" + schemaName + "'")) + protected void checkInformationSchemaViewsForMaterializedView( + String schemaName, String viewName) { + assertThat( + query( + "SELECT table_name FROM information_schema.views WHERE table_schema = '" + + schemaName + + "'")) .skippingTypesCheck() .containsAll("VALUES '" + viewName + "'"); } // /** - // * Test that reading table, column metadata, like {@code SHOW TABLES} or reading from {@code information_schema.views} + // * Test that reading table, column metadata, like {@code SHOW TABLES} or reading from {@code + // information_schema.views} // * does not fail when relations are concurrently created or dropped. 
// */ // @Test(timeOut = 180_000) // public void testReadMetadataWithRelationsConcurrentModifications() // throws Exception // { - // if (!hasBehavior(SUPPORTS_CREATE_TABLE) && !hasBehavior(SUPPORTS_CREATE_VIEW) && !hasBehavior(SUPPORTS_CREATE_MATERIALIZED_VIEW)) { + // if (!hasBehavior(SUPPORTS_CREATE_TABLE) && !hasBehavior(SUPPORTS_CREATE_VIEW) && + // !hasBehavior(SUPPORTS_CREATE_MATERIALIZED_VIEW)) { // throw new SkipException("Cannot test"); // } // @@ -1275,43 +1532,87 @@ protected void checkInformationSchemaViewsForMaterializedView(String schemaName, // testReadMetadataWithRelationsConcurrentModifications(readIterations, testTimeoutSeconds); // } - protected void testReadMetadataWithRelationsConcurrentModifications(int readIterations, int testTimeoutSeconds) - throws Exception { + protected void testReadMetadataWithRelationsConcurrentModifications( + int readIterations, int testTimeoutSeconds) throws Exception { Stopwatch testWatch = Stopwatch.createStarted(); - int readerTasksCount = 6 - + (hasBehavior(SUPPORTS_CREATE_VIEW) ? 1 : 0) - + (hasBehavior(SUPPORTS_CREATE_MATERIALIZED_VIEW) ? 1 : 0); + int readerTasksCount = + 6 + + (hasBehavior(SUPPORTS_CREATE_VIEW) ? 1 : 0) + + (hasBehavior(SUPPORTS_CREATE_MATERIALIZED_VIEW) ? 1 : 0); AtomicInteger incompleteReadTasks = new AtomicInteger(readerTasksCount); List> readerTasks = new ArrayList<>(); readerTasks.add(queryRepeatedly(readIterations, incompleteReadTasks, "SHOW TABLES")); - readerTasks.add(queryRepeatedly(readIterations, incompleteReadTasks, "SELECT * FROM information_schema.tables WHERE table_schema = CURRENT_SCHEMA")); - readerTasks.add(queryRepeatedly(readIterations, incompleteReadTasks, "SELECT * FROM information_schema.columns WHERE table_schema = CURRENT_SCHEMA")); - readerTasks.add(queryRepeatedly(readIterations, incompleteReadTasks, "SELECT * FROM system.jdbc.tables WHERE table_cat = CURRENT_CATALOG AND table_schem = CURRENT_SCHEMA")); - readerTasks.add(queryRepeatedly(readIterations, incompleteReadTasks, "SELECT * FROM system.jdbc.columns WHERE table_cat = CURRENT_CATALOG AND table_schem = CURRENT_SCHEMA")); - readerTasks.add(queryRepeatedly(readIterations, incompleteReadTasks, "SELECT * FROM system.metadata.table_comments WHERE catalog_name = CURRENT_CATALOG AND schema_name = CURRENT_SCHEMA")); + readerTasks.add( + queryRepeatedly( + readIterations, + incompleteReadTasks, + "SELECT * FROM information_schema.tables WHERE table_schema = CURRENT_SCHEMA")); + readerTasks.add( + queryRepeatedly( + readIterations, + incompleteReadTasks, + "SELECT * FROM information_schema.columns WHERE table_schema = CURRENT_SCHEMA")); + readerTasks.add( + queryRepeatedly( + readIterations, + incompleteReadTasks, + "SELECT * FROM system.jdbc.tables WHERE table_cat = CURRENT_CATALOG AND table_schem = CURRENT_SCHEMA")); + readerTasks.add( + queryRepeatedly( + readIterations, + incompleteReadTasks, + "SELECT * FROM system.jdbc.columns WHERE table_cat = CURRENT_CATALOG AND table_schem = CURRENT_SCHEMA")); + readerTasks.add( + queryRepeatedly( + readIterations, + incompleteReadTasks, + "SELECT * FROM system.metadata.table_comments WHERE catalog_name = CURRENT_CATALOG AND schema_name = CURRENT_SCHEMA")); if (hasBehavior(SUPPORTS_CREATE_VIEW)) { - readerTasks.add(queryRepeatedly(readIterations, incompleteReadTasks, "SELECT * FROM information_schema.views WHERE table_schema = CURRENT_SCHEMA")); + readerTasks.add( + queryRepeatedly( + readIterations, + incompleteReadTasks, + "SELECT * FROM information_schema.views WHERE table_schema = 
CURRENT_SCHEMA")); } if (hasBehavior(SUPPORTS_CREATE_MATERIALIZED_VIEW)) { - readerTasks.add(queryRepeatedly(readIterations, incompleteReadTasks, "SELECT * FROM system.metadata.materialized_views WHERE catalog_name = CURRENT_CATALOG AND schema_name = CURRENT_SCHEMA")); + readerTasks.add( + queryRepeatedly( + readIterations, + incompleteReadTasks, + "SELECT * FROM system.metadata.materialized_views WHERE catalog_name = CURRENT_CATALOG AND schema_name = CURRENT_SCHEMA")); } assertEquals(readerTasks.size(), readerTasksCount); - int writeTasksCount = 1 - + (hasBehavior(SUPPORTS_CREATE_VIEW) ? 1 : 0) - + (hasBehavior(SUPPORTS_CREATE_MATERIALIZED_VIEW) ? 1 : 0); + int writeTasksCount = + 1 + + (hasBehavior(SUPPORTS_CREATE_VIEW) ? 1 : 0) + + (hasBehavior(SUPPORTS_CREATE_MATERIALIZED_VIEW) ? 1 : 0); writeTasksCount = 2 * writeTasksCount; // writes are scheduled twice CountDownLatch writeTasksInitialized = new CountDownLatch(writeTasksCount); Runnable writeInitialized = writeTasksInitialized::countDown; Supplier done = () -> incompleteReadTasks.get() == 0; List> writeTasks = new ArrayList<>(); - writeTasks.add(createDropRepeatedly(writeInitialized, done, "concur_table", "CREATE TABLE %s(a integer)", "DROP TABLE %s")); + writeTasks.add( + createDropRepeatedly( + writeInitialized, done, "concur_table", "CREATE TABLE %s(a integer)", "DROP TABLE %s")); if (hasBehavior(SUPPORTS_CREATE_VIEW)) { - writeTasks.add(createDropRepeatedly(writeInitialized, done, "concur_view", "CREATE VIEW %s AS SELECT 1 a", "DROP VIEW %s")); + writeTasks.add( + createDropRepeatedly( + writeInitialized, + done, + "concur_view", + "CREATE VIEW %s AS SELECT 1 a", + "DROP VIEW %s")); } if (hasBehavior(SUPPORTS_CREATE_MATERIALIZED_VIEW)) { - writeTasks.add(createDropRepeatedly(writeInitialized, done, "concur_mview", "CREATE MATERIALIZED VIEW %s AS SELECT 1 a", "DROP MATERIALIZED VIEW %s")); + writeTasks.add( + createDropRepeatedly( + writeInitialized, + done, + "concur_mview", + "CREATE MATERIALIZED VIEW %s AS SELECT 1 a", + "DROP MATERIALIZED VIEW %s")); } assertEquals(writeTasks.size() * 2, writeTasksCount); @@ -1331,7 +1632,11 @@ protected void testReadMetadataWithRelationsConcurrentModifications(int readIter for (int i = 0; i < readerTasksCount + writeTasksCount; i++) { long remainingTimeSeconds = testTimeoutSeconds - testWatch.elapsed(SECONDS); Future future = completionService.poll(remainingTimeSeconds, SECONDS); - verifyNotNull(future, "Task did not completed before timeout; completed tasks: %s, current poll timeout: %s s", i, remainingTimeSeconds); + verifyNotNull( + future, + "Task did not completed before timeout; completed tasks: %s, current poll timeout: %s s", + i, + remainingTimeSeconds); future.get(); // non-blocking } } finally { @@ -1341,10 +1646,11 @@ protected void testReadMetadataWithRelationsConcurrentModifications(int readIter } /** - * Run {@code sql} query at least {@code minIterations} times and keep running until other tasks complete. - * {@code incompleteReadTasks} is used for orchestrating end of execution. + * Run {@code sql} query at least {@code minIterations} times and keep running until other tasks + * complete. {@code incompleteReadTasks} is used for orchestrating end of execution. 
*/ - protected Callable queryRepeatedly(int minIterations, AtomicInteger incompleteReadTasks, @Language("SQL") String sql) { + protected Callable queryRepeatedly( + int minIterations, AtomicInteger incompleteReadTasks, @Language("SQL") String sql) { return new Callable<>() { @Override public Void call() { @@ -1354,9 +1660,14 @@ public Void call() { alwaysEmpty &= result.getRowCount() == 0; } if (alwaysEmpty) { - fail(format("The results of [%s] are always empty after %s iterations, this may indicate test misconfiguration or broken connector behavior", sql, minIterations)); + fail( + format( + "The results of [%s] are always empty after %s iterations, this may indicate test misconfiguration or broken connector behavior", + sql, minIterations)); } - assertThat(incompleteReadTasks.decrementAndGet()).as("incompleteReadTasks").isGreaterThanOrEqualTo(0); + assertThat(incompleteReadTasks.decrementAndGet()) + .as("incompleteReadTasks") + .isGreaterThanOrEqualTo(0); // Keep running so that faster test queries have same length of exposure in wall time while (incompleteReadTasks.get() != 0) { computeActual(sql); @@ -1371,7 +1682,12 @@ public String toString() { }; } - protected Callable createDropRepeatedly(Runnable initReady, Supplier done, String namePrefix, String createTemplate, String dropTemplate) { + protected Callable createDropRepeatedly( + Runnable initReady, + Supplier done, + String namePrefix, + String createTemplate, + String dropTemplate) { return new Callable<>() { @Override public Void call() { @@ -1402,21 +1718,22 @@ public String toString() { }; } - protected void submitTasks(List> callables, CompletionService completionService) { + protected void submitTasks( + List> callables, CompletionService completionService) { for (Callable callable : callables) { String taskDescription = callable.toString(); - completionService.submit(new Callable() { - @Override - public T call() - throws Exception { - try { - return callable.call(); - } catch (Throwable e) { - e.addSuppressed(new Exception("Task: " + taskDescription)); - throw e; - } - } - }); + completionService.submit( + new Callable() { + @Override + public T call() throws Exception { + try { + return callable.call(); + } catch (Throwable e) { + e.addSuppressed(new Exception("Task: " + taskDescription)); + throw e; + } + } + }); } } @@ -1425,10 +1742,11 @@ public void testExplainAnalyze() { assertExplainAnalyze("EXPLAIN ANALYZE SELECT * FROM orders"); assertExplainAnalyze("EXPLAIN ANALYZE SELECT count(*), clerk FROM orders GROUP BY clerk"); assertExplainAnalyze( - "EXPLAIN ANALYZE SELECT x + y FROM (" + - " SELECT orderdate, COUNT(*) x FROM orders GROUP BY orderdate) a JOIN (" + - " SELECT orderdate, COUNT(*) y FROM orders GROUP BY orderdate) b ON a.orderdate = b.orderdate"); - assertExplainAnalyze("EXPLAIN ANALYZE SELECT count(*), clerk FROM orders GROUP BY clerk UNION ALL SELECT sum(orderkey), clerk FROM orders GROUP BY clerk"); + "EXPLAIN ANALYZE SELECT x + y FROM (" + + " SELECT orderdate, COUNT(*) x FROM orders GROUP BY orderdate) a JOIN (" + + " SELECT orderdate, COUNT(*) y FROM orders GROUP BY orderdate) b ON a.orderdate = b.orderdate"); + assertExplainAnalyze( + "EXPLAIN ANALYZE SELECT count(*), clerk FROM orders GROUP BY clerk UNION ALL SELECT sum(orderkey), clerk FROM orders GROUP BY clerk"); assertExplainAnalyze("EXPLAIN ANALYZE SHOW COLUMNS FROM orders"); assertExplainAnalyze("EXPLAIN ANALYZE EXPLAIN SELECT count(*) FROM orders"); @@ -1443,15 +1761,20 @@ public void testExplainAnalyze() { @Test public void 
testExplainAnalyzeVerbose() { assertExplainAnalyze("EXPLAIN ANALYZE VERBOSE SELECT * FROM orders"); - assertExplainAnalyze("EXPLAIN ANALYZE VERBOSE SELECT rank() OVER (PARTITION BY orderkey ORDER BY clerk DESC) FROM orders"); - assertExplainAnalyze("EXPLAIN ANALYZE VERBOSE SELECT rank() OVER (PARTITION BY orderkey ORDER BY clerk DESC) FROM orders WHERE orderkey < 0"); + assertExplainAnalyze( + "EXPLAIN ANALYZE VERBOSE SELECT rank() OVER (PARTITION BY orderkey ORDER BY clerk DESC) FROM orders"); + assertExplainAnalyze( + "EXPLAIN ANALYZE VERBOSE SELECT rank() OVER (PARTITION BY orderkey ORDER BY clerk DESC) FROM orders WHERE orderkey < 0"); } @Test public void testTableSampleSystem() { - MaterializedResult fullSample = computeActual("SELECT orderkey FROM orders TABLESAMPLE SYSTEM (100)"); - MaterializedResult emptySample = computeActual("SELECT orderkey FROM orders TABLESAMPLE SYSTEM (0)"); - MaterializedResult randomSample = computeActual("SELECT orderkey FROM orders TABLESAMPLE SYSTEM (50)"); + MaterializedResult fullSample = + computeActual("SELECT orderkey FROM orders TABLESAMPLE SYSTEM (100)"); + MaterializedResult emptySample = + computeActual("SELECT orderkey FROM orders TABLESAMPLE SYSTEM (0)"); + MaterializedResult randomSample = + computeActual("SELECT orderkey FROM orders TABLESAMPLE SYSTEM (50)"); MaterializedResult all = computeActual("SELECT orderkey FROM orders"); assertContains(all, fullSample); @@ -1461,31 +1784,39 @@ public void testTableSampleSystem() { @Test public void testTableSampleWithFiltering() { - MaterializedResult emptySample = computeActual("SELECT DISTINCT orderkey, orderdate FROM orders TABLESAMPLE SYSTEM (99) WHERE orderkey BETWEEN 0 AND 0"); - MaterializedResult halfSample = computeActual("SELECT DISTINCT orderkey, orderdate FROM orders TABLESAMPLE SYSTEM (50) WHERE orderkey BETWEEN 0 AND 9999999999"); + MaterializedResult emptySample = + computeActual( + "SELECT DISTINCT orderkey, orderdate FROM orders TABLESAMPLE SYSTEM (99) WHERE orderkey BETWEEN 0 AND 0"); + MaterializedResult halfSample = + computeActual( + "SELECT DISTINCT orderkey, orderdate FROM orders TABLESAMPLE SYSTEM (50) WHERE orderkey BETWEEN 0 AND 9999999999"); MaterializedResult all = computeActual("SELECT orderkey, orderdate FROM orders"); Assert.assertEquals(emptySample.getMaterializedRows().size(), 0); - // Assertions need to be loose here because SYSTEM sampling random selects data on split boundaries. In this case either all the data will be selected, or - // none of it. Sampling with a 100% ratio is ignored, so that also cannot be used to guarantee results. + // Assertions need to be loose here because SYSTEM sampling random selects data on split + // boundaries. In this case either all the data will be selected, or + // none of it. Sampling with a 100% ratio is ignored, so that also cannot be used to guarantee + // results. 
assertTrue(all.getMaterializedRows().size() >= halfSample.getMaterializedRows().size()); } @Test public void testShowCreateTable() { assertThat((String) computeActual("SHOW CREATE TABLE orders").getOnlyValue()) - // If the connector reports additional column properties, the expected value needs to be adjusted in the test subclass - .matches("CREATE TABLE \\w+\\.\\w+\\.orders \\Q(\n" + - " orderkey bigint,\n" + - " custkey bigint,\n" + - " orderstatus varchar(1),\n" + - " totalprice double,\n" + - " orderdate date,\n" + - " orderpriority varchar(15),\n" + - " clerk varchar(15),\n" + - " shippriority integer,\n" + - " comment varchar(79)\n" + - ")"); + // If the connector reports additional column properties, the expected value needs to be + // adjusted in the test subclass + .matches( + "CREATE TABLE \\w+\\.\\w+\\.orders \\Q(\n" + + " orderkey bigint,\n" + + " custkey bigint,\n" + + " orderstatus varchar(1),\n" + + " totalprice double,\n" + + " orderdate date,\n" + + " orderpriority varchar(15),\n" + + " clerk varchar(15),\n" + + " shippriority integer,\n" + + " comment varchar(79)\n" + + ")"); } @Test @@ -1494,26 +1825,44 @@ public void testSelectInformationSchemaTables() { String schema = getSession().getSchema().get(); String schemaPattern = schema.replaceAll("^.", "_"); - assertQuery("SELECT table_name FROM information_schema.tables WHERE table_schema = '" + schema + "' AND table_name = 'orders'", "VALUES 'orders'"); - assertQuery("SELECT table_name FROM information_schema.tables WHERE table_schema LIKE '" + schema + "' AND table_name LIKE '%rders'", "VALUES 'orders'"); - assertQuery("SELECT table_name FROM information_schema.tables WHERE table_schema LIKE '" + schemaPattern + "' AND table_name LIKE '%rders'", "VALUES 'orders'"); assertQuery( - "SELECT table_name FROM information_schema.tables " + - "WHERE table_catalog = '" + catalog + "' AND table_schema LIKE '" + schema + "' AND table_name LIKE '%orders'", + "SELECT table_name FROM information_schema.tables WHERE table_schema = '" + + schema + + "' AND table_name = 'orders'", + "VALUES 'orders'"); + assertQuery( + "SELECT table_name FROM information_schema.tables WHERE table_schema LIKE '" + + schema + + "' AND table_name LIKE '%rders'", + "VALUES 'orders'"); + assertQuery( + "SELECT table_name FROM information_schema.tables WHERE table_schema LIKE '" + + schemaPattern + + "' AND table_name LIKE '%rders'", + "VALUES 'orders'"); + assertQuery( + "SELECT table_name FROM information_schema.tables " + + "WHERE table_catalog = '" + + catalog + + "' AND table_schema LIKE '" + + schema + + "' AND table_name LIKE '%orders'", "VALUES 'orders'"); - assertQuery("SELECT table_name FROM information_schema.tables WHERE table_catalog = 'something_else'", "SELECT '' WHERE false"); + assertQuery( + "SELECT table_name FROM information_schema.tables WHERE table_catalog = 'something_else'", + "SELECT '' WHERE false"); assertQuery( "SELECT DISTINCT table_name FROM information_schema.tables WHERE table_schema = 'information_schema' OR rand() = 42 ORDER BY 1", - "VALUES " + - "('applicable_roles'), " + - "('columns'), " + - "('enabled_roles'), " + - "('roles'), " + - "('schemata'), " + - "('table_privileges'), " + - "('tables'), " + - "('views')"); + "VALUES " + + "('applicable_roles'), " + + "('columns'), " + + "('enabled_roles'), " + + "('roles'), " + + "('schemata'), " + + "('table_privileges'), " + + "('tables'), " + + "('views')"); } @Test @@ -1522,66 +1871,120 @@ public void testSelectInformationSchemaColumns() { String schema = 
getSession().getSchema().get(); String schemaPattern = schema.replaceAll(".$", "_"); - @Language("SQL") String ordersTableWithColumns = "VALUES " + - "('orders', 'orderkey'), " + - "('orders', 'custkey'), " + - "('orders', 'orderstatus'), " + - "('orders', 'totalprice'), " + - "('orders', 'orderdate'), " + - "('orders', 'orderpriority'), " + - "('orders', 'clerk'), " + - "('orders', 'shippriority'), " + - "('orders', 'comment')"; - - assertQuery("SELECT table_schema FROM information_schema.columns WHERE table_schema = '" + schema + "' GROUP BY table_schema", "VALUES '" + schema + "'"); - assertQuery("SELECT table_name FROM information_schema.columns WHERE table_name = 'orders' GROUP BY table_name", "VALUES 'orders'"); - assertQuery("SELECT table_name, column_name FROM information_schema.columns WHERE table_schema = '" + schema + "' AND table_name = 'orders'", ordersTableWithColumns); - assertQuery("SELECT table_name, column_name FROM information_schema.columns WHERE table_schema = '" + schema + "' AND table_name LIKE '%rders'", ordersTableWithColumns); - assertQuery("SELECT table_name, column_name FROM information_schema.columns WHERE table_schema LIKE '" + schemaPattern + "' AND table_name LIKE '_rder_'", ordersTableWithColumns); - assertThat(query( - "SELECT table_name, column_name FROM information_schema.columns " + - "WHERE table_catalog = '" + catalog + "' AND table_schema = '" + schema + "' AND table_name LIKE '%orders%'")) + @Language("SQL") + String ordersTableWithColumns = + "VALUES " + + "('orders', 'orderkey'), " + + "('orders', 'custkey'), " + + "('orders', 'orderstatus'), " + + "('orders', 'totalprice'), " + + "('orders', 'orderdate'), " + + "('orders', 'orderpriority'), " + + "('orders', 'clerk'), " + + "('orders', 'shippriority'), " + + "('orders', 'comment')"; + + assertQuery( + "SELECT table_schema FROM information_schema.columns WHERE table_schema = '" + + schema + + "' GROUP BY table_schema", + "VALUES '" + schema + "'"); + assertQuery( + "SELECT table_name FROM information_schema.columns WHERE table_name = 'orders' GROUP BY table_name", + "VALUES 'orders'"); + assertQuery( + "SELECT table_name, column_name FROM information_schema.columns WHERE table_schema = '" + + schema + + "' AND table_name = 'orders'", + ordersTableWithColumns); + assertQuery( + "SELECT table_name, column_name FROM information_schema.columns WHERE table_schema = '" + + schema + + "' AND table_name LIKE '%rders'", + ordersTableWithColumns); + assertQuery( + "SELECT table_name, column_name FROM information_schema.columns WHERE table_schema LIKE '" + + schemaPattern + + "' AND table_name LIKE '_rder_'", + ordersTableWithColumns); + assertThat( + query( + "SELECT table_name, column_name FROM information_schema.columns " + + "WHERE table_catalog = '" + + catalog + + "' AND table_schema = '" + + schema + + "' AND table_name LIKE '%orders%'")) .skippingTypesCheck() .containsAll(ordersTableWithColumns); assertQuerySucceeds("SELECT * FROM information_schema.columns"); - assertQuery("SELECT DISTINCT table_name, column_name FROM information_schema.columns WHERE table_name LIKE '_rders'", ordersTableWithColumns); - assertQuerySucceeds("SELECT * FROM information_schema.columns WHERE table_catalog = '" + catalog + "'"); - assertQuerySucceeds("SELECT * FROM information_schema.columns WHERE table_catalog = '" + catalog + "' AND table_schema = '" + schema + "'"); - assertQuery("SELECT table_name, column_name FROM information_schema.columns WHERE table_catalog = '" + catalog + "' AND table_schema = '" + schema + "' 
AND table_name LIKE '_rders'", ordersTableWithColumns); - assertQuerySucceeds("SELECT * FROM information_schema.columns WHERE table_catalog = '" + catalog + "' AND table_name LIKE '%'"); - assertQuery("SELECT column_name FROM information_schema.columns WHERE table_catalog = 'something_else'", "SELECT '' WHERE false"); + assertQuery( + "SELECT DISTINCT table_name, column_name FROM information_schema.columns WHERE table_name LIKE '_rders'", + ordersTableWithColumns); + assertQuerySucceeds( + "SELECT * FROM information_schema.columns WHERE table_catalog = '" + catalog + "'"); + assertQuerySucceeds( + "SELECT * FROM information_schema.columns WHERE table_catalog = '" + + catalog + + "' AND table_schema = '" + + schema + + "'"); + assertQuery( + "SELECT table_name, column_name FROM information_schema.columns WHERE table_catalog = '" + + catalog + + "' AND table_schema = '" + + schema + + "' AND table_name LIKE '_rders'", + ordersTableWithColumns); + assertQuerySucceeds( + "SELECT * FROM information_schema.columns WHERE table_catalog = '" + + catalog + + "' AND table_name LIKE '%'"); + assertQuery( + "SELECT column_name FROM information_schema.columns WHERE table_catalog = 'something_else'", + "SELECT '' WHERE false"); assertQuery( "SELECT DISTINCT table_name FROM information_schema.columns WHERE table_schema = 'information_schema' OR rand() = 42 ORDER BY 1", - "VALUES " + - "('applicable_roles'), " + - "('columns'), " + - "('enabled_roles'), " + - "('roles'), " + - "('schemata'), " + - "('table_privileges'), " + - "('tables'), " + - "('views')"); + "VALUES " + + "('applicable_roles'), " + + "('columns'), " + + "('enabled_roles'), " + + "('roles'), " + + "('schemata'), " + + "('table_privileges'), " + + "('tables'), " + + "('views')"); } @Test public void testShowCreateInformationSchema() { assertThat(query("SHOW CREATE SCHEMA information_schema")) .skippingTypesCheck() - .matches(format("VALUES 'CREATE SCHEMA %s.information_schema'", getSession().getCatalog().orElseThrow())); + .matches( + format( + "VALUES 'CREATE SCHEMA %s.information_schema'", + getSession().getCatalog().orElseThrow())); } @Test public void testShowCreateInformationSchemaTable() { - assertQueryFails("SHOW CREATE VIEW information_schema.schemata", "line 1:1: Relation '\\w+.information_schema.schemata' is a table, not a view"); - assertQueryFails("SHOW CREATE MATERIALIZED VIEW information_schema.schemata", "line 1:1: Relation '\\w+.information_schema.schemata' is a table, not a materialized view"); + assertQueryFails( + "SHOW CREATE VIEW information_schema.schemata", + "line 1:1: Relation '\\w+.information_schema.schemata' is a table, not a view"); + assertQueryFails( + "SHOW CREATE MATERIALIZED VIEW information_schema.schemata", + "line 1:1: Relation '\\w+.information_schema.schemata' is a table, not a materialized view"); assertThat((String) computeScalar("SHOW CREATE TABLE information_schema.schemata")) - .isEqualTo("CREATE TABLE " + getSession().getCatalog().orElseThrow() + ".information_schema.schemata (\n" + - " catalog_name varchar,\n" + - " schema_name varchar\n" + - ")"); + .isEqualTo( + "CREATE TABLE " + + getSession().getCatalog().orElseThrow() + + ".information_schema.schemata (\n" + + " catalog_name varchar,\n" + + " schema_name varchar\n" + + ")"); } @Test @@ -1591,38 +1994,46 @@ public void testRollback() { String table = "test_rollback_" + randomNameSuffix(); computeActual(format("CREATE TABLE %s (x int)", table)); - assertThatThrownBy(() -> - inTransaction(session -> { - assertUpdate(session, format("INSERT 
INTO %s VALUES (42)", table), 1); - throw new RollbackException(); - })) + assertThatThrownBy( + () -> + inTransaction( + session -> { + assertUpdate(session, format("INSERT INTO %s VALUES (42)", table), 1); + throw new RollbackException(); + })) .isInstanceOf(RollbackException.class); assertQuery(format("SELECT count(*) FROM %s", table), "SELECT 0"); } - private static class RollbackException - extends RuntimeException { - } + private static class RollbackException extends RuntimeException {} @Test public void testWriteNotAllowedInTransaction() { skipTestUnless(!hasBehavior(SUPPORTS_MULTI_STATEMENT_WRITES)); assertWriteNotAllowedInTransaction(SUPPORTS_CREATE_SCHEMA, "CREATE SCHEMA write_not_allowed"); - assertWriteNotAllowedInTransaction(SUPPORTS_CREATE_TABLE, "CREATE TABLE write_not_allowed (x int)"); + assertWriteNotAllowedInTransaction( + SUPPORTS_CREATE_TABLE, "CREATE TABLE write_not_allowed (x int)"); assertWriteNotAllowedInTransaction(SUPPORTS_CREATE_TABLE, "DROP TABLE region"); - assertWriteNotAllowedInTransaction(SUPPORTS_CREATE_TABLE_WITH_DATA, "CREATE TABLE write_not_allowed AS SELECT * FROM region"); - assertWriteNotAllowedInTransaction(SUPPORTS_CREATE_VIEW, "CREATE VIEW write_not_allowed AS SELECT * FROM region"); - assertWriteNotAllowedInTransaction(SUPPORTS_CREATE_MATERIALIZED_VIEW, "CREATE MATERIALIZED VIEW write_not_allowed AS SELECT * FROM region"); - assertWriteNotAllowedInTransaction(SUPPORTS_RENAME_TABLE, "ALTER TABLE region RENAME TO region_name"); - assertWriteNotAllowedInTransaction(SUPPORTS_INSERT, "INSERT INTO region (regionkey) VALUES (123)"); + assertWriteNotAllowedInTransaction( + SUPPORTS_CREATE_TABLE_WITH_DATA, "CREATE TABLE write_not_allowed AS SELECT * FROM region"); + assertWriteNotAllowedInTransaction( + SUPPORTS_CREATE_VIEW, "CREATE VIEW write_not_allowed AS SELECT * FROM region"); + assertWriteNotAllowedInTransaction( + SUPPORTS_CREATE_MATERIALIZED_VIEW, + "CREATE MATERIALIZED VIEW write_not_allowed AS SELECT * FROM region"); + assertWriteNotAllowedInTransaction( + SUPPORTS_RENAME_TABLE, "ALTER TABLE region RENAME TO region_name"); + assertWriteNotAllowedInTransaction( + SUPPORTS_INSERT, "INSERT INTO region (regionkey) VALUES (123)"); assertWriteNotAllowedInTransaction(SUPPORTS_DELETE, "DELETE FROM region WHERE regionkey = 123"); // REFRESH MATERIALIZED VIEW is tested in testMaterializedView } - protected void assertWriteNotAllowedInTransaction(TestingConnectorBehavior behavior, @Language("SQL") String sql) { + protected void assertWriteNotAllowedInTransaction( + TestingConnectorBehavior behavior, @Language("SQL") String sql) { if (hasBehavior(behavior)) { assertThatThrownBy(() -> inTransaction(session -> computeActual(session, sql))) .hasMessageMatching("Catalog only supports writes using autocommit: \\w+"); @@ -1656,23 +2067,32 @@ public void testRenameSchema() { @Test public void testAddColumn() { if (!hasBehavior(SUPPORTS_ADD_COLUMN)) { - assertQueryFails("ALTER TABLE nation ADD COLUMN test_add_column bigint", "This connector does not support adding columns"); + assertQueryFails( + "ALTER TABLE nation ADD COLUMN test_add_column bigint", + "This connector does not support adding columns"); return; } String tableName; - try (TestTable table = new TestTable(getQueryRunner()::execute, "test_add_column_", tableDefinitionForAddColumn())) { + try (TestTable table = + new TestTable( + getQueryRunner()::execute, "test_add_column_", tableDefinitionForAddColumn())) { tableName = table.getName(); assertUpdate("INSERT INTO " + table.getName() + " SELECT 
'first'", 1); - assertQueryFails("ALTER TABLE " + table.getName() + " ADD COLUMN x bigint", ".* Column 'x' already exists"); - assertQueryFails("ALTER TABLE " + table.getName() + " ADD COLUMN X bigint", ".* Column 'X' already exists"); - assertQueryFails("ALTER TABLE " + table.getName() + " ADD COLUMN q bad_type", ".* Unknown type 'bad_type' for column 'q'"); + assertQueryFails( + "ALTER TABLE " + table.getName() + " ADD COLUMN x bigint", + ".* Column 'x' already exists"); + assertQueryFails( + "ALTER TABLE " + table.getName() + " ADD COLUMN X bigint", + ".* Column 'X' already exists"); + assertQueryFails( + "ALTER TABLE " + table.getName() + " ADD COLUMN q bad_type", + ".* Unknown type 'bad_type' for column 'q'"); assertUpdate("ALTER TABLE " + table.getName() + " ADD COLUMN a varchar(50)"); assertUpdate("INSERT INTO " + table.getName() + " SELECT 'second', 'xxx'", 1); assertQuery( - "SELECT x, a FROM " + table.getName(), - "VALUES ('first', NULL), ('second', 'xxx')"); + "SELECT x, a FROM " + table.getName(), "VALUES ('first', NULL), ('second', 'xxx')"); assertUpdate("ALTER TABLE " + table.getName() + " ADD COLUMN b double"); assertUpdate("INSERT INTO " + table.getName() + " SELECT 'third', 'yyy', 33.3E0", 1); @@ -1682,7 +2102,8 @@ public void testAddColumn() { assertUpdate("ALTER TABLE " + table.getName() + " ADD COLUMN IF NOT EXISTS c varchar(50)"); assertUpdate("ALTER TABLE " + table.getName() + " ADD COLUMN IF NOT EXISTS c varchar(50)"); - assertUpdate("INSERT INTO " + table.getName() + " SELECT 'fourth', 'zzz', 55.3E0, 'newColumn'", 1); + assertUpdate( + "INSERT INTO " + table.getName() + " SELECT 'fourth', 'zzz', 55.3E0, 'newColumn'", 1); assertQuery( "SELECT x, a, b, c FROM " + table.getName(), "VALUES ('first', NULL, NULL, NULL), ('second', 'xxx', NULL, NULL), ('third', 'yyy', 33.3, NULL), ('fourth', 'zzz', 55.3, 'newColumn')"); @@ -1694,9 +2115,7 @@ public void testAddColumn() { assertFalse(getQueryRunner().tableExists(getSession(), tableName)); } - /** - * The table must have one column 'x' of varchar type. - */ + /** The table must have one column 'x' of varchar type. 
*/ protected String tableDefinitionForAddColumn() { return "(x VARCHAR)"; } @@ -1708,14 +2127,20 @@ public void testAddColumnWithComment() { return; } if (!hasBehavior(SUPPORTS_ADD_COLUMN_WITH_COMMENT)) { - assertQueryFails("ALTER TABLE nation ADD COLUMN test_add_col_desc bigint COMMENT 'test column comment'", "This connector does not support adding columns with comments"); + assertQueryFails( + "ALTER TABLE nation ADD COLUMN test_add_col_desc bigint COMMENT 'test column comment'", + "This connector does not support adding columns with comments"); return; } - try (TestTable table = new TestTable(getQueryRunner()::execute, "test_add_col_desc_", "(a_varchar varchar)")) { + try (TestTable table = + new TestTable(getQueryRunner()::execute, "test_add_col_desc_", "(a_varchar varchar)")) { String tableName = table.getName(); - assertUpdate("ALTER TABLE " + tableName + " ADD COLUMN b_varchar varchar COMMENT 'test new column comment'"); + assertUpdate( + "ALTER TABLE " + + tableName + + " ADD COLUMN b_varchar varchar COMMENT 'test new column comment'"); assertThat(getColumnComment(tableName, "b_varchar")).isEqualTo("test new column comment"); assertUpdate("ALTER TABLE " + tableName + " ADD COLUMN empty_comment varchar COMMENT ''"); @@ -1726,14 +2151,18 @@ public void testAddColumnWithComment() { @Test public void testDropColumn() { if (!hasBehavior(SUPPORTS_DROP_COLUMN)) { - assertQueryFails("ALTER TABLE nation DROP COLUMN nationkey", "This connector does not support dropping columns"); + assertQueryFails( + "ALTER TABLE nation DROP COLUMN nationkey", + "This connector does not support dropping columns"); return; } skipTestUnless(hasBehavior(SUPPORTS_CREATE_TABLE)); String tableName; - try (TestTable table = new TestTable(getQueryRunner()::execute, "test_drop_column_", "AS SELECT 123 x, 456 y, 111 a")) { + try (TestTable table = + new TestTable( + getQueryRunner()::execute, "test_drop_column_", "AS SELECT 123 x, 456 y, 111 a")) { tableName = table.getName(); assertUpdate("ALTER TABLE " + tableName + " DROP COLUMN x"); assertUpdate("ALTER TABLE " + tableName + " DROP COLUMN IF EXISTS y"); @@ -1741,7 +2170,9 @@ public void testDropColumn() { assertQueryFails("SELECT x FROM " + tableName, ".* Column 'x' cannot be resolved"); assertQueryFails("SELECT y FROM " + tableName, ".* Column 'y' cannot be resolved"); - assertQueryFails("ALTER TABLE " + tableName + " DROP COLUMN a", ".* Cannot drop the only column in a table"); + assertQueryFails( + "ALTER TABLE " + tableName + " DROP COLUMN a", + ".* Cannot drop the only column in a table"); } assertFalse(getQueryRunner().tableExists(getSession(), tableName)); @@ -1753,29 +2184,32 @@ public void testDropColumn() { @Test public void testRenameColumn() { if (!hasBehavior(SUPPORTS_RENAME_COLUMN)) { - assertQueryFails("ALTER TABLE nation RENAME COLUMN nationkey TO test_rename_column", "This connector does not support renaming columns"); + assertQueryFails( + "ALTER TABLE nation RENAME COLUMN nationkey TO test_rename_column", + "This connector does not support renaming columns"); return; } skipTestUnless(hasBehavior(SUPPORTS_CREATE_TABLE)); String tableName; - try (TestTable table = new TestTable(getQueryRunner()::execute, "test_rename_column_", "AS SELECT 'some value' x")) { + try (TestTable table = + new TestTable( + getQueryRunner()::execute, "test_rename_column_", "AS SELECT 'some value' x")) { tableName = table.getName(); assertUpdate("ALTER TABLE " + tableName + " RENAME COLUMN x TO before_y"); assertUpdate("ALTER TABLE " + tableName + " RENAME COLUMN IF 
EXISTS before_y TO y"); assertUpdate("ALTER TABLE " + tableName + " RENAME COLUMN IF EXISTS columnNotExists TO y"); assertQuery("SELECT y FROM " + tableName, "VALUES 'some value'"); - assertUpdate("ALTER TABLE " + tableName + " RENAME COLUMN y TO Z"); // 'Z' is upper-case, not delimited + assertUpdate( + "ALTER TABLE " + tableName + " RENAME COLUMN y TO Z"); // 'Z' is upper-case, not delimited assertQuery( "SELECT z FROM " + tableName, // 'z' is lower-case, not delimited "VALUES 'some value'"); assertUpdate("ALTER TABLE " + tableName + " RENAME COLUMN IF EXISTS z TO a"); - assertQuery( - "SELECT a FROM " + tableName, - "VALUES 'some value'"); + assertQuery("SELECT a FROM " + tableName, "VALUES 'some value'"); // There should be exactly one column assertQuery("SELECT * FROM " + tableName, "VALUES 'some value'"); @@ -1783,7 +2217,8 @@ public void testRenameColumn() { assertFalse(getQueryRunner().tableExists(getSession(), tableName)); assertUpdate("ALTER TABLE IF EXISTS " + tableName + " RENAME COLUMN columnNotExists TO y"); - assertUpdate("ALTER TABLE IF EXISTS " + tableName + " RENAME COLUMN IF EXISTS columnNotExists TO y"); + assertUpdate( + "ALTER TABLE IF EXISTS " + tableName + " RENAME COLUMN IF EXISTS columnNotExists TO y"); assertFalse(getQueryRunner().tableExists(getSession(), tableName)); } @@ -1791,22 +2226,30 @@ public void testRenameColumn() { public void testCreateTable() { String tableName = "test_create_" + randomNameSuffix(); if (!hasBehavior(SUPPORTS_CREATE_TABLE)) { - assertQueryFails("CREATE TABLE " + tableName + " (a bigint, b double, c varchar(50))", "This connector does not support creating tables"); + assertQueryFails( + "CREATE TABLE " + tableName + " (a bigint, b double, c varchar(50))", + "This connector does not support creating tables"); return; } assertUpdate("CREATE TABLE " + tableName + " (a bigint, b double, c varchar(50))"); assertTrue(getQueryRunner().tableExists(getSession(), tableName)); assertTableColumnNames(tableName, "a", "b", "c"); - assertNull(getTableComment(getSession().getCatalog().orElseThrow(), getSession().getSchema().orElseThrow(), tableName)); + assertNull( + getTableComment( + getSession().getCatalog().orElseThrow(), + getSession().getSchema().orElseThrow(), + tableName)); assertUpdate("DROP TABLE " + tableName); assertFalse(getQueryRunner().tableExists(getSession(), tableName)); - assertQueryFails("CREATE TABLE " + tableName + " (a bad_type)", ".* Unknown type 'bad_type' for column 'a'"); + assertQueryFails( + "CREATE TABLE " + tableName + " (a bad_type)", ".* Unknown type 'bad_type' for column 'a'"); assertFalse(getQueryRunner().tableExists(getSession(), tableName)); - // TODO (https://github.com/trinodb/trino/issues/5901) revert to longer name when Oracle version is updated + // TODO (https://github.com/trinodb/trino/issues/5901) revert to longer name when Oracle version + // is updated tableName = "test_cr_not_exists_" + randomNameSuffix(); assertUpdate("CREATE TABLE " + tableName + " (a bigint, b varchar(50), c double)"); assertTrue(getQueryRunner().tableExists(getSession(), tableName)); @@ -1826,7 +2269,8 @@ public void testCreateTable() { assertTableColumnNames(tableName, "a", "b", "c"); String tableNameLike = "test_create_like_" + randomNameSuffix(); - assertUpdate("CREATE TABLE " + tableNameLike + " (LIKE " + tableName + ", d bigint, e varchar(50))"); + assertUpdate( + "CREATE TABLE " + tableNameLike + " (LIKE " + tableName + ", d bigint, e varchar(50))"); assertTrue(getQueryRunner().tableExists(getSession(), tableNameLike)); 
assertTableColumnNames(tableNameLike, "a", "b", "c", "d", "e"); @@ -1856,21 +2300,29 @@ public void testCreateTableSchemaNotFound() { public void testCreateTableAsSelect() { String tableName = "test_ctas" + randomNameSuffix(); if (!hasBehavior(SUPPORTS_CREATE_TABLE)) { - assertQueryFails("CREATE TABLE IF NOT EXISTS " + tableName + " AS SELECT name, regionkey FROM nation", "This connector does not support creating tables with data"); + assertQueryFails( + "CREATE TABLE IF NOT EXISTS " + tableName + " AS SELECT name, regionkey FROM nation", + "This connector does not support creating tables with data"); return; } - assertUpdate("CREATE TABLE IF NOT EXISTS " + tableName + " AS SELECT name, regionkey FROM nation", "SELECT count(*) FROM nation"); + assertUpdate( + "CREATE TABLE IF NOT EXISTS " + tableName + " AS SELECT name, regionkey FROM nation", + "SELECT count(*) FROM nation"); assertTableColumnNames(tableName, "name", "regionkey"); - assertNull(getTableComment(getSession().getCatalog().orElseThrow(), getSession().getSchema().orElseThrow(), tableName)); + assertNull( + getTableComment( + getSession().getCatalog().orElseThrow(), + getSession().getSchema().orElseThrow(), + tableName)); assertUpdate("DROP TABLE " + tableName); - // Some connectors support CREATE TABLE AS but not the ordinary CREATE TABLE. Let's test CTAS IF NOT EXISTS with a table that is guaranteed to exist. + // Some connectors support CREATE TABLE AS but not the ordinary CREATE TABLE. Let's test CTAS IF + // NOT EXISTS with a table that is guaranteed to exist. assertUpdate("CREATE TABLE IF NOT EXISTS nation AS SELECT custkey, acctbal FROM customer", 0); assertTableColumnNames("nation", "nationkey", "name", "regionkey", "comment"); assertCreateTableAsSelect( - "SELECT custkey, address, acctbal FROM customer", - "SELECT count(*) FROM customer"); + "SELECT custkey, address, acctbal FROM customer", "SELECT count(*) FROM customer"); assertCreateTableAsSelect( "SELECT mktsegment, sum(acctbal) x FROM customer GROUP BY mktsegment", @@ -1881,8 +2333,7 @@ public void testCreateTableAsSelect() { "SELECT 1"); assertCreateTableAsSelect( - "SELECT custkey FROM customer ORDER BY custkey LIMIT 10", - "SELECT 10"); + "SELECT custkey FROM customer ORDER BY custkey LIMIT 10", "SELECT 10"); assertCreateTableAsSelect( "SELECT * FROM customer WITH DATA", @@ -1890,35 +2341,31 @@ public void testCreateTableAsSelect() { "SELECT count(*) FROM customer"); assertCreateTableAsSelect( - "SELECT * FROM customer WITH NO DATA", - "SELECT * FROM customer LIMIT 0", - "SELECT 0"); + "SELECT * FROM customer WITH NO DATA", "SELECT * FROM customer LIMIT 0", "SELECT 0"); // Tests for CREATE TABLE with UNION ALL: exercises PushTableWriteThroughUnion optimizer assertCreateTableAsSelect( - "SELECT name, custkey, acctbal FROM customer WHERE custkey % 2 = 0 UNION ALL " + - "SELECT name, custkey, acctbal FROM customer WHERE custkey % 2 = 1", - "SELECT name, custkey, acctbal FROM customer", - "SELECT count(*) FROM customer"); + "SELECT name, custkey, acctbal FROM customer WHERE custkey % 2 = 0 UNION ALL " + + "SELECT name, custkey, acctbal FROM customer WHERE custkey % 2 = 1", + "SELECT name, custkey, acctbal FROM customer", "SELECT count(*) FROM customer"); assertCreateTableAsSelect( Session.builder(getSession()).setSystemProperty("redistribute_writes", "true").build(), - "SELECT CAST(custkey AS BIGINT) custkey, acctbal FROM customer UNION ALL " + - "SELECT 1234567890, 1.23", - "SELECT custkey, acctbal FROM customer UNION ALL " + - "SELECT 1234567890, 1.23", + "SELECT 
CAST(custkey AS BIGINT) custkey, acctbal FROM customer UNION ALL " + + "SELECT 1234567890, 1.23", + "SELECT custkey, acctbal FROM customer UNION ALL " + "SELECT 1234567890, 1.23", "SELECT count(*) + 1 FROM customer"); assertCreateTableAsSelect( Session.builder(getSession()).setSystemProperty("redistribute_writes", "false").build(), - "SELECT CAST(custkey AS BIGINT) custkey, acctbal FROM customer UNION ALL " + - "SELECT 1234567890, 1.23", - "SELECT custkey, acctbal FROM customer UNION ALL " + - "SELECT 1234567890, 1.23", + "SELECT CAST(custkey AS BIGINT) custkey, acctbal FROM customer UNION ALL " + + "SELECT 1234567890, 1.23", + "SELECT custkey, acctbal FROM customer UNION ALL " + "SELECT 1234567890, 1.23", "SELECT count(*) + 1 FROM customer"); - assertExplainAnalyze("EXPLAIN ANALYZE CREATE TABLE " + tableName + " AS SELECT mktsegment FROM customer"); + assertExplainAnalyze( + "EXPLAIN ANALYZE CREATE TABLE " + tableName + " AS SELECT mktsegment FROM customer"); assertQuery("SELECT * from " + tableName, "SELECT mktsegment FROM customer"); assertUpdate("DROP TABLE " + tableName); } @@ -1942,20 +2389,26 @@ public void testCreateTableAsSelectSchemaNotFound() { public void testCreateTableAsSelectWithUnicode() { // Covered by testCreateTableAsSelect skipTestUnless(hasBehavior(SUPPORTS_CREATE_TABLE)); - assertCreateTableAsSelect( - "SELECT '\u2603' unicode", - "SELECT 1"); + assertCreateTableAsSelect("SELECT '\u2603' unicode", "SELECT 1"); } - protected void assertCreateTableAsSelect(@Language("SQL") String query, @Language("SQL") String rowCountQuery) { + protected void assertCreateTableAsSelect( + @Language("SQL") String query, @Language("SQL") String rowCountQuery) { assertCreateTableAsSelect(getSession(), query, query, rowCountQuery); } - protected void assertCreateTableAsSelect(@Language("SQL") String query, @Language("SQL") String expectedQuery, @Language("SQL") String rowCountQuery) { + protected void assertCreateTableAsSelect( + @Language("SQL") String query, + @Language("SQL") String expectedQuery, + @Language("SQL") String rowCountQuery) { assertCreateTableAsSelect(getSession(), query, expectedQuery, rowCountQuery); } - protected void assertCreateTableAsSelect(Session session, @Language("SQL") String query, @Language("SQL") String expectedQuery, @Language("SQL") String rowCountQuery) { + protected void assertCreateTableAsSelect( + Session session, + @Language("SQL") String query, + @Language("SQL") String expectedQuery, + @Language("SQL") String rowCountQuery) { String table = "test_ctas_" + randomNameSuffix(); assertUpdate(session, "CREATE TABLE " + table + " AS " + query, rowCountQuery); assertQuery(session, "SELECT * FROM " + table, expectedQuery); @@ -1969,18 +2422,24 @@ public void testCreateTableAsSelectNegativeDate() { String tableName = "negative_date_" + randomNameSuffix(); if (!hasBehavior(SUPPORTS_CREATE_TABLE_WITH_DATA)) { - assertQueryFails(format("CREATE TABLE %s AS SELECT DATE '-0001-01-01' AS dt", tableName), "This connector does not support creating tables with data"); + assertQueryFails( + format("CREATE TABLE %s AS SELECT DATE '-0001-01-01' AS dt", tableName), + "This connector does not support creating tables with data"); return; } if (!hasBehavior(SUPPORTS_NEGATIVE_DATE)) { - assertQueryFails(format("CREATE TABLE %s AS SELECT DATE '-0001-01-01' AS dt", tableName), errorMessageForCreateTableAsSelectNegativeDate("-0001-01-01")); + assertQueryFails( + format("CREATE TABLE %s AS SELECT DATE '-0001-01-01' AS dt", tableName), + 
errorMessageForCreateTableAsSelectNegativeDate("-0001-01-01")); return; } try { assertUpdate(format("CREATE TABLE %s AS SELECT DATE '-0001-01-01' AS dt", tableName), 1); assertQuery("SELECT * FROM " + tableName, "VALUES DATE '-0001-01-01'"); - assertQuery(format("SELECT * FROM %s WHERE dt = DATE '-0001-01-01'", tableName), "VALUES DATE '-0001-01-01'"); + assertQuery( + format("SELECT * FROM %s WHERE dt = DATE '-0001-01-01'", tableName), + "VALUES DATE '-0001-01-01'"); } finally { assertUpdate("DROP TABLE IF EXISTS " + tableName); } @@ -1999,7 +2458,9 @@ public void testRenameTable() { String renamedTable = "test_rename_new_" + randomNameSuffix(); if (!hasBehavior(SUPPORTS_RENAME_TABLE)) { - assertQueryFails("ALTER TABLE " + tableName + " RENAME TO " + renamedTable, "This connector does not support renaming tables"); + assertQueryFails( + "ALTER TABLE " + tableName + " RENAME TO " + renamedTable, + "This connector does not support renaming tables"); assertUpdate("DROP TABLE " + tableName); return; } @@ -2011,10 +2472,13 @@ public void testRenameTable() { assertUpdate("ALTER TABLE IF EXISTS " + renamedTable + " RENAME TO " + testExistsTableName); assertQuery("SELECT x FROM " + testExistsTableName, "VALUES 123"); - String uppercaseName = "TEST_RENAME_" + randomNameSuffix(); // Test an upper-case, not delimited identifier + String uppercaseName = + "TEST_RENAME_" + randomNameSuffix(); // Test an upper-case, not delimited identifier assertUpdate("ALTER TABLE " + testExistsTableName + " RENAME TO " + uppercaseName); assertQuery( - "SELECT x FROM " + uppercaseName.toLowerCase(ENGLISH), // Ensure select allows for lower-case, not delimited identifier + "SELECT x FROM " + + uppercaseName.toLowerCase( + ENGLISH), // Ensure select allows for lower-case, not delimited identifier "VALUES 123"); assertUpdate("DROP TABLE " + uppercaseName); @@ -2033,16 +2497,20 @@ public void testRenameTableAcrossSchema() { if (!hasBehavior(SUPPORTS_RENAME_TABLE)) { throw new SkipException("Skipping since rename table is not supported at all"); } - assertQueryFails("ALTER TABLE nation RENAME TO other_schema.yyyy", "This connector does not support renaming tables across schemas"); + assertQueryFails( + "ALTER TABLE nation RENAME TO other_schema.yyyy", + "This connector does not support renaming tables across schemas"); return; } if (!hasBehavior(SUPPORTS_CREATE_SCHEMA)) { - throw new AssertionError("Cannot test ALTER TABLE RENAME across schemas without CREATE SCHEMA, the test needs to be implemented in a connector-specific way"); + throw new AssertionError( + "Cannot test ALTER TABLE RENAME across schemas without CREATE SCHEMA, the test needs to be implemented in a connector-specific way"); } if (!hasBehavior(SUPPORTS_CREATE_TABLE)) { - throw new AssertionError("Cannot test ALTER TABLE RENAME across schemas without CREATE TABLE, the test needs to be implemented in a connector-specific way"); + throw new AssertionError( + "Cannot test ALTER TABLE RENAME across schemas without CREATE TABLE, the test needs to be implemented in a connector-specific way"); } String tableName = "test_rename_old_" + randomNameSuffix(); @@ -2061,12 +2529,18 @@ public void testRenameTableAcrossSchema() { assertUpdate("DROP SCHEMA " + schemaName); assertFalse(getQueryRunner().tableExists(getSession(), tableName)); - assertFalse(getQueryRunner().tableExists(Session.builder(getSession()).setSchema(schemaName).build(), renamedTable)); + assertFalse( + getQueryRunner() + .tableExists( + Session.builder(getSession()).setSchema(schemaName).build(), 
renamedTable)); } @Test public void testRenameTableToUnqualifiedPreservesSchema() { - skipTestUnless(hasBehavior(SUPPORTS_CREATE_SCHEMA) && hasBehavior(SUPPORTS_CREATE_TABLE) && hasBehavior(SUPPORTS_RENAME_TABLE)); + skipTestUnless( + hasBehavior(SUPPORTS_CREATE_SCHEMA) + && hasBehavior(SUPPORTS_CREATE_TABLE) + && hasBehavior(SUPPORTS_RENAME_TABLE)); String sourceSchemaName = "test_source_schema_" + randomNameSuffix(); assertUpdate(createSchemaSql(sourceSchemaName)); @@ -2075,7 +2549,8 @@ public void testRenameTableToUnqualifiedPreservesSchema() { assertUpdate("CREATE TABLE " + sourceSchemaName + "." + tableName + " AS SELECT 123 x", 1); String renamedTable = "test_rename_unqualified_name_new_" + randomNameSuffix(); - assertUpdate("ALTER TABLE " + sourceSchemaName + "." + tableName + " RENAME TO " + renamedTable); + assertUpdate( + "ALTER TABLE " + sourceSchemaName + "." + tableName + " RENAME TO " + renamedTable); assertQuery("SELECT x FROM " + sourceSchemaName + "." + renamedTable, "VALUES 123"); assertUpdate("DROP TABLE " + sourceSchemaName + "." + renamedTable); @@ -2085,31 +2560,43 @@ public void testRenameTableToUnqualifiedPreservesSchema() { @Test public void testCommentTable() { if (!hasBehavior(SUPPORTS_COMMENT_ON_TABLE)) { - assertQueryFails("COMMENT ON TABLE nation IS 'new comment'", "This connector does not support setting table comments"); + assertQueryFails( + "COMMENT ON TABLE nation IS 'new comment'", + "This connector does not support setting table comments"); return; } String catalogName = getSession().getCatalog().orElseThrow(); String schemaName = getSession().getSchema().orElseThrow(); - try (TestTable table = new TestTable(getQueryRunner()::execute, "test_comment_", "(a integer)")) { + try (TestTable table = + new TestTable(getQueryRunner()::execute, "test_comment_", "(a integer)")) { // comment set assertUpdate("COMMENT ON TABLE " + table.getName() + " IS 'new comment'"); - assertThat((String) computeActual("SHOW CREATE TABLE " + table.getName()).getOnlyValue()).contains("COMMENT 'new comment'"); - assertThat(getTableComment(catalogName, schemaName, table.getName())).isEqualTo("new comment"); - assertThat(query( - "SELECT table_name, comment FROM system.metadata.table_comments " + - "WHERE catalog_name = '" + catalogName + "' AND " + - "schema_name = '" + schemaName + "'")) + assertThat((String) computeActual("SHOW CREATE TABLE " + table.getName()).getOnlyValue()) + .contains("COMMENT 'new comment'"); + assertThat(getTableComment(catalogName, schemaName, table.getName())) + .isEqualTo("new comment"); + assertThat( + query( + "SELECT table_name, comment FROM system.metadata.table_comments " + + "WHERE catalog_name = '" + + catalogName + + "' AND " + + "schema_name = '" + + schemaName + + "'")) .skippingTypesCheck() .containsAll("VALUES ('" + table.getName() + "', 'new comment')"); // comment updated assertUpdate("COMMENT ON TABLE " + table.getName() + " IS 'updated comment'"); - assertThat(getTableComment(catalogName, schemaName, table.getName())).isEqualTo("updated comment"); + assertThat(getTableComment(catalogName, schemaName, table.getName())) + .isEqualTo("updated comment"); // comment set to empty or deleted assertUpdate("COMMENT ON TABLE " + table.getName() + " IS ''"); - assertThat(getTableComment(catalogName, schemaName, table.getName())).isIn("", null); // Some storages do not preserve empty comment + assertThat(getTableComment(catalogName, schemaName, table.getName())) + .isIn("", null); // Some storages do not preserve empty comment // comment deleted 
assertUpdate("COMMENT ON TABLE " + table.getName() + " IS 'a comment'"); @@ -2122,28 +2609,36 @@ public void testCommentTable() { try { // comment set when creating a table assertUpdate("CREATE TABLE " + tableName + "(key integer) COMMENT 'new table comment'"); - assertThat(getTableComment(catalogName, schemaName, tableName)).isEqualTo("new table comment"); + assertThat(getTableComment(catalogName, schemaName, tableName)) + .isEqualTo("new table comment"); } finally { assertUpdate("DROP TABLE IF EXISTS " + tableName); } } private String getTableComment(String catalogName, String schemaName, String tableName) { - String sql = format("SELECT comment FROM system.metadata.table_comments WHERE catalog_name = '%s' AND schema_name = '%s' AND table_name = '%s'", catalogName, schemaName, tableName); + String sql = + format( + "SELECT comment FROM system.metadata.table_comments WHERE catalog_name = '%s' AND schema_name = '%s' AND table_name = '%s'", + catalogName, schemaName, tableName); return (String) computeActual(sql).getOnlyValue(); } @Test public void testCommentColumn() { if (!hasBehavior(SUPPORTS_COMMENT_ON_COLUMN)) { - assertQueryFails("COMMENT ON COLUMN nation.nationkey IS 'new comment'", "This connector does not support setting column comments"); + assertQueryFails( + "COMMENT ON COLUMN nation.nationkey IS 'new comment'", + "This connector does not support setting column comments"); return; } - try (TestTable table = new TestTable(getQueryRunner()::execute, "test_comment_column_", "(a integer)")) { + try (TestTable table = + new TestTable(getQueryRunner()::execute, "test_comment_column_", "(a integer)")) { // comment set assertUpdate("COMMENT ON COLUMN " + table.getName() + ".a IS 'new comment'"); - assertThat((String) computeActual("SHOW CREATE TABLE " + table.getName()).getOnlyValue()).contains("COMMENT 'new comment'"); + assertThat((String) computeActual("SHOW CREATE TABLE " + table.getName()).getOnlyValue()) + .contains("COMMENT 'new comment'"); assertThat(getColumnComment(table.getName(), "a")).isEqualTo("new comment"); // comment updated @@ -2152,7 +2647,8 @@ public void testCommentColumn() { // comment set to empty or deleted assertUpdate("COMMENT ON COLUMN " + table.getName() + ".a IS ''"); - assertThat(getColumnComment(table.getName(), "a")).isIn("", null); // Some storages do not preserve empty comment + assertThat(getColumnComment(table.getName(), "a")) + .isIn("", null); // Some storages do not preserve empty comment // comment deleted assertUpdate("COMMENT ON COLUMN " + table.getName() + ".a IS 'a comment'"); @@ -2162,30 +2658,33 @@ public void testCommentColumn() { } // TODO: comment set when creating a table - // assertUpdate("CREATE TABLE " + tableName + "(a integer COMMENT 'new column comment')"); + // assertUpdate("CREATE TABLE " + tableName + "(a integer COMMENT 'new column + // comment')"); // assertThat(getColumnComment(tableName, "a")).isEqualTo("new column comment"); // assertUpdate("DROP TABLE " + tableName); } protected String getColumnComment(String tableName, String columnName) { - MaterializedResult materializedResult = computeActual(format( - "SELECT comment FROM information_schema.columns WHERE table_schema = '%s' AND table_name = '%s' AND column_name = '%s'", - getSession().getSchema().orElseThrow(), - tableName, - columnName)); + MaterializedResult materializedResult = + computeActual( + format( + "SELECT comment FROM information_schema.columns WHERE table_schema = '%s' AND table_name = '%s' AND column_name = '%s'", + getSession().getSchema().orElseThrow(), 
tableName, columnName)); return (String) materializedResult.getOnlyValue(); } @Test public void testInsert() { if (!hasBehavior(SUPPORTS_INSERT)) { - assertQueryFails("INSERT INTO nation(nationkey) VALUES (42)", "This connector does not support inserts"); + assertQueryFails( + "INSERT INTO nation(nationkey) VALUES (42)", "This connector does not support inserts"); return; } String query = "SELECT phone, custkey, acctbal FROM customer"; - try (TestTable table = new TestTable(getQueryRunner()::execute, "test_insert_", "AS " + query + " WITH NO DATA")) { + try (TestTable table = + new TestTable(getQueryRunner()::execute, "test_insert_", "AS " + query + " WITH NO DATA")) { assertQuery("SELECT count(*) FROM " + table.getName() + "", "SELECT 0"); assertUpdate("INSERT INTO " + table.getName() + " " + query, "SELECT count(*) FROM customer"); @@ -2195,25 +2694,31 @@ public void testInsert() { assertUpdate("INSERT INTO " + table.getName() + " (custkey) VALUES (-1)", 1); assertUpdate("INSERT INTO " + table.getName() + " (custkey) VALUES (null)", 1); assertUpdate("INSERT INTO " + table.getName() + " (phone) VALUES ('3283-2001-01-01')", 1); - assertUpdate("INSERT INTO " + table.getName() + " (custkey, phone) VALUES (-2, '3283-2001-01-02')", 1); - assertUpdate("INSERT INTO " + table.getName() + " (phone, custkey) VALUES ('3283-2001-01-03', -3)", 1); + assertUpdate( + "INSERT INTO " + table.getName() + " (custkey, phone) VALUES (-2, '3283-2001-01-02')", 1); + assertUpdate( + "INSERT INTO " + table.getName() + " (phone, custkey) VALUES ('3283-2001-01-03', -3)", 1); assertUpdate("INSERT INTO " + table.getName() + " (acctbal) VALUES (1234)", 1); - assertQuery("SELECT * FROM " + table.getName() + "", query - + " UNION ALL SELECT null, -1, null" - + " UNION ALL SELECT null, null, null" - + " UNION ALL SELECT '3283-2001-01-01', null, null" - + " UNION ALL SELECT '3283-2001-01-02', -2, null" - + " UNION ALL SELECT '3283-2001-01-03', -3, null" - + " UNION ALL SELECT null, null, 1234"); + assertQuery( + "SELECT * FROM " + table.getName() + "", + query + + " UNION ALL SELECT null, -1, null" + + " UNION ALL SELECT null, null, null" + + " UNION ALL SELECT '3283-2001-01-01', null, null" + + " UNION ALL SELECT '3283-2001-01-02', -2, null" + + " UNION ALL SELECT '3283-2001-01-03', -3, null" + + " UNION ALL SELECT null, null, 1234"); // UNION query produces columns in the opposite order // of how they are declared in the table schema assertUpdate( - "INSERT INTO " + table.getName() + " (custkey, phone, acctbal) " + - "SELECT custkey, phone, acctbal FROM customer " + - "UNION ALL " + - "SELECT custkey, phone, acctbal FROM customer", + "INSERT INTO " + + table.getName() + + " (custkey, phone, acctbal) " + + "SELECT custkey, phone, acctbal FROM customer " + + "UNION ALL " + + "SELECT custkey, phone, acctbal FROM customer", "SELECT 2 * count(*) FROM customer"); } } @@ -2223,12 +2728,20 @@ public void testInsertForDefaultColumn() { skipTestUnless(hasBehavior(SUPPORTS_INSERT)); try (TestTable testTable = createTableWithDefaultColumns()) { - assertUpdate(format("INSERT INTO %s (col_required, col_required2) VALUES (1, 10)", testTable.getName()), 1); + assertUpdate( + format( + "INSERT INTO %s (col_required, col_required2) VALUES (1, 10)", testTable.getName()), + 1); assertUpdate(format("INSERT INTO %s VALUES (2, 3, 4, 5, 6)", testTable.getName()), 1); assertUpdate(format("INSERT INTO %s VALUES (7, null, null, 8, 9)", testTable.getName()), 1); - assertUpdate(format("INSERT INTO %s (col_required2, col_required) VALUES (12, 13)", 
testTable.getName()), 1); + assertUpdate( + format( + "INSERT INTO %s (col_required2, col_required) VALUES (12, 13)", testTable.getName()), + 1); - assertQuery("SELECT * FROM " + testTable.getName(), "VALUES (1, null, 43, 42, 10), (2, 3, 4, 5, 6), (7, null, null, 8, 9), (13, null, 43, 42, 12)"); + assertQuery( + "SELECT * FROM " + testTable.getName(), + "VALUES (1, null, 43, 42, 10), (2, 3, 4, 5, 6), (7, null, null, 8, 9), (13, null, 43, 42, 12)"); } } @@ -2240,13 +2753,19 @@ protected TestTable createTableWithDefaultColumns() { public void testInsertUnicode() { skipTestUnless(hasBehavior(SUPPORTS_INSERT)); - try (TestTable table = new TestTable(getQueryRunner()::execute, "test_insert_unicode_", "(test varchar(50))")) { - assertUpdate("INSERT INTO " + table.getName() + "(test) VALUES 'Hello', U&'hello\\6d4B\\8Bd5world\\7F16\\7801' ", 2); + try (TestTable table = + new TestTable(getQueryRunner()::execute, "test_insert_unicode_", "(test varchar(50))")) { + assertUpdate( + "INSERT INTO " + + table.getName() + + "(test) VALUES 'Hello', U&'hello\\6d4B\\8Bd5world\\7F16\\7801' ", + 2); assertThat(computeActual("SELECT test FROM " + table.getName()).getOnlyColumnAsSet()) .containsExactlyInAnyOrder("Hello", "hello测试world编码"); } - try (TestTable table = new TestTable(getQueryRunner()::execute, "test_insert_unicode_", "(test varchar(50))")) { + try (TestTable table = + new TestTable(getQueryRunner()::execute, "test_insert_unicode_", "(test varchar(50))")) { assertUpdate("INSERT INTO " + table.getName() + "(test) VALUES 'aa', 'bé'", 2); assertQuery("SELECT test FROM " + table.getName(), "VALUES 'aa', 'bé'"); assertQuery("SELECT test FROM " + table.getName() + " WHERE test = 'aa'", "VALUES 'aa'"); @@ -2255,7 +2774,8 @@ public void testInsertUnicode() { assertQueryReturnsEmptyResult("SELECT test FROM " + table.getName() + " WHERE test = 'ba'"); } - try (TestTable table = new TestTable(getQueryRunner()::execute, "test_insert_unicode_", "(test varchar(50))")) { + try (TestTable table = + new TestTable(getQueryRunner()::execute, "test_insert_unicode_", "(test varchar(50))")) { assertUpdate("INSERT INTO " + table.getName() + "(test) VALUES 'a', 'é'", 2); assertQuery("SELECT test FROM " + table.getName(), "VALUES 'a', 'é'"); assertQuery("SELECT test FROM " + table.getName() + " WHERE test = 'a'", "VALUES 'a'"); @@ -2269,8 +2789,13 @@ public void testInsertUnicode() { public void testInsertHighestUnicodeCharacter() { skipTestUnless(hasBehavior(SUPPORTS_INSERT)); - try (TestTable table = new TestTable(getQueryRunner()::execute, "test_insert_unicode_", "(test varchar(50))")) { - assertUpdate("INSERT INTO " + table.getName() + "(test) VALUES 'Hello', U&'hello\\6d4B\\8Bd5\\+10FFFFworld\\7F16\\7801' ", 2); + try (TestTable table = + new TestTable(getQueryRunner()::execute, "test_insert_unicode_", "(test varchar(50))")) { + assertUpdate( + "INSERT INTO " + + table.getName() + + "(test) VALUES 'Hello', U&'hello\\6d4B\\8Bd5\\+10FFFFworld\\7F16\\7801' ", + 2); assertThat(computeActual("SELECT test FROM " + table.getName()).getOnlyColumnAsSet()) .containsExactlyInAnyOrder("Hello", "hello测试􏿿world编码"); } @@ -2288,9 +2813,14 @@ public void testInsertArray() { throw new SkipException("not supported"); } - try (TestTable table = new TestTable(getQueryRunner()::execute, "test_insert_array_", "(a ARRAY, b ARRAY)")) { + try (TestTable table = + new TestTable( + getQueryRunner()::execute, + "test_insert_array_", + "(a ARRAY, b ARRAY)")) { assertUpdate("INSERT INTO " + table.getName() + " (a) VALUES (ARRAY[null])", 1); - 
assertUpdate("INSERT INTO " + table.getName() + " (a, b) VALUES (ARRAY[1.23E1], ARRAY[1.23E1])", 1); + assertUpdate( + "INSERT INTO " + table.getName() + " (a, b) VALUES (ARRAY[1.23E1], ARRAY[1.23E1])", 1); assertQuery("SELECT a[1], b[1] FROM " + table.getName(), "VALUES (null, null), (12.3, 12)"); } } @@ -2298,15 +2828,20 @@ public void testInsertArray() { @Test public void testInsertNegativeDate() { if (!hasBehavior(SUPPORTS_INSERT)) { - assertQueryFails("INSERT INTO orders (orderdate) VALUES (DATE '-0001-01-01')", "This connector does not support inserts"); + assertQueryFails( + "INSERT INTO orders (orderdate) VALUES (DATE '-0001-01-01')", + "This connector does not support inserts"); return; } if (!hasBehavior(SUPPORTS_CREATE_TABLE)) { - throw new AssertionError("Cannot test INSERT negative dates without CREATE TABLE, the test needs to be implemented in a connector-specific way"); + throw new AssertionError( + "Cannot test INSERT negative dates without CREATE TABLE, the test needs to be implemented in a connector-specific way"); } if (!hasBehavior(SUPPORTS_NEGATIVE_DATE)) { try (TestTable table = new TestTable(getQueryRunner()::execute, "insert_date", "(dt DATE)")) { - assertQueryFails(format("INSERT INTO %s VALUES (DATE '-0001-01-01')", table.getName()), errorMessageForInsertNegativeDate("-0001-01-01")); + assertQueryFails( + format("INSERT INTO %s VALUES (DATE '-0001-01-01')", table.getName()), + errorMessageForInsertNegativeDate("-0001-01-01")); } return; } @@ -2314,7 +2849,9 @@ public void testInsertNegativeDate() { try (TestTable table = new TestTable(getQueryRunner()::execute, "insert_date", "(dt DATE)")) { assertUpdate(format("INSERT INTO %s VALUES (DATE '-0001-01-01')", table.getName()), 1); assertQuery("SELECT * FROM " + table.getName(), "VALUES DATE '-0001-01-01'"); - assertQuery(format("SELECT * FROM %s WHERE dt = DATE '-0001-01-01'", table.getName()), "VALUES DATE '-0001-01-01'"); + assertQuery( + format("SELECT * FROM %s WHERE dt = DATE '-0001-01-01'", table.getName()), + "VALUES DATE '-0001-01-01'"); } } @@ -2324,20 +2861,24 @@ protected String errorMessageForInsertNegativeDate(String date) { } protected boolean isReportingWrittenBytesSupported(Session session) { - CatalogName catalogName = session.getCatalog() - .map(CatalogName::new) - .orElseThrow(); + CatalogName catalogName = session.getCatalog().map(CatalogName::new).orElseThrow(); Metadata metadata = getQueryRunner().getMetadata(); metadata.getCatalogHandle(session, catalogName.getCatalogName()); - QualifiedObjectName fullTableName = new QualifiedObjectName(catalogName.getCatalogName(), "any", "any"); - return getQueryRunner().getMetadata().supportsReportingWrittenBytes(session, fullTableName, ImmutableMap.of()); + QualifiedObjectName fullTableName = + new QualifiedObjectName(catalogName.getCatalogName(), "any", "any"); + return getQueryRunner() + .getMetadata() + .supportsReportingWrittenBytes(session, fullTableName, ImmutableMap.of()); } @Test public void isReportingWrittenBytesSupported() { transaction(getQueryRunner().getTransactionManager(), getQueryRunner().getAccessControl()) .singleStatement() - .execute(getSession(), (Consumer) session -> skipTestUnless(isReportingWrittenBytesSupported(session))); + .execute( + getSession(), + (Consumer) + session -> skipTestUnless(isReportingWrittenBytesSupported(session))); @Language("SQL") String query = "CREATE TABLE temp AS SELECT * FROM tpch.tiny.nation"; @@ -2345,9 +2886,9 @@ public void isReportingWrittenBytesSupported() { assertQueryStats( getSession(), query, - 
queryStats -> assertThat(queryStats.getPhysicalWrittenDataSize().toBytes()).isGreaterThan(0L), - results -> { - }); + queryStats -> + assertThat(queryStats.getPhysicalWrittenDataSize().toBytes()).isGreaterThan(0L), + results -> {}); } @Test @@ -2357,22 +2898,37 @@ public void testInsertIntoNotNullColumn() { if (!hasBehavior(SUPPORTS_NOT_NULL_CONSTRAINT)) { assertQueryFails( "CREATE TABLE not_null_constraint (not_null_col INTEGER NOT NULL)", - format("line 1:35: Catalog '%s' does not support non-null column for column name 'not_null_col'", getSession().getCatalog().orElseThrow())); + format( + "line 1:35: Catalog '%s' does not support non-null column for column name 'not_null_col'", + getSession().getCatalog().orElseThrow())); return; } - try (TestTable table = new TestTable(getQueryRunner()::execute, "insert_not_null", "(nullable_col INTEGER, not_null_col INTEGER NOT NULL)")) { + try (TestTable table = + new TestTable( + getQueryRunner()::execute, + "insert_not_null", + "(nullable_col INTEGER, not_null_col INTEGER NOT NULL)")) { assertUpdate(format("INSERT INTO %s (not_null_col) VALUES (2)", table.getName()), 1); assertQuery("SELECT * FROM " + table.getName(), "VALUES (NULL, 2)"); - // The error message comes from remote databases when ConnectorMetadata.supportsMissingColumnsOnInsert is true - assertQueryFails(format("INSERT INTO %s (nullable_col) VALUES (1)", table.getName()), errorMessageForInsertIntoNotNullColumn("not_null_col")); + // The error message comes from remote databases when + // ConnectorMetadata.supportsMissingColumnsOnInsert is true + assertQueryFails( + format("INSERT INTO %s (nullable_col) VALUES (1)", table.getName()), + errorMessageForInsertIntoNotNullColumn("not_null_col")); } - try (TestTable table = new TestTable(getQueryRunner()::execute, "commuted_not_null", "(nullable_col BIGINT, not_null_col BIGINT NOT NULL)")) { + try (TestTable table = + new TestTable( + getQueryRunner()::execute, + "commuted_not_null", + "(nullable_col BIGINT, not_null_col BIGINT NOT NULL)")) { assertUpdate(format("INSERT INTO %s (not_null_col) VALUES (2)", table.getName()), 1); assertQuery("SELECT * FROM " + table.getName(), "VALUES (NULL, 2)"); // This is enforced by the engine and not the connector - assertQueryFails(format("INSERT INTO %s (not_null_col, nullable_col) VALUES (NULL, 3)", table.getName()), "NULL value not allowed for NOT NULL column: not_null_col"); + assertQueryFails( + format("INSERT INTO %s (not_null_col, nullable_col) VALUES (NULL, 3)", table.getName()), + "NULL value not allowed for NOT NULL column: not_null_col"); } } @@ -2384,12 +2940,12 @@ protected String errorMessageForInsertIntoNotNullColumn(String columnName) { @Test public void testInsertInTransaction() { skipTestUnless(hasBehavior(SUPPORTS_INSERT)); - skipTestUnless(hasBehavior(SUPPORTS_MULTI_STATEMENT_WRITES)); // covered by testWriteNotAllowedInTransaction + skipTestUnless( + hasBehavior( + SUPPORTS_MULTI_STATEMENT_WRITES)); // covered by testWriteNotAllowedInTransaction - try (TestTable table = new TestTable( - getQueryRunner()::execute, - "test_tx_insert", - "(a bigint)")) { + try (TestTable table = + new TestTable(getQueryRunner()::execute, "test_tx_insert", "(a bigint)")) { String tableName = table.getName(); inTransaction(session -> assertUpdate(session, "INSERT INTO " + tableName + " VALUES 42", 1)); assertQuery("TABLE " + tableName, "VALUES 42"); @@ -2401,35 +2957,50 @@ public void testDelete() { skipTestUnless(hasBehavior(SUPPORTS_DELETE)); // delete successive parts of the table - try (TestTable 
table = new TestTable(getQueryRunner()::execute, "test_delete_", "AS SELECT * FROM orders")) { - assertUpdate("DELETE FROM " + table.getName() + " WHERE custkey <= 100", "SELECT count(*) FROM orders WHERE custkey <= 100"); + try (TestTable table = + new TestTable(getQueryRunner()::execute, "test_delete_", "AS SELECT * FROM orders")) { + assertUpdate( + "DELETE FROM " + table.getName() + " WHERE custkey <= 100", + "SELECT count(*) FROM orders WHERE custkey <= 100"); assertQuery("SELECT * FROM " + table.getName(), "SELECT * FROM orders WHERE custkey > 100"); - assertUpdate("DELETE FROM " + table.getName() + " WHERE custkey <= 300", "SELECT count(*) FROM orders WHERE custkey > 100 AND custkey <= 300"); + assertUpdate( + "DELETE FROM " + table.getName() + " WHERE custkey <= 300", + "SELECT count(*) FROM orders WHERE custkey > 100 AND custkey <= 300"); assertQuery("SELECT * FROM " + table.getName(), "SELECT * FROM orders WHERE custkey > 300"); - assertUpdate("DELETE FROM " + table.getName() + " WHERE custkey <= 500", "SELECT count(*) FROM orders WHERE custkey > 300 AND custkey <= 500"); + assertUpdate( + "DELETE FROM " + table.getName() + " WHERE custkey <= 500", + "SELECT count(*) FROM orders WHERE custkey > 300 AND custkey <= 500"); assertQuery("SELECT * FROM " + table.getName(), "SELECT * FROM orders WHERE custkey > 500"); } // delete without matching any rows - try (TestTable table = new TestTable(getQueryRunner()::execute, "test_delete_", "AS SELECT * FROM orders")) { + try (TestTable table = + new TestTable(getQueryRunner()::execute, "test_delete_", "AS SELECT * FROM orders")) { assertUpdate("DELETE FROM " + table.getName() + " WHERE orderkey < 0", 0); } // delete with a predicate that optimizes to false - try (TestTable table = new TestTable(getQueryRunner()::execute, "test_delete_", "AS SELECT * FROM orders")) { + try (TestTable table = + new TestTable(getQueryRunner()::execute, "test_delete_", "AS SELECT * FROM orders")) { assertUpdate("DELETE FROM " + table.getName() + " WHERE orderkey > 5 AND orderkey < 4", 0); } String tableName = "test_delete_" + randomNameSuffix(); try { // test EXPLAIN ANALYZE with CTAS - assertExplainAnalyze("EXPLAIN ANALYZE CREATE TABLE " + tableName + " AS SELECT CAST(orderstatus AS VARCHAR(15)) orderstatus FROM orders"); + assertExplainAnalyze( + "EXPLAIN ANALYZE CREATE TABLE " + + tableName + + " AS SELECT CAST(orderstatus AS VARCHAR(15)) orderstatus FROM orders"); assertQuery("SELECT * from " + tableName, "SELECT orderstatus FROM orders"); // check that INSERT works also - assertExplainAnalyze("EXPLAIN ANALYZE INSERT INTO " + tableName + " SELECT clerk FROM orders"); - assertQuery("SELECT * from " + tableName, "SELECT orderstatus FROM orders UNION ALL SELECT clerk FROM orders"); + assertExplainAnalyze( + "EXPLAIN ANALYZE INSERT INTO " + tableName + " SELECT clerk FROM orders"); + assertQuery( + "SELECT * from " + tableName, + "SELECT orderstatus FROM orders UNION ALL SELECT clerk FROM orders"); // check DELETE works with EXPLAIN ANALYZE assertExplainAnalyze("EXPLAIN ANALYZE DELETE FROM " + tableName + " WHERE TRUE"); assertQuery("SELECT COUNT(*) from " + tableName, "SELECT 0"); @@ -2442,9 +3013,12 @@ public void testDelete() { public void testDeleteWithLike() { skipTestUnless(hasBehavior(SUPPORTS_DELETE)); - try (TestTable table = new TestTable(getQueryRunner()::execute, "test_with_like_", "AS SELECT * FROM nation")) { + try (TestTable table = + new TestTable(getQueryRunner()::execute, "test_with_like_", "AS SELECT * FROM nation")) { assertUpdate("DELETE 
FROM " + table.getName() + " WHERE name LIKE '%a%'", "VALUES 0"); - assertUpdate("DELETE FROM " + table.getName() + " WHERE name LIKE '%A%'", "SELECT count(*) FROM nation WHERE name LIKE '%A%'"); + assertUpdate( + "DELETE FROM " + table.getName() + " WHERE name LIKE '%A%'", + "SELECT count(*) FROM nation WHERE name LIKE '%A%'"); } } @@ -2452,13 +3026,20 @@ public void testDeleteWithLike() { public void testDeleteWithComplexPredicate() { skipTestUnless(hasBehavior(SUPPORTS_DELETE)); - // TODO (https://github.com/trinodb/trino/issues/5901) Use longer table name once Oracle version is updated - try (TestTable table = new TestTable(getQueryRunner()::execute, "test_delete_complex_", "AS SELECT * FROM orders")) { + // TODO (https://github.com/trinodb/trino/issues/5901) Use longer table name once Oracle version + // is updated + try (TestTable table = + new TestTable( + getQueryRunner()::execute, "test_delete_complex_", "AS SELECT * FROM orders")) { // delete half the table, then delete the rest - assertUpdate("DELETE FROM " + table.getName() + " WHERE orderkey % 2 = 0", "SELECT count(*) FROM orders WHERE orderkey % 2 = 0"); - assertQuery("SELECT * FROM " + table.getName(), "SELECT * FROM orders WHERE orderkey % 2 <> 0"); + assertUpdate( + "DELETE FROM " + table.getName() + " WHERE orderkey % 2 = 0", + "SELECT count(*) FROM orders WHERE orderkey % 2 = 0"); + assertQuery( + "SELECT * FROM " + table.getName(), "SELECT * FROM orders WHERE orderkey % 2 <> 0"); - assertUpdate("DELETE FROM " + table.getName(), "SELECT count(*) FROM orders WHERE orderkey % 2 <> 0"); + assertUpdate( + "DELETE FROM " + table.getName(), "SELECT count(*) FROM orders WHERE orderkey % 2 <> 0"); assertQuery("SELECT * FROM " + table.getName(), "SELECT * FROM orders LIMIT 0"); assertUpdate("DELETE FROM " + table.getName() + " WHERE rand() < 0", 0); @@ -2469,22 +3050,42 @@ public void testDeleteWithComplexPredicate() { public void testDeleteWithSubquery() { skipTestUnless(hasBehavior(SUPPORTS_DELETE)); - // TODO (https://github.com/trinodb/trino/issues/5901) Use longer table name once Oracle version is updated - try (TestTable table = new TestTable(getQueryRunner()::execute, "test_delete_subquery", "AS SELECT * FROM nation")) { + // TODO (https://github.com/trinodb/trino/issues/5901) Use longer table name once Oracle version + // is updated + try (TestTable table = + new TestTable( + getQueryRunner()::execute, "test_delete_subquery", "AS SELECT * FROM nation")) { // delete using a subquery - assertUpdate("DELETE FROM " + table.getName() + " WHERE regionkey IN (SELECT regionkey FROM region WHERE name LIKE 'A%')", 15); + assertUpdate( + "DELETE FROM " + + table.getName() + + " WHERE regionkey IN (SELECT regionkey FROM region WHERE name LIKE 'A%')", + 15); assertQuery( "SELECT * FROM " + table.getName(), "SELECT * FROM nation WHERE regionkey IN (SELECT regionkey FROM region WHERE name NOT LIKE 'A%')"); } - // TODO (https://github.com/trinodb/trino/issues/5901) Use longer table name once Oracle version is updated - try (TestTable table = new TestTable(getQueryRunner()::execute, "test_delete_subquery", "AS SELECT * FROM orders")) { + // TODO (https://github.com/trinodb/trino/issues/5901) Use longer table name once Oracle version + // is updated + try (TestTable table = + new TestTable( + getQueryRunner()::execute, "test_delete_subquery", "AS SELECT * FROM orders")) { // delete using a scalar and EXISTS subquery - assertUpdate("DELETE FROM " + table.getName() + " WHERE orderkey = (SELECT orderkey FROM orders ORDER BY orderkey LIMIT 
1)", 1); - assertUpdate("DELETE FROM " + table.getName() + " WHERE orderkey = (SELECT orderkey FROM orders WHERE false)", 0); + assertUpdate( + "DELETE FROM " + + table.getName() + + " WHERE orderkey = (SELECT orderkey FROM orders ORDER BY orderkey LIMIT 1)", + 1); + assertUpdate( + "DELETE FROM " + + table.getName() + + " WHERE orderkey = (SELECT orderkey FROM orders WHERE false)", + 0); assertUpdate("DELETE FROM " + table.getName() + " WHERE EXISTS(SELECT 1 WHERE false)", 0); - assertUpdate("DELETE FROM " + table.getName() + " WHERE EXISTS(SELECT 1)", "SELECT count(*) - 1 FROM orders"); + assertUpdate( + "DELETE FROM " + table.getName() + " WHERE EXISTS(SELECT 1)", + "SELECT count(*) - 1 FROM orders"); } } @@ -2496,7 +3097,10 @@ public void testExplainAnalyzeWithDeleteWithSubquery() { // delete using a subquery assertUpdate("CREATE TABLE " + tableName + " AS SELECT * FROM nation", 25); - assertExplainAnalyze("EXPLAIN ANALYZE DELETE FROM " + tableName + " WHERE regionkey IN (SELECT regionkey FROM region WHERE name LIKE 'A%' LIMIT 1)", + assertExplainAnalyze( + "EXPLAIN ANALYZE DELETE FROM " + + tableName + + " WHERE regionkey IN (SELECT regionkey FROM region WHERE name LIKE 'A%' LIMIT 1)", "SemiJoin.*"); assertUpdate("DROP TABLE " + tableName); } @@ -2505,33 +3109,43 @@ public void testExplainAnalyzeWithDeleteWithSubquery() { public void testDeleteWithSemiJoin() { skipTestUnless(hasBehavior(SUPPORTS_DELETE)); - // TODO (https://github.com/trinodb/trino/issues/5901) Use longer table name once Oracle version is updated - try (TestTable table = new TestTable(getQueryRunner()::execute, "test_delete_semijoin", "AS SELECT * FROM nation")) { + // TODO (https://github.com/trinodb/trino/issues/5901) Use longer table name once Oracle version + // is updated + try (TestTable table = + new TestTable( + getQueryRunner()::execute, "test_delete_semijoin", "AS SELECT * FROM nation")) { // delete with multiple SemiJoin assertUpdate( - "DELETE FROM " + table.getName() + " " + - "WHERE regionkey IN (SELECT regionkey FROM region WHERE name LIKE 'A%') " + - " AND regionkey IN (SELECT regionkey FROM region WHERE length(comment) < 50)", + "DELETE FROM " + + table.getName() + + " " + + "WHERE regionkey IN (SELECT regionkey FROM region WHERE name LIKE 'A%') " + + " AND regionkey IN (SELECT regionkey FROM region WHERE length(comment) < 50)", 10); assertQuery( "SELECT * FROM " + table.getName(), - "SELECT * FROM nation " + - "WHERE regionkey IN (SELECT regionkey FROM region WHERE name NOT LIKE 'A%') " + - " OR regionkey IN (SELECT regionkey FROM region WHERE length(comment) >= 50)"); + "SELECT * FROM nation " + + "WHERE regionkey IN (SELECT regionkey FROM region WHERE name NOT LIKE 'A%') " + + " OR regionkey IN (SELECT regionkey FROM region WHERE length(comment) >= 50)"); } - // TODO (https://github.com/trinodb/trino/issues/5901) Use longer table name once Oracle version is updated - try (TestTable table = new TestTable(getQueryRunner()::execute, "test_delete_semijoin", "AS SELECT * FROM orders")) { + // TODO (https://github.com/trinodb/trino/issues/5901) Use longer table name once Oracle version + // is updated + try (TestTable table = + new TestTable( + getQueryRunner()::execute, "test_delete_semijoin", "AS SELECT * FROM orders")) { // delete with SemiJoin null handling assertUpdate( - "DELETE FROM " + table.getName() + "\n" + - "WHERE (orderkey IN (SELECT CASE WHEN orderkey % 3 = 0 THEN NULL ELSE orderkey END FROM tpch.tiny.lineitem)) IS NULL\n", - "SELECT count(*) FROM orders\n" + - "WHERE (orderkey IN (SELECT 
CASE WHEN orderkey % 3 = 0 THEN NULL ELSE orderkey END FROM lineitem)) IS NULL\n"); + "DELETE FROM " + + table.getName() + + "\n" + + "WHERE (orderkey IN (SELECT CASE WHEN orderkey % 3 = 0 THEN NULL ELSE orderkey END FROM tpch.tiny.lineitem)) IS NULL\n", + "SELECT count(*) FROM orders\n" + + "WHERE (orderkey IN (SELECT CASE WHEN orderkey % 3 = 0 THEN NULL ELSE orderkey END FROM lineitem)) IS NULL\n"); assertQuery( "SELECT * FROM " + table.getName(), - "SELECT * FROM orders\n" + - "WHERE (orderkey IN (SELECT CASE WHEN orderkey % 3 = 0 THEN NULL ELSE orderkey END FROM lineitem)) IS NOT NULL\n"); + "SELECT * FROM orders\n" + + "WHERE (orderkey IN (SELECT CASE WHEN orderkey % 3 = 0 THEN NULL ELSE orderkey END FROM lineitem)) IS NOT NULL\n"); } } @@ -2539,16 +3153,25 @@ public void testDeleteWithSemiJoin() { public void testDeleteWithVarcharPredicate() { skipTestUnless(hasBehavior(SUPPORTS_DELETE)); - try (TestTable table = new TestTable(getQueryRunner()::execute, "test_delete_with_varchar_predicate_", "AS SELECT * FROM orders")) { - assertUpdate("DELETE FROM " + table.getName() + " WHERE orderstatus = 'O'", "SELECT count(*) FROM orders WHERE orderstatus = 'O'"); - assertQuery("SELECT * FROM " + table.getName(), "SELECT * FROM orders WHERE orderstatus <> 'O'"); + try (TestTable table = + new TestTable( + getQueryRunner()::execute, + "test_delete_with_varchar_predicate_", + "AS SELECT * FROM orders")) { + assertUpdate( + "DELETE FROM " + table.getName() + " WHERE orderstatus = 'O'", + "SELECT count(*) FROM orders WHERE orderstatus = 'O'"); + assertQuery( + "SELECT * FROM " + table.getName(), "SELECT * FROM orders WHERE orderstatus <> 'O'"); } } @Test public void testDeleteAllDataFromTable() { skipTestUnless(hasBehavior(SUPPORTS_CREATE_TABLE) && hasBehavior(SUPPORTS_DELETE)); - try (TestTable table = new TestTable(getQueryRunner()::execute, "test_delete_all_data", "AS SELECT * FROM region")) { + try (TestTable table = + new TestTable( + getQueryRunner()::execute, "test_delete_all_data", "AS SELECT * FROM region")) { // not using assertUpdate as some connectors provide update count and some not getQueryRunner().execute("DELETE FROM " + table.getName()); assertQuery("SELECT count(*) FROM " + table.getName(), "VALUES 0"); @@ -2558,8 +3181,10 @@ public void testDeleteAllDataFromTable() { @Test public void testRowLevelDelete() { skipTestUnless(hasBehavior(SUPPORTS_CREATE_TABLE) && hasBehavior(SUPPORTS_ROW_LEVEL_DELETE)); - // TODO (https://github.com/trinodb/trino/issues/5901) Use longer table name once Oracle version is updated - try (TestTable table = new TestTable(getQueryRunner()::execute, "test_row_delete", "AS SELECT * FROM region")) { + // TODO (https://github.com/trinodb/trino/issues/5901) Use longer table name once Oracle version + // is updated + try (TestTable table = + new TestTable(getQueryRunner()::execute, "test_row_delete", "AS SELECT * FROM region")) { assertUpdate("DELETE FROM " + table.getName() + " WHERE regionkey = 2", 1); assertQuery("SELECT count(*) FROM " + table.getName(), "VALUES 4"); } @@ -2569,22 +3194,29 @@ public void testRowLevelDelete() { public void testUpdate() { if (!hasBehavior(SUPPORTS_UPDATE)) { // Note this change is a no-op, if actually run - assertQueryFails("UPDATE nation SET nationkey = nationkey + regionkey WHERE regionkey < 1", MODIFYING_ROWS_MESSAGE); + assertQueryFails( + "UPDATE nation SET nationkey = nationkey + regionkey WHERE regionkey < 1", + MODIFYING_ROWS_MESSAGE); return; } - try (TestTable table = new TestTable(getQueryRunner()::execute, 
"test_update", "AS TABLE tpch.tiny.nation")) { + try (TestTable table = + new TestTable(getQueryRunner()::execute, "test_update", "AS TABLE tpch.tiny.nation")) { String tableName = table.getName(); - assertUpdate("UPDATE " + tableName + " SET nationkey = 100 + nationkey WHERE regionkey = 2", 5); + assertUpdate( + "UPDATE " + tableName + " SET nationkey = 100 + nationkey WHERE regionkey = 2", 5); assertThat(query("SELECT * FROM " + tableName)) .skippingTypesCheck() - .matches("SELECT IF(regionkey=2, nationkey + 100, nationkey) nationkey, name, regionkey, comment FROM tpch.tiny.nation"); + .matches( + "SELECT IF(regionkey=2, nationkey + 100, nationkey) nationkey, name, regionkey, comment FROM tpch.tiny.nation"); // UPDATE after UPDATE - assertUpdate("UPDATE " + tableName + " SET nationkey = nationkey * 2 WHERE regionkey IN (2,3)", 10); + assertUpdate( + "UPDATE " + tableName + " SET nationkey = nationkey * 2 WHERE regionkey IN (2,3)", 10); assertThat(query("SELECT * FROM " + tableName)) .skippingTypesCheck() - .matches("SELECT CASE regionkey WHEN 2 THEN 2*(nationkey+100) WHEN 3 THEN 2*nationkey ELSE nationkey END nationkey, name, regionkey, comment FROM tpch.tiny.nation"); + .matches( + "SELECT CASE regionkey WHEN 2 THEN 2*(nationkey+100) WHEN 3 THEN 2*nationkey ELSE nationkey END nationkey, name, regionkey, comment FROM tpch.tiny.nation"); } } @@ -2614,13 +3246,15 @@ public void testDropTableIfExists() { @Test public void testTruncateTable() { if (!hasBehavior(SUPPORTS_TRUNCATE)) { - assertQueryFails("TRUNCATE TABLE nation", "This connector does not support truncating tables"); + assertQueryFails( + "TRUNCATE TABLE nation", "This connector does not support truncating tables"); return; } skipTestUnless(hasBehavior(SUPPORTS_CREATE_TABLE)); - try (TestTable table = new TestTable(getQueryRunner()::execute, "test_truncate", "AS SELECT * FROM region")) { + try (TestTable table = + new TestTable(getQueryRunner()::execute, "test_truncate", "AS SELECT * FROM region")) { assertUpdate("TRUNCATE TABLE " + table.getName()); assertQuery("SELECT count(*) FROM " + table.getName(), "VALUES 0"); } @@ -2631,41 +3265,55 @@ public void testQueryLoggingCount() { skipTestUnless(hasBehavior(SUPPORTS_CREATE_TABLE)); QueryManager queryManager = getDistributedQueryRunner().getCoordinator().getQueryManager(); - executeExclusively(() -> { - assertEventually( - new Duration(1, MINUTES), - () -> assertEquals( - queryManager.getQueries().stream() - .map(BasicQueryInfo::getQueryId) - .map(queryManager::getFullQueryInfo) - .filter(info -> !info.isFinalQueryInfo()) - .collect(toList()), - ImmutableList.of())); - - // We cannot simply get the number of completed queries as soon as all the queries are completed, because this counter may not be up-to-date at that point. - // The completed queries counter is updated in a final query info listener, which is called eventually. - // Therefore, here we wait until the value of this counter gets stable. 
- - DispatchManager dispatchManager = ((DistributedQueryRunner) getQueryRunner()).getCoordinator().getDispatchManager(); - long beforeCompletedQueriesCount = waitUntilStable(() -> dispatchManager.getStats().getCompletedQueries().getTotalCount(), new Duration(5, SECONDS)); - long beforeSubmittedQueriesCount = dispatchManager.getStats().getSubmittedQueries().getTotalCount(); - String tableName = "test_logging_count" + randomNameSuffix(); - assertUpdate("CREATE TABLE " + tableName + tableDefinitionForQueryLoggingCount()); - assertQueryReturnsEmptyResult("SELECT foo_1, foo_2_4 FROM " + tableName); - assertUpdate("DROP TABLE " + tableName); - assertQueryFails("SELECT * FROM " + tableName, ".*Table .* does not exist"); - - // TODO: Figure out a better way of synchronization - assertEventually( - new Duration(1, MINUTES), - () -> assertEquals(dispatchManager.getStats().getCompletedQueries().getTotalCount() - beforeCompletedQueriesCount, 4)); - assertEquals(dispatchManager.getStats().getSubmittedQueries().getTotalCount() - beforeSubmittedQueriesCount, 4); - }); + executeExclusively( + () -> { + assertEventually( + new Duration(1, MINUTES), + () -> + assertEquals( + queryManager.getQueries().stream() + .map(BasicQueryInfo::getQueryId) + .map(queryManager::getFullQueryInfo) + .filter(info -> !info.isFinalQueryInfo()) + .collect(toList()), + ImmutableList.of())); + + // We cannot simply get the number of completed queries as soon as all the queries are + // completed, because this counter may not be up-to-date at that point. + // The completed queries counter is updated in a final query info listener, which is + // called eventually. + // Therefore, here we wait until the value of this counter gets stable. + + DispatchManager dispatchManager = + ((DistributedQueryRunner) getQueryRunner()).getCoordinator().getDispatchManager(); + long beforeCompletedQueriesCount = + waitUntilStable( + () -> dispatchManager.getStats().getCompletedQueries().getTotalCount(), + new Duration(5, SECONDS)); + long beforeSubmittedQueriesCount = + dispatchManager.getStats().getSubmittedQueries().getTotalCount(); + String tableName = "test_logging_count" + randomNameSuffix(); + assertUpdate("CREATE TABLE " + tableName + tableDefinitionForQueryLoggingCount()); + assertQueryReturnsEmptyResult("SELECT foo_1, foo_2_4 FROM " + tableName); + assertUpdate("DROP TABLE " + tableName); + assertQueryFails("SELECT * FROM " + tableName, ".*Table .* does not exist"); + + // TODO: Figure out a better way of synchronization + assertEventually( + new Duration(1, MINUTES), + () -> + assertEquals( + dispatchManager.getStats().getCompletedQueries().getTotalCount() + - beforeCompletedQueriesCount, + 4)); + assertEquals( + dispatchManager.getStats().getSubmittedQueries().getTotalCount() + - beforeSubmittedQueriesCount, + 4); + }); } - /** - * The table must have two columns foo_1 and foo_2_4 of any type. - */ + /** The table must have two columns foo_1 and foo_2_4 of any type. 
*/ @Language("SQL") protected String tableDefinitionForQueryLoggingCount() { return "(foo_1 int, foo_2_4 int)"; } @@ -2688,7 +3336,10 @@ private T waitUntilStable(Supplier computation, Duration timeout) { @Test public void testShowSchemasFromOther() { MaterializedResult result = computeActual("SHOW SCHEMAS FROM tpch"); - assertTrue(result.getOnlyColumnAsSet().containsAll(ImmutableSet.of(INFORMATION_SCHEMA, "tiny", "sf1"))); + assertTrue( + result + .getOnlyColumnAsSet() + .containsAll(ImmutableSet.of(INFORMATION_SCHEMA, "tiny", "sf1"))); } // TODO move to engine-only @@ -2710,9 +3361,13 @@ public void testWrittenStats() { String tableName = "test_written_stats_" + randomNameSuffix(); try { String sql = "CREATE TABLE " + tableName + " AS SELECT * FROM nation"; - MaterializedResultWithQueryId - resultResultWithQueryId = getDistributedQueryRunner().executeWithQueryId(getSession(), sql); - QueryInfo queryInfo = getDistributedQueryRunner().getCoordinator().getQueryManager().getFullQueryInfo(resultResultWithQueryId.getQueryId()); + MaterializedResultWithQueryId resultResultWithQueryId = + getDistributedQueryRunner().executeWithQueryId(getSession(), sql); + QueryInfo queryInfo = + getDistributedQueryRunner() + .getCoordinator() + .getQueryManager() + .getFullQueryInfo(resultResultWithQueryId.getQueryId()); assertEquals(queryInfo.getQueryStats().getOutputPositions(), 1L); assertEquals(queryInfo.getQueryStats().getWrittenPositions(), 25L); @@ -2720,7 +3375,11 @@ public void testWrittenStats() { sql = "INSERT INTO " + tableName + " SELECT * FROM nation LIMIT 10"; resultResultWithQueryId = getDistributedQueryRunner().executeWithQueryId(getSession(), sql); - queryInfo = getDistributedQueryRunner().getCoordinator().getQueryManager().getFullQueryInfo(resultResultWithQueryId.getQueryId()); + queryInfo = + getDistributedQueryRunner() + .getCoordinator() + .getQueryManager() + .getFullQueryInfo(resultResultWithQueryId.getQueryId()); assertEquals(queryInfo.getQueryStats().getOutputPositions(), 1L); assertEquals(queryInfo.getQueryStats().getWrittenPositions(), 10L); @@ -2745,11 +3404,14 @@ protected void testColumnName(String columnName, boolean delimited) { if (delimited) { nameInSql = "\"" + columnName.replace("\"", "\"\"") + "\""; } - String tableName = "tcn_" + nameInSql.toLowerCase(ENGLISH).replaceAll("[^a-z0-9]", "") + randomNameSuffix(); + String tableName = + "tcn_" + nameInSql.toLowerCase(ENGLISH).replaceAll("[^a-z0-9]", "") + randomNameSuffix(); try { - // TODO test with both CTAS *and* CREATE TABLE + INSERT, since they use different connector API methods. - assertUpdate("CREATE TABLE " + tableName + "(key varchar(50), " + nameInSql + " varchar(50))"); + // TODO test with both CTAS *and* CREATE TABLE + INSERT, since they use different connector + // API methods. + assertUpdate( + "CREATE TABLE " + tableName + "(key varchar(50), " + nameInSql + " varchar(50))"); } catch (RuntimeException e) { if (isColumnNameRejected(e, columnName, delimited)) { // It is OK if the given column name is not allowed and is clearly rejected by the connector.
@@ -2758,23 +3420,34 @@ protected void testColumnName(String columnName, boolean delimited) { throw e; } try { - assertUpdate("INSERT INTO " + tableName + " VALUES ('null value', NULL), ('sample value', 'abc'), ('other value', 'xyz')", 3); + assertUpdate( + "INSERT INTO " + + tableName + + " VALUES ('null value', NULL), ('sample value', 'abc'), ('other value', 'xyz')", + 3); // SELECT * - assertQuery("SELECT * FROM " + tableName, "VALUES ('null value', NULL), ('sample value', 'abc'), ('other value', 'xyz')"); + assertQuery( + "SELECT * FROM " + tableName, + "VALUES ('null value', NULL), ('sample value', 'abc'), ('other value', 'xyz')"); // projection assertQuery("SELECT " + nameInSql + " FROM " + tableName, "VALUES (NULL), ('abc'), ('xyz')"); // predicate - assertQuery("SELECT key FROM " + tableName + " WHERE " + nameInSql + " IS NULL", "VALUES ('null value')"); - assertQuery("SELECT key FROM " + tableName + " WHERE " + nameInSql + " = 'abc'", "VALUES ('sample value')"); + assertQuery( + "SELECT key FROM " + tableName + " WHERE " + nameInSql + " IS NULL", + "VALUES ('null value')"); + assertQuery( + "SELECT key FROM " + tableName + " WHERE " + nameInSql + " = 'abc'", + "VALUES ('sample value')"); } finally { assertUpdate("DROP TABLE " + tableName); } } - protected boolean isColumnNameRejected(Exception exception, String columnName, boolean delimited) { + protected boolean isColumnNameRejected( + Exception exception, String columnName, boolean delimited) { return false; } @@ -2821,10 +3494,12 @@ protected Optional filterColumnNameTestData(String columnName) { } protected String dataMappingTableName(String trinoTypeName) { - return "test_data_mapping_smoke_" + trinoTypeName.replaceAll("[^a-zA-Z0-9]", "_") + "_" + randomNameSuffix(); + return "test_data_mapping_smoke_" + + trinoTypeName.replaceAll("[^a-zA-Z0-9]", "_") + + "_" + + randomNameSuffix(); } - @DataProvider public final Object[][] testDataMappingSmokeTestDataProvider() { return testDataMappingSmokeTestData().stream() @@ -2841,15 +3516,33 @@ private List testDataMappingSmokeTestData() { .add(new DataMappingTestSetup("integer", "1274942432", "2147483647")) .add(new DataMappingTestSetup("bigint", "312739231274942432", "9223372036854775807")) .add(new DataMappingTestSetup("real", "REAL '567.123'", "REAL '999999.999'")) - .add(new DataMappingTestSetup("double", "DOUBLE '1234567890123.123'", "DOUBLE '9999999999999.999'")) + .add( + new DataMappingTestSetup( + "double", "DOUBLE '1234567890123.123'", "DOUBLE '9999999999999.999'")) .add(new DataMappingTestSetup("decimal(5,3)", "12.345", "99.999")) .add(new DataMappingTestSetup("decimal(15,3)", "123456789012.345", "999999999999.99")) - .add(new DataMappingTestSetup("date", "DATE '0001-01-01'", "DATE '1582-10-04'")) // before julian->gregorian switch - .add(new DataMappingTestSetup("date", "DATE '1582-10-05'", "DATE '1582-10-14'")) // during julian->gregorian switch + .add( + new DataMappingTestSetup( + "date", + "DATE '0001-01-01'", + "DATE '1582-10-04'")) // before julian->gregorian switch + .add( + new DataMappingTestSetup( + "date", + "DATE '1582-10-05'", + "DATE '1582-10-14'")) // during julian->gregorian switch .add(new DataMappingTestSetup("date", "DATE '2020-02-12'", "DATE '9999-12-31'")) .add(new DataMappingTestSetup("time", "TIME '15:03:00'", "TIME '23:59:59.999'")) - .add(new DataMappingTestSetup("timestamp", "TIMESTAMP '2020-02-12 15:03:00'", "TIMESTAMP '2199-12-31 23:59:59.999'")) - .add(new DataMappingTestSetup("timestamp(3) with time zone", "TIMESTAMP '2020-02-12 15:03:00 
+01:00'", "TIMESTAMP '9999-12-31 23:59:59.999 +12:00'")) + .add( + new DataMappingTestSetup( + "timestamp", + "TIMESTAMP '2020-02-12 15:03:00'", + "TIMESTAMP '2199-12-31 23:59:59.999'")) + .add( + new DataMappingTestSetup( + "timestamp(3) with time zone", + "TIMESTAMP '2020-02-12 15:03:00 +01:00'", + "TIMESTAMP '9999-12-31 23:59:59.999 +12:00'")) .add(new DataMappingTestSetup("char(3)", "'ab'", "'zzz'")) .add(new DataMappingTestSetup("varchar(3)", "'de'", "'zzz'")) .add(new DataMappingTestSetup("varchar", "'łąka for the win'", "'ŻŻŻŻŻŻŻŻŻŻ'")) @@ -2857,7 +3550,8 @@ private List testDataMappingSmokeTestData() { .build(); } - protected Optional filterDataMappingSmokeTestData(DataMappingTestSetup dataMappingTestSetup) { + protected Optional filterDataMappingSmokeTestData( + DataMappingTestSetup dataMappingTestSetup) { return Optional.of(dataMappingTestSetup); } @@ -2900,10 +3594,18 @@ private void testMaterializedViewColumnName(String columnName, boolean delimited if (delimited) { nameInSql = "\"" + columnName.replace("\"", "\"\"") + "\""; } - String viewName = "tcn_" + nameInSql.toLowerCase(ENGLISH).replaceAll("[^a-z0-9]", "_") + "_" + randomNameSuffix(); + String viewName = + "tcn_" + + nameInSql.toLowerCase(ENGLISH).replaceAll("[^a-z0-9]", "_") + + "_" + + randomNameSuffix(); try { - assertUpdate("CREATE MATERIALIZED VIEW " + viewName + " AS SELECT 'sample value' key, 'abc' " + nameInSql); + assertUpdate( + "CREATE MATERIALIZED VIEW " + + viewName + + " AS SELECT 'sample value' key, 'abc' " + + nameInSql); } catch (RuntimeException e) { if (isColumnNameRejected(e, columnName, delimited)) { // It is OK if give column name is not allowed and is clearly rejected by the connector. @@ -2918,21 +3620,35 @@ private void testMaterializedViewColumnName(String columnName, boolean delimited assertUpdate("DROP MATERIALIZED VIEW " + viewName); } - protected Consumer assertPartialLimitWithPreSortedInputsCount(Session session, int expectedCount) { + protected Consumer assertPartialLimitWithPreSortedInputsCount( + Session session, int expectedCount) { return plan -> { - int actualCount = searchFrom(plan.getRoot()) - .where(node -> node instanceof LimitNode && ((LimitNode) node).isPartial() && ((LimitNode) node).requiresPreSortedInputs()) - .findAll() - .size(); + int actualCount = + searchFrom(plan.getRoot()) + .where( + node -> + node instanceof LimitNode + && ((LimitNode) node).isPartial() + && ((LimitNode) node).requiresPreSortedInputs()) + .findAll() + .size(); if (actualCount != expectedCount) { Metadata metadata = getDistributedQueryRunner().getMetadata(); FunctionManager functionManager = getDistributedQueryRunner().getFunctionManager(); - String formattedPlan = textLogicalPlan(plan.getRoot(), plan.getTypes(), metadata, functionManager, StatsAndCosts.empty(), session, 0, false); - throw new AssertionError(format( - "Expected [\n%s\n] partial limit but found [\n%s\n] partial limit. Actual plan is [\n\n%s\n]", - expectedCount, - actualCount, - formattedPlan)); + String formattedPlan = + textLogicalPlan( + plan.getRoot(), + plan.getTypes(), + metadata, + functionManager, + StatsAndCosts.empty(), + session, + 0, + false); + throw new AssertionError( + format( + "Expected [\n%s\n] partial limit but found [\n%s\n] partial limit. 
Actual plan is [\n\n%s\n]", + expectedCount, actualCount, formattedPlan)); } }; } @@ -2948,11 +3664,16 @@ protected static final class DataMappingTestSetup { private final boolean unsupportedType; - public DataMappingTestSetup(String trinoTypeName, String sampleValueLiteral, String highValueLiteral) { + public DataMappingTestSetup( + String trinoTypeName, String sampleValueLiteral, String highValueLiteral) { this(trinoTypeName, sampleValueLiteral, highValueLiteral, false); } - private DataMappingTestSetup(String trinoTypeName, String sampleValueLiteral, String highValueLiteral, boolean unsupportedType) { + private DataMappingTestSetup( + String trinoTypeName, + String sampleValueLiteral, + String highValueLiteral, + boolean unsupportedType) { this.trinoTypeName = requireNonNull(trinoTypeName, "trinoTypeName is null"); this.sampleValueLiteral = requireNonNull(sampleValueLiteral, "sampleValueLiteral is null"); this.highValueLiteral = requireNonNull(highValueLiteral, "highValueLiteral is null"); @@ -2976,11 +3697,7 @@ public boolean isUnsupportedType() { } public DataMappingTestSetup asUnsupported() { - return new DataMappingTestSetup( - trinoTypeName, - sampleValueLiteral, - highValueLiteral, - true); + return new DataMappingTestSetup(trinoTypeName, sampleValueLiteral, highValueLiteral, true); } @Override @@ -2989,4 +3706,4 @@ public String toString() { return trinoTypeName + (unsupportedType ? "!" : ""); } } -} \ No newline at end of file +} diff --git a/trino/src/test/java/com/netease/arctic/trino/iceberg/SchemaInitializer.java b/trino/src/test/java/com/netease/arctic/trino/iceberg/SchemaInitializer.java index eab238fa40..7bc1a747b5 100644 --- a/trino/src/test/java/com/netease/arctic/trino/iceberg/SchemaInitializer.java +++ b/trino/src/test/java/com/netease/arctic/trino/iceberg/SchemaInitializer.java @@ -18,6 +18,10 @@ package com.netease.arctic.trino.iceberg; +import static io.trino.plugin.tpch.TpchMetadata.TINY_SCHEMA_NAME; +import static io.trino.testing.QueryAssertions.copyTpchTables; +import static java.util.Objects.requireNonNull; + import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import io.trino.testing.QueryRunner; @@ -27,17 +31,15 @@ import java.util.function.Consumer; import java.util.stream.Collectors; -import static io.trino.plugin.tpch.TpchMetadata.TINY_SCHEMA_NAME; -import static io.trino.testing.QueryAssertions.copyTpchTables; -import static java.util.Objects.requireNonNull; - -public class SchemaInitializer - implements Consumer { +public class SchemaInitializer implements Consumer { private final String schemaName; private final Map schemaProperties; private final Iterable> clonedTpchTables; - private SchemaInitializer(String schemaName, Map schemaProperties, Iterable> tpchTablesToClone) { + private SchemaInitializer( + String schemaName, + Map schemaProperties, + Iterable> tpchTablesToClone) { this.schemaName = requireNonNull(schemaName, "schemaName is null"); this.schemaProperties = requireNonNull(schemaProperties, "schemaProperties is null"); this.clonedTpchTables = requireNonNull(tpchTablesToClone, "tpchTablesToClone is null"); @@ -49,11 +51,16 @@ public String getSchemaName() { @Override public void accept(QueryRunner queryRunner) { - String schemaProperties = this.schemaProperties.entrySet().stream() - .map(entry -> entry.getKey() + " = " + entry.getValue()) - .collect(Collectors.joining(", ", " WITH ( ", " )")); - queryRunner.execute("CREATE SCHEMA IF NOT EXISTS " + schemaName + (this.schemaProperties.size() > 0 ? 
schemaProperties : "")); - copyTpchTables(queryRunner, "tpch", TINY_SCHEMA_NAME, queryRunner.getDefaultSession(), clonedTpchTables); + String schemaProperties = + this.schemaProperties.entrySet().stream() + .map(entry -> entry.getKey() + " = " + entry.getValue()) + .collect(Collectors.joining(", ", " WITH ( ", " )")); + queryRunner.execute( + "CREATE SCHEMA IF NOT EXISTS " + + schemaName + + (this.schemaProperties.size() > 0 ? schemaProperties : "")); + copyTpchTables( + queryRunner, "tpch", TINY_SCHEMA_NAME, queryRunner.getDefaultSession(), clonedTpchTables); } public static Builder builder() { diff --git a/trino/src/test/java/com/netease/arctic/trino/iceberg/TestArcticCatalogFactory.java b/trino/src/test/java/com/netease/arctic/trino/iceberg/TestArcticCatalogFactory.java index a09e3f3caf..879771caed 100644 --- a/trino/src/test/java/com/netease/arctic/trino/iceberg/TestArcticCatalogFactory.java +++ b/trino/src/test/java/com/netease/arctic/trino/iceberg/TestArcticCatalogFactory.java @@ -42,7 +42,8 @@ public ArcticCatalog getArcticCatalog() { if (arcticCatalog == null) { synchronized (this) { if (arcticCatalog == null) { - try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(this.getClass().getClassLoader())) { + try (ThreadContextClassLoader ignored = + new ThreadContextClassLoader(this.getClass().getClassLoader())) { this.arcticCatalog = new ArcticCatalogSupportTableSuffix( new TestBasicArcticCatalog(arcticConfig.getCatalogUrl())); @@ -55,6 +56,6 @@ public ArcticCatalog getArcticCatalog() { @Override public TableMetaStore getTableMetastore() { - return TableMetaStore.EMPTY; + return TableMetaStore.EMPTY; } } diff --git a/trino/src/test/java/com/netease/arctic/trino/iceberg/TestArcticConnectorFactory.java b/trino/src/test/java/com/netease/arctic/trino/iceberg/TestArcticConnectorFactory.java index 57325a9dc2..b15c894524 100644 --- a/trino/src/test/java/com/netease/arctic/trino/iceberg/TestArcticConnectorFactory.java +++ b/trino/src/test/java/com/netease/arctic/trino/iceberg/TestArcticConnectorFactory.java @@ -18,6 +18,8 @@ package com.netease.arctic.trino.iceberg; +import static com.google.inject.Scopes.SINGLETON; + import com.google.inject.Injector; import com.google.inject.Key; import com.google.inject.TypeLiteral; @@ -64,8 +66,6 @@ import java.util.Optional; import java.util.Set; -import static com.google.inject.Scopes.SINGLETON; - public class TestArcticConnectorFactory implements ConnectorFactory { private static final Logger LOG = LoggerFactory.getLogger(TestArcticConnectorFactory.class); @@ -76,46 +76,58 @@ public String getName() { } @Override - public Connector create(String catalogName, Map config, ConnectorContext context) { + public Connector create( + String catalogName, Map config, ConnectorContext context) { ClassLoader classLoader = InternalIcebergConnectorFactory.class.getClassLoader(); try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) { - Bootstrap app = new Bootstrap( - new EventModule(), - new MBeanModule(), - new ConnectorObjectNameGeneratorModule("io.trino.plugin.iceberg", "trino.plugin.iceberg"), - new JsonModule(), - new TestUnionModule(), - new IcebergSecurityModule(), - new MBeanServerModule(), - binder -> { - binder.bind(NodeVersion.class).toInstance(new NodeVersion(context.getNodeManager().getCurrentNode().getVersion())); - binder.bind(NodeManager.class).toInstance(context.getNodeManager()); - binder.bind(TypeManager.class).toInstance(context.getTypeManager()); - 
binder.bind(PageIndexerFactory.class).toInstance(context.getPageIndexerFactory()); - binder.bind(CatalogName.class).toInstance(new CatalogName(catalogName)); - binder.bind(TrinoFileSystemFactory.class).to(HdfsFileSystemFactory.class).in(SINGLETON); - }); + Bootstrap app = + new Bootstrap( + new EventModule(), + new MBeanModule(), + new ConnectorObjectNameGeneratorModule( + "io.trino.plugin.iceberg", "trino.plugin.iceberg"), + new JsonModule(), + new TestUnionModule(), + new IcebergSecurityModule(), + new MBeanServerModule(), + binder -> { + binder + .bind(NodeVersion.class) + .toInstance( + new NodeVersion(context.getNodeManager().getCurrentNode().getVersion())); + binder.bind(NodeManager.class).toInstance(context.getNodeManager()); + binder.bind(TypeManager.class).toInstance(context.getTypeManager()); + binder.bind(PageIndexerFactory.class).toInstance(context.getPageIndexerFactory()); + binder.bind(CatalogName.class).toInstance(new CatalogName(catalogName)); + binder + .bind(TrinoFileSystemFactory.class) + .to(HdfsFileSystemFactory.class) + .in(SINGLETON); + }); - Injector injector = app - .doNotInitializeLogging() - .setRequiredConfigurationProperties(config) - .initialize(); + Injector injector = + app.doNotInitializeLogging().setRequiredConfigurationProperties(config).initialize(); LifeCycleManager lifeCycleManager = injector.getInstance(LifeCycleManager.class); - ArcticTransactionManager transactionManager = injector.getInstance(ArcticTransactionManager.class); + ArcticTransactionManager transactionManager = + injector.getInstance(ArcticTransactionManager.class); ConnectorSplitManager splitManager = injector.getInstance(ConnectorSplitManager.class); - ConnectorPageSourceProvider connectorPageSource = injector.getInstance(ConnectorPageSourceProvider.class); - ConnectorPageSinkProvider pageSinkProvider = injector.getInstance(ConnectorPageSinkProvider.class); - ConnectorNodePartitioningProvider connectorDistributionProvider = injector.getInstance(ConnectorNodePartitioningProvider.class); - Set sessionPropertiesProviders = injector.getInstance(Key.get(new TypeLiteral>() { - })); - IcebergTableProperties icebergTableProperties = injector.getInstance(IcebergTableProperties.class); - Set procedures = injector.getInstance(Key.get(new TypeLiteral>() { - })); - Set tableProcedures = injector.getInstance(Key.get(new TypeLiteral>() { - })); - Optional accessControl = injector.getInstance(Key.get(new TypeLiteral>() { - })); + ConnectorPageSourceProvider connectorPageSource = + injector.getInstance(ConnectorPageSourceProvider.class); + ConnectorPageSinkProvider pageSinkProvider = + injector.getInstance(ConnectorPageSinkProvider.class); + ConnectorNodePartitioningProvider connectorDistributionProvider = + injector.getInstance(ConnectorNodePartitioningProvider.class); + Set sessionPropertiesProviders = + injector.getInstance(Key.get(new TypeLiteral>() {})); + IcebergTableProperties icebergTableProperties = + injector.getInstance(IcebergTableProperties.class); + Set procedures = + injector.getInstance(Key.get(new TypeLiteral>() {})); + Set tableProcedures = + injector.getInstance(Key.get(new TypeLiteral>() {})); + Optional accessControl = + injector.getInstance(Key.get(new TypeLiteral>() {})); return new ArcticConnector( lifeCycleManager, diff --git a/trino/src/test/java/com/netease/arctic/trino/iceberg/TestArcticTable.java b/trino/src/test/java/com/netease/arctic/trino/iceberg/TestArcticTable.java index c6cd1d533d..e0b95abfc6 100644 --- 
a/trino/src/test/java/com/netease/arctic/trino/iceberg/TestArcticTable.java +++ b/trino/src/test/java/com/netease/arctic/trino/iceberg/TestArcticTable.java @@ -51,7 +51,7 @@ import java.util.List; import java.util.Map; -//extends BasicUnkeyedTable is for adapt IcebergMeta +// extends BasicUnkeyedTable is for adapt IcebergMeta public class TestArcticTable extends BasicUnkeyedTable { private BaseTable table; diff --git a/trino/src/test/java/com/netease/arctic/trino/iceberg/TestBaseArcticConnectorTest.java b/trino/src/test/java/com/netease/arctic/trino/iceberg/TestBaseArcticConnectorTest.java index ac57f1518a..d6d82675f3 100644 --- a/trino/src/test/java/com/netease/arctic/trino/iceberg/TestBaseArcticConnectorTest.java +++ b/trino/src/test/java/com/netease/arctic/trino/iceberg/TestBaseArcticConnectorTest.java @@ -18,6 +18,43 @@ package com.netease.arctic.trino.iceberg; +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Verify.verify; +import static com.google.common.collect.ImmutableList.toImmutableList; +import static com.google.common.collect.ImmutableMap.toImmutableMap; +import static com.google.common.collect.Iterables.concat; +import static com.google.common.collect.Iterables.getOnlyElement; +import static com.google.common.collect.MoreCollectors.onlyElement; +import static io.trino.SystemSessionProperties.PREFERRED_WRITE_PARTITIONING_MIN_NUMBER_OF_PARTITIONS; +import static io.trino.SystemSessionProperties.SCALE_WRITERS; +import static io.trino.plugin.iceberg.IcebergFileFormat.AVRO; +import static io.trino.plugin.iceberg.IcebergFileFormat.ORC; +import static io.trino.plugin.iceberg.IcebergFileFormat.PARQUET; +import static io.trino.plugin.iceberg.IcebergSplitManager.ICEBERG_DOMAIN_COMPACTION_THRESHOLD; +import static io.trino.spi.predicate.Domain.multipleValues; +import static io.trino.spi.predicate.Domain.singleValue; +import static io.trino.spi.type.BigintType.BIGINT; +import static io.trino.spi.type.DoubleType.DOUBLE; +import static io.trino.spi.type.VarcharType.VARCHAR; +import static io.trino.testing.MaterializedResult.resultBuilder; +import static io.trino.testing.QueryAssertions.assertEqualsIgnoreOrder; +import static io.trino.testing.TestingNames.randomNameSuffix; +import static io.trino.testing.TestingSession.testSessionBuilder; +import static io.trino.testing.assertions.Assert.assertEquals; +import static io.trino.testing.assertions.Assert.assertEventually; +import static io.trino.tpch.TpchTable.LINE_ITEM; +import static io.trino.transaction.TransactionBuilder.transaction; +import static java.lang.String.format; +import static java.lang.String.join; +import static java.util.Collections.nCopies; +import static java.util.stream.Collectors.joining; +import static java.util.stream.Collectors.toUnmodifiableList; +import static java.util.stream.IntStream.range; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import io.airlift.units.DataSize; @@ -68,70 +105,32 @@ import java.util.stream.LongStream; import java.util.stream.Stream; -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Verify.verify; -import static com.google.common.collect.ImmutableList.toImmutableList; -import static 
com.google.common.collect.ImmutableMap.toImmutableMap; -import static com.google.common.collect.Iterables.concat; -import static com.google.common.collect.Iterables.getOnlyElement; -import static com.google.common.collect.MoreCollectors.onlyElement; -import static io.trino.SystemSessionProperties.PREFERRED_WRITE_PARTITIONING_MIN_NUMBER_OF_PARTITIONS; -import static io.trino.SystemSessionProperties.SCALE_WRITERS; -import static io.trino.plugin.iceberg.IcebergFileFormat.AVRO; -import static io.trino.plugin.iceberg.IcebergFileFormat.ORC; -import static io.trino.plugin.iceberg.IcebergFileFormat.PARQUET; -import static io.trino.plugin.iceberg.IcebergSessionProperties.EXTENDED_STATISTICS_ENABLED; -import static io.trino.plugin.iceberg.IcebergSplitManager.ICEBERG_DOMAIN_COMPACTION_THRESHOLD; -import static io.trino.spi.predicate.Domain.multipleValues; -import static io.trino.spi.predicate.Domain.singleValue; -import static io.trino.spi.type.BigintType.BIGINT; -import static io.trino.spi.type.DoubleType.DOUBLE; -import static io.trino.spi.type.VarcharType.VARCHAR; -import static io.trino.testing.MaterializedResult.resultBuilder; -import static io.trino.testing.QueryAssertions.assertEqualsIgnoreOrder; -import static io.trino.testing.TestingNames.randomNameSuffix; -import static io.trino.testing.TestingSession.testSessionBuilder; -import static io.trino.testing.assertions.Assert.assertEquals; -import static io.trino.testing.assertions.Assert.assertEventually; -import static io.trino.tpch.TpchTable.LINE_ITEM; -import static io.trino.transaction.TransactionBuilder.transaction; -import static java.lang.String.format; -import static java.lang.String.join; -import static java.util.Collections.nCopies; -import static java.util.stream.Collectors.joining; -import static java.util.stream.Collectors.toUnmodifiableList; -import static java.util.stream.IntStream.range; -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; -import static org.testng.Assert.assertFalse; -import static org.testng.Assert.assertTrue; - -public class TestBaseArcticConnectorTest - extends BaseConnectorTest { - private static final Pattern WITH_CLAUSE_EXTRACTOR = Pattern.compile(".*(WITH\\s*\\([^)]*\\))\\s*$", Pattern.DOTALL); +public class TestBaseArcticConnectorTest extends BaseConnectorTest { + private static final Pattern WITH_CLAUSE_EXTRACTOR = + Pattern.compile(".*(WITH\\s*\\([^)]*\\))\\s*$", Pattern.DOTALL); private final IcebergFileFormat format = PARQUET; @Override - protected QueryRunner createQueryRunner() - throws Exception { + protected QueryRunner createQueryRunner() throws Exception { return ArcticQueryRunner.builder() .setIcebergProperties(Map.of("iceberg.file-format", format.name())) - .setInitialTables(ImmutableList.>builder() - .addAll(REQUIRED_TPCH_TABLES) - .add(LINE_ITEM) - .build()) + .setInitialTables( + ImmutableList.>builder() + .addAll(REQUIRED_TPCH_TABLES) + .add(LINE_ITEM) + .build()) .build(); } - protected DistributedQueryRunner createQueryRunnerForClient() - throws Exception { + protected DistributedQueryRunner createQueryRunnerForClient() throws Exception { return ArcticQueryRunner.builder() .setExtraProperties(ImmutableMap.of("http-server.http.port", "8080")) - .setInitialTables(ImmutableList.>builder() - .addAll(REQUIRED_TPCH_TABLES) - .add(LINE_ITEM) - .build()) + .setInitialTables( + ImmutableList.>builder() + .addAll(REQUIRED_TPCH_TABLES) + .add(LINE_ITEM) + .build()) .build(); } @@ -163,31 +162,32 @@ public void 
testCharVarcharComparison() { } // @Test -// @Override -// public void testShowCreateSchema() -// { -// assertThat(computeActual("SHOW CREATE SCHEMA tpch").getOnlyValue().toString()) -// .matches("CREATE SCHEMA arctic.tpch\n" + -// "AUTHORIZATION USER user\n" + -// "WITH \\(\n" + -//// "\\s+location = '.*/arctic_data/tpch'\n" + -// "\\)"); -// } -// + // @Override + // public void testShowCreateSchema() + // { + // assertThat(computeActual("SHOW CREATE SCHEMA tpch").getOnlyValue().toString()) + // .matches("CREATE SCHEMA arctic.tpch\n" + + // "AUTHORIZATION USER user\n" + + // "WITH \\(\n" + + //// "\\s+location = '.*/arctic_data/tpch'\n" + + // "\\)"); + // } + // @Override @Test public void testDescribeTable() { - MaterializedResult expectedColumns = resultBuilder(getSession(), VARCHAR, VARCHAR, VARCHAR, VARCHAR) - .row("orderkey", "bigint", "", "") - .row("custkey", "bigint", "", "") - .row("orderstatus", "varchar", "", "") - .row("totalprice", "double", "", "") - .row("orderdate", "date", "", "") - .row("orderpriority", "varchar", "", "") - .row("clerk", "varchar", "", "") - .row("shippriority", "integer", "", "") - .row("comment", "varchar", "", "") - .build(); + MaterializedResult expectedColumns = + resultBuilder(getSession(), VARCHAR, VARCHAR, VARCHAR, VARCHAR) + .row("orderkey", "bigint", "", "") + .row("custkey", "bigint", "", "") + .row("orderstatus", "varchar", "", "") + .row("totalprice", "double", "", "") + .row("orderdate", "date", "", "") + .row("orderpriority", "varchar", "", "") + .row("clerk", "varchar", "", "") + .row("shippriority", "integer", "", "") + .row("comment", "varchar", "", "") + .build(); MaterializedResult actualColumns = computeActual("DESCRIBE orders"); assertEquals(actualColumns, expectedColumns); } @@ -197,28 +197,35 @@ public void testDescribeTable() { public void testShowCreateTable() { File tempDir = getDistributedQueryRunner().getCoordinator().getBaseDataDir().toFile(); assertThat(computeActual("SHOW CREATE TABLE orders").getOnlyValue()) - .isEqualTo("CREATE TABLE arctic.tpch.orders (\n" + - " orderkey bigint,\n" + - " custkey bigint,\n" + - " orderstatus varchar,\n" + - " totalprice double,\n" + - " orderdate date,\n" + - " orderpriority varchar,\n" + - " clerk varchar,\n" + - " shippriority integer,\n" + - " comment varchar\n" + - ")\n" + - "WITH (\n" + - " format = '" + format.name() + "',\n" + - " format_version = 2\n" + -// " location = '" + tempDir + "/arctic_data/tpch/orders'\n" + - ")"); + .isEqualTo( + "CREATE TABLE arctic.tpch.orders (\n" + + " orderkey bigint,\n" + + " custkey bigint,\n" + + " orderstatus varchar,\n" + + " totalprice double,\n" + + " orderdate date,\n" + + " orderpriority varchar,\n" + + " clerk varchar,\n" + + " shippriority integer,\n" + + " comment varchar\n" + + ")\n" + + "WITH (\n" + + " format = '" + + format.name() + + "',\n" + + " format_version = 2\n" + + + // " location = '" + tempDir + + // "/arctic_data/tpch/orders'\n" + + ")"); } @Override - protected void checkInformationSchemaViewsForMaterializedView(String schemaName, String viewName) { + protected void checkInformationSchemaViewsForMaterializedView( + String schemaName, String viewName) { // TODO should probably return materialized view, as it's also a view -- to be double checked - assertThatThrownBy(() -> super.checkInformationSchemaViewsForMaterializedView(schemaName, viewName)) + assertThatThrownBy( + () -> super.checkInformationSchemaViewsForMaterializedView(schemaName, viewName)) .hasMessageFindingMatch("(?s)Expecting.*to contain:.*\\Q[(" + viewName 
+ ")]"); } @@ -245,17 +252,31 @@ public void testDecimal() { } private void testDecimalWithPrecisionAndScale(int precision, int scale) { - checkArgument(precision >= 1 && precision <= 38, "Decimal precision (%s) must be between 1 and 38 inclusive", precision); - checkArgument(scale < precision && scale >= 0, "Decimal scale (%s) must be less than the precision (%s) and non-negative", scale, precision); + checkArgument( + precision >= 1 && precision <= 38, + "Decimal precision (%s) must be between 1 and 38 inclusive", + precision); + checkArgument( + scale < precision && scale >= 0, + "Decimal scale (%s) must be less than the precision (%s) and non-negative", + scale, + precision); String decimalType = format("DECIMAL(%d,%d)", precision, scale); - String beforeTheDecimalPoint = "12345678901234567890123456789012345678".substring(0, precision - scale); + String beforeTheDecimalPoint = + "12345678901234567890123456789012345678".substring(0, precision - scale); String afterTheDecimalPoint = "09876543210987654321098765432109876543".substring(0, scale); String decimalValue = format("%s.%s", beforeTheDecimalPoint, afterTheDecimalPoint); assertUpdate(format("CREATE TABLE test_arctic_decimal (x %s)", decimalType)); - assertUpdate(format("INSERT INTO test_arctic_decimal (x) VALUES (CAST('%s' AS %s))", decimalValue, decimalType), 1); - assertQuery("SELECT * FROM test_arctic_decimal", format("SELECT CAST('%s' AS %s)", decimalValue, decimalType)); + assertUpdate( + format( + "INSERT INTO test_arctic_decimal (x) VALUES (CAST('%s' AS %s))", + decimalValue, decimalType), + 1); + assertQuery( + "SELECT * FROM test_arctic_decimal", + format("SELECT CAST('%s' AS %s)", decimalValue, decimalType)); dropTable("test_arctic_decimal"); } @@ -278,10 +299,16 @@ private void testSelectOrPartitionedByTime(boolean partitioned) { assertQuery(format("SELECT x FROM %s", tableName), "SELECT CAST('10:12:34' AS TIME)"); assertUpdate(format("INSERT INTO %s VALUES (TIME '9:00:00', 67890)", tableName), 1); assertQuery(format("SELECT COUNT(*) FROM %s", tableName), "SELECT 2"); - assertQuery(format("SELECT x FROM %s WHERE x = TIME '10:12:34'", tableName), "SELECT CAST('10:12:34' AS TIME)"); - assertQuery(format("SELECT x FROM %s WHERE x = TIME '9:00:00'", tableName), "SELECT CAST('9:00:00' AS TIME)"); - assertQuery(format("SELECT x FROM %s WHERE y = 12345", tableName), "SELECT CAST('10:12:34' AS TIME)"); - assertQuery(format("SELECT x FROM %s WHERE y = 67890", tableName), "SELECT CAST('9:00:00' AS TIME)"); + assertQuery( + format("SELECT x FROM %s WHERE x = TIME '10:12:34'", tableName), + "SELECT CAST('10:12:34' AS TIME)"); + assertQuery( + format("SELECT x FROM %s WHERE x = TIME '9:00:00'", tableName), + "SELECT CAST('9:00:00' AS TIME)"); + assertQuery( + format("SELECT x FROM %s WHERE y = 12345", tableName), "SELECT CAST('10:12:34' AS TIME)"); + assertQuery( + format("SELECT x FROM %s WHERE y = 67890", tableName), "SELECT CAST('9:00:00' AS TIME)"); dropTable(tableName); } @@ -297,43 +324,62 @@ public void testSelectByTimestamp() { private void testSelectOrPartitionedByTimestamp(boolean partitioned) { String tableName = format("test_%s_by_timestamp", partitioned ? "partitioned" : "selected"); - assertUpdate(format("CREATE TABLE %s (_timestamp timestamp(6)) %s", - tableName, partitioned ? 
"WITH (partitioning = ARRAY['_timestamp'])" : "")); - @Language("SQL") String select1 = "SELECT TIMESTAMP '2017-05-01 10:12:34' _timestamp"; - @Language("SQL") String select2 = "SELECT TIMESTAMP '2017-10-01 10:12:34' _timestamp"; - @Language("SQL") String select3 = "SELECT TIMESTAMP '2018-05-01 10:12:34' _timestamp"; + assertUpdate( + format( + "CREATE TABLE %s (_timestamp timestamp(6)) %s", + tableName, partitioned ? "WITH (partitioning = ARRAY['_timestamp'])" : "")); + @Language("SQL") + String select1 = "SELECT TIMESTAMP '2017-05-01 10:12:34' _timestamp"; + @Language("SQL") + String select2 = "SELECT TIMESTAMP '2017-10-01 10:12:34' _timestamp"; + @Language("SQL") + String select3 = "SELECT TIMESTAMP '2018-05-01 10:12:34' _timestamp"; assertUpdate(format("INSERT INTO %s %s", tableName, select1), 1); assertUpdate(format("INSERT INTO %s %s", tableName, select2), 1); assertUpdate(format("INSERT INTO %s %s", tableName, select3), 1); assertQuery(format("SELECT COUNT(*) from %s", tableName), "SELECT 3"); - assertQuery(format("SELECT * from %s WHERE _timestamp = TIMESTAMP '2017-05-01 10:12:34'", tableName), select1); - assertQuery(format("SELECT * from %s WHERE _timestamp < TIMESTAMP '2017-06-01 10:12:34'", tableName), select1); - assertQuery(format("SELECT * from %s WHERE _timestamp = TIMESTAMP '2017-10-01 10:12:34'", tableName), select2); - assertQuery(format("SELECT * from %s WHERE _timestamp > TIMESTAMP '2017-06-01 10:12:34' AND _timestamp < TIMESTAMP '2018-05-01 10:12:34'", tableName), select2); - assertQuery(format("SELECT * from %s WHERE _timestamp = TIMESTAMP '2018-05-01 10:12:34'", tableName), select3); - assertQuery(format("SELECT * from %s WHERE _timestamp > TIMESTAMP '2018-01-01 10:12:34'", tableName), select3); + assertQuery( + format("SELECT * from %s WHERE _timestamp = TIMESTAMP '2017-05-01 10:12:34'", tableName), + select1); + assertQuery( + format("SELECT * from %s WHERE _timestamp < TIMESTAMP '2017-06-01 10:12:34'", tableName), + select1); + assertQuery( + format("SELECT * from %s WHERE _timestamp = TIMESTAMP '2017-10-01 10:12:34'", tableName), + select2); + assertQuery( + format( + "SELECT * from %s WHERE _timestamp > TIMESTAMP '2017-06-01 10:12:34' AND _timestamp < TIMESTAMP '2018-05-01 10:12:34'", + tableName), + select2); + assertQuery( + format("SELECT * from %s WHERE _timestamp = TIMESTAMP '2018-05-01 10:12:34'", tableName), + select3); + assertQuery( + format("SELECT * from %s WHERE _timestamp > TIMESTAMP '2018-01-01 10:12:34'", tableName), + select3); dropTable(tableName); } -// @Test -// public void testPartitionByTimestampWithTimeZone() -// { -// testSelectOrPartitionedByTimestampWithTimeZone(true); -// } -// -// @Test -// public void testSelectByTimestampWithTimeZone() -// { -// testSelectOrPartitionedByTimestampWithTimeZone(false); -// } + // @Test + // public void testPartitionByTimestampWithTimeZone() + // { + // testSelectOrPartitionedByTimestampWithTimeZone(true); + // } + // + // @Test + // public void testSelectByTimestampWithTimeZone() + // { + // testSelectOrPartitionedByTimestampWithTimeZone(false); + // } private void testSelectOrPartitionedByTimestampWithTimeZone(boolean partitioned) { String tableName = format("test_%s_by_timestamptz", partitioned ? "partitioned" : "selected"); - assertUpdate(format( - "CREATE TABLE %s (_timestamptz timestamp(6) with time zone) %s", - tableName, - partitioned ? 
"WITH (partitioning = ARRAY['_timestamptz'])" : "")); + assertUpdate( + format( + "CREATE TABLE %s (_timestamptz timestamp(6) with time zone) %s", + tableName, partitioned ? "WITH (partitioning = ARRAY['_timestamptz'])" : "")); String instant1Utc = "TIMESTAMP '2021-10-31 00:30:00.005000 UTC'"; String instant1La = "TIMESTAMP '2021-10-30 17:30:00.005000 America/Los_Angeles'"; @@ -343,7 +389,8 @@ private void testSelectOrPartitionedByTimestampWithTimeZone(boolean partitioned) String instant3La = "TIMESTAMP '2021-10-30 17:30:00.007000 America/Los_Angeles'"; assertUpdate(format("INSERT INTO %s VALUES %s", tableName, instant1Utc), 1); - assertUpdate(format("INSERT INTO %s VALUES %s", tableName, instant2La /* non-UTC for this one */), 1); + assertUpdate( + format("INSERT INTO %s VALUES %s", tableName, instant2La /* non-UTC for this one */), 1); assertUpdate(format("INSERT INTO %s VALUES %s", tableName, instant3Utc), 1); assertQuery(format("SELECT COUNT(*) from %s", tableName), "SELECT 3"); @@ -394,15 +441,31 @@ private void testSelectOrPartitionedByTimestampWithTimeZone(boolean partitioned) .matches(format("VALUES %s, %s", instant2Utc, instant3Utc)); // open range - assertThat(query(format("SELECT * from %s WHERE _timestamptz > %s AND _timestamptz < %s", tableName, instant1Utc, instant3Utc))) + assertThat( + query( + format( + "SELECT * from %s WHERE _timestamptz > %s AND _timestamptz < %s", + tableName, instant1Utc, instant3Utc))) .matches("VALUES " + instant2Utc); - assertThat(query(format("SELECT * from %s WHERE _timestamptz > %s AND _timestamptz < %s", tableName, instant1La, instant3La))) + assertThat( + query( + format( + "SELECT * from %s WHERE _timestamptz > %s AND _timestamptz < %s", + tableName, instant1La, instant3La))) .matches("VALUES " + instant2Utc); // closed range - assertThat(query(format("SELECT * from %s WHERE _timestamptz BETWEEN %s AND %s", tableName, instant1Utc, instant2Utc))) + assertThat( + query( + format( + "SELECT * from %s WHERE _timestamptz BETWEEN %s AND %s", + tableName, instant1Utc, instant2Utc))) .matches(format("VALUES %s, %s", instant1Utc, instant2Utc)); - assertThat(query(format("SELECT * from %s WHERE _timestamptz BETWEEN %s AND %s", tableName, instant1La, instant2La))) + assertThat( + query( + format( + "SELECT * from %s WHERE _timestamptz BETWEEN %s AND %s", + tableName, instant1La, instant2La))) .matches(format("VALUES %s, %s", instant1Utc, instant2Utc)); // != @@ -416,62 +479,131 @@ private void testSelectOrPartitionedByTimestampWithTimeZone(boolean partitioned) .matches(format("VALUES %s, %s", instant1Utc, instant3Utc)); // IS DISTINCT FROM - assertThat(query(format("SELECT * from %s WHERE _timestamptz IS DISTINCT FROM %s", tableName, instant1Utc))) + assertThat( + query( + format( + "SELECT * from %s WHERE _timestamptz IS DISTINCT FROM %s", + tableName, instant1Utc))) .matches(format("VALUES %s, %s", instant2Utc, instant3Utc)); - assertThat(query(format("SELECT * from %s WHERE _timestamptz IS DISTINCT FROM %s", tableName, instant1La))) + assertThat( + query( + format( + "SELECT * from %s WHERE _timestamptz IS DISTINCT FROM %s", + tableName, instant1La))) .matches(format("VALUES %s, %s", instant2Utc, instant3Utc)); - assertThat(query(format("SELECT * from %s WHERE _timestamptz IS DISTINCT FROM %s", tableName, instant2Utc))) + assertThat( + query( + format( + "SELECT * from %s WHERE _timestamptz IS DISTINCT FROM %s", + tableName, instant2Utc))) .matches(format("VALUES %s, %s", instant1Utc, instant3Utc)); - assertThat(query(format("SELECT * from %s WHERE 
_timestamptz IS DISTINCT FROM %s", tableName, instant2La))) + assertThat( + query( + format( + "SELECT * from %s WHERE _timestamptz IS DISTINCT FROM %s", + tableName, instant2La))) .matches(format("VALUES %s, %s", instant1Utc, instant3Utc)); // IS NOT DISTINCT FROM - assertThat(query(format("SELECT * from %s WHERE _timestamptz IS NOT DISTINCT FROM %s", tableName, instant1Utc))) + assertThat( + query( + format( + "SELECT * from %s WHERE _timestamptz IS NOT DISTINCT FROM %s", + tableName, instant1Utc))) .matches("VALUES " + instant1Utc); - assertThat(query(format("SELECT * from %s WHERE _timestamptz IS NOT DISTINCT FROM %s", tableName, instant1La))) + assertThat( + query( + format( + "SELECT * from %s WHERE _timestamptz IS NOT DISTINCT FROM %s", + tableName, instant1La))) .matches("VALUES " + instant1Utc); - assertThat(query(format("SELECT * from %s WHERE _timestamptz IS NOT DISTINCT FROM %s", tableName, instant2Utc))) + assertThat( + query( + format( + "SELECT * from %s WHERE _timestamptz IS NOT DISTINCT FROM %s", + tableName, instant2Utc))) .matches("VALUES " + instant2Utc); - assertThat(query(format("SELECT * from %s WHERE _timestamptz IS NOT DISTINCT FROM %s", tableName, instant2La))) + assertThat( + query( + format( + "SELECT * from %s WHERE _timestamptz IS NOT DISTINCT FROM %s", + tableName, instant2La))) .matches("VALUES " + instant2Utc); - assertThat(query(format("SELECT * from %s WHERE _timestamptz IS NOT DISTINCT FROM %s", tableName, instant3Utc))) + assertThat( + query( + format( + "SELECT * from %s WHERE _timestamptz IS NOT DISTINCT FROM %s", + tableName, instant3Utc))) .matches("VALUES " + instant3Utc); - assertThat(query(format("SELECT * from %s WHERE _timestamptz IS NOT DISTINCT FROM %s", tableName, instant3La))) + assertThat( + query( + format( + "SELECT * from %s WHERE _timestamptz IS NOT DISTINCT FROM %s", + tableName, instant3La))) .matches("VALUES " + instant3Utc); if (partitioned) { - assertThat(query(format("SELECT record_count, file_count, partition._timestamptz FROM \"%s$partitions\"", tableName))) - .matches(format("VALUES (BIGINT '1', BIGINT '1', %s), (BIGINT '1', BIGINT '1', %s), (BIGINT '1', BIGINT '1', %s)", instant1Utc, instant2Utc, instant3Utc)); + assertThat( + query( + format( + "SELECT record_count, file_count, partition._timestamptz FROM \"%s$partitions\"", + tableName))) + .matches( + format( + "VALUES (BIGINT '1', BIGINT '1', %s), (BIGINT '1', BIGINT '1', %s), (BIGINT '1', BIGINT '1', %s)", + instant1Utc, instant2Utc, instant3Utc)); } else { - assertThat(query(format("SELECT record_count, file_count, data._timestamptz FROM \"%s$partitions\"", tableName))) - .matches(format( - "VALUES (BIGINT '3', BIGINT '3', CAST(ROW(%s, %s, 0, NULL) AS row(min timestamp(6) with time zone, max timestamp(6) with time zone, null_count bigint, nan_count bigint)))", - instant1Utc, - format == ORC ? "TIMESTAMP '2021-10-31 00:30:00.007999 UTC'" : instant3Utc)); + assertThat( + query( + format( + "SELECT record_count, file_count, data._timestamptz FROM \"%s$partitions\"", + tableName))) + .matches( + format( + "VALUES (BIGINT '3', BIGINT '3', CAST(ROW(%s, %s, 0, NULL) AS row(min timestamp(6) with time zone, max timestamp(6) with time zone, null_count bigint, nan_count bigint)))", + instant1Utc, + format == ORC ? 
"TIMESTAMP '2021-10-31 00:30:00.007999 UTC'" : instant3Utc)); } // show stats assertThat(query("SHOW STATS FOR " + tableName)) .skippingTypesCheck() - .matches("VALUES " + - "('_timestamptz', NULL, NULL, 0e0, NULL, '2021-10-31 00:30:00.005 UTC', '2021-10-31 00:30:00.007 UTC'), " + - "(NULL, NULL, NULL, NULL, 3e0, NULL, NULL)"); + .matches( + "VALUES " + + "('_timestamptz', NULL, NULL, 0e0, NULL, '2021-10-31 00:30:00.005 UTC', '2021-10-31 00:30:00.007 UTC'), " + + "(NULL, NULL, NULL, NULL, 3e0, NULL, NULL)"); if (partitioned) { // show stats with predicate - assertThat(query("SHOW STATS FOR (SELECT * FROM " + tableName + " WHERE _timestamptz = " + instant1La + ")")) + assertThat( + query( + "SHOW STATS FOR (SELECT * FROM " + + tableName + + " WHERE _timestamptz = " + + instant1La + + ")")) .skippingTypesCheck() - .matches("VALUES " + - // TODO (https://github.com/trinodb/trino/issues/9716) the min/max values are off by 1 millisecond - "('_timestamptz', NULL, NULL, 0e0, NULL, '2021-10-31 00:30:00.005 UTC', '2021-10-31 00:30:00.005 UTC'), " + - "(NULL, NULL, NULL, NULL, 1e0, NULL, NULL)"); + .matches( + "VALUES " + + + // TODO (https://github.com/trinodb/trino/issues/9716) the min/max values are off + // by 1 millisecond + "('_timestamptz', NULL, NULL, 0e0, NULL, '2021-10-31 00:30:00.005 UTC', '2021-10-31 00:30:00.005 UTC'), " + + "(NULL, NULL, NULL, NULL, 1e0, NULL, NULL)"); } else { // show stats with predicate - assertThat(query("SHOW STATS FOR (SELECT * FROM " + tableName + " WHERE _timestamptz = " + instant1La + ")")) + assertThat( + query( + "SHOW STATS FOR (SELECT * FROM " + + tableName + + " WHERE _timestamptz = " + + instant1La + + ")")) .skippingTypesCheck() - .matches("VALUES " + - "('_timestamptz', NULL, NULL, NULL, NULL, NULL, NULL), " + - "(NULL, NULL, NULL, NULL, NULL, NULL, NULL)"); + .matches( + "VALUES " + + "('_timestamptz', NULL, NULL, NULL, NULL, NULL, NULL), " + + "(NULL, NULL, NULL, NULL, NULL, NULL, NULL)"); } assertUpdate("DROP TABLE " + tableName); @@ -492,18 +624,36 @@ private void testSelectOrPartitionedByUuid(boolean partitioned) { String partitioning = partitioned ? 
"WITH (partitioning = ARRAY['x'])" : ""; assertUpdate(format("CREATE TABLE %s (x uuid, y bigint) %s", tableName, partitioning)); - assertUpdate(format("INSERT INTO %s VALUES (UUID '406caec7-68b9-4778-81b2-a12ece70c8b1', 12345)", tableName), 1); + assertUpdate( + format( + "INSERT INTO %s VALUES (UUID '406caec7-68b9-4778-81b2-a12ece70c8b1', 12345)", + tableName), + 1); assertQuery(format("SELECT count(*) FROM %s", tableName), "SELECT 1"); - assertQuery(format("SELECT x FROM %s", tableName), "SELECT CAST('406caec7-68b9-4778-81b2-a12ece70c8b1' AS UUID)"); + assertQuery( + format("SELECT x FROM %s", tableName), + "SELECT CAST('406caec7-68b9-4778-81b2-a12ece70c8b1' AS UUID)"); - assertUpdate(format("INSERT INTO %s VALUES (UUID 'f79c3e09-677c-4bbd-a479-3f349cb785e7', 67890)", tableName), 1); + assertUpdate( + format( + "INSERT INTO %s VALUES (UUID 'f79c3e09-677c-4bbd-a479-3f349cb785e7', 67890)", + tableName), + 1); assertUpdate(format("INSERT INTO %s VALUES (NULL, 7531)", tableName), 1); assertQuery(format("SELECT count(*) FROM %s", tableName), "SELECT 3"); - assertQuery(format("SELECT * FROM %s WHERE x = UUID '406caec7-68b9-4778-81b2-a12ece70c8b1'", tableName), "SELECT CAST('406caec7-68b9-4778-81b2-a12ece70c8b1' AS UUID), 12345"); - assertQuery(format("SELECT * FROM %s WHERE x = UUID 'f79c3e09-677c-4bbd-a479-3f349cb785e7'", tableName), "SELECT CAST('f79c3e09-677c-4bbd-a479-3f349cb785e7' AS UUID), 67890"); + assertQuery( + format("SELECT * FROM %s WHERE x = UUID '406caec7-68b9-4778-81b2-a12ece70c8b1'", tableName), + "SELECT CAST('406caec7-68b9-4778-81b2-a12ece70c8b1' AS UUID), 12345"); + assertQuery( + format("SELECT * FROM %s WHERE x = UUID 'f79c3e09-677c-4bbd-a479-3f349cb785e7'", tableName), + "SELECT CAST('f79c3e09-677c-4bbd-a479-3f349cb785e7' AS UUID), 67890"); assertQuery(format("SELECT * FROM %s WHERE x IS NULL", tableName), "SELECT NULL, 7531"); - assertQuery(format("SELECT x FROM %s WHERE y = 12345", tableName), "SELECT CAST('406caec7-68b9-4778-81b2-a12ece70c8b1' AS UUID)"); - assertQuery(format("SELECT x FROM %s WHERE y = 67890", tableName), "SELECT CAST('f79c3e09-677c-4bbd-a479-3f349cb785e7' AS UUID)"); + assertQuery( + format("SELECT x FROM %s WHERE y = 12345", tableName), + "SELECT CAST('406caec7-68b9-4778-81b2-a12ece70c8b1' AS UUID)"); + assertQuery( + format("SELECT x FROM %s WHERE y = 67890", tableName), + "SELECT CAST('f79c3e09-677c-4bbd-a479-3f349cb785e7' AS UUID)"); assertQuery(format("SELECT x FROM %s WHERE y = 7531", tableName), "SELECT NULL"); assertUpdate("DROP TABLE " + tableName); @@ -511,10 +661,12 @@ private void testSelectOrPartitionedByUuid(boolean partitioned) { @Test public void testNestedUuid() { - assertUpdate("CREATE TABLE test_nested_uuid (int_t int, row_t row(uuid_t uuid, int_t int), map_t map(int, uuid), array_t array(uuid))"); + assertUpdate( + "CREATE TABLE test_nested_uuid (int_t int, row_t row(uuid_t uuid, int_t int), map_t map(int, uuid), array_t array(uuid))"); String uuid = "UUID '406caec7-68b9-4778-81b2-a12ece70c8b1'"; - String value = format("VALUES (2, row(%1$s, 1), map(array[1], array[%1$s]), array[%1$s, %1$s])", uuid); + String value = + format("VALUES (2, row(%1$s, 1), map(array[1], array[%1$s]), array[%1$s, %1$s])", uuid); assertUpdate("INSERT INTO test_nested_uuid " + value, 1); assertThat(query("SELECT row_t.int_t, row_t.uuid_t FROM test_nested_uuid")) @@ -522,80 +674,87 @@ public void testNestedUuid() { assertThat(query("SELECT map_t[1] FROM test_nested_uuid")) .matches("VALUES UUID '406caec7-68b9-4778-81b2-a12ece70c8b1'"); 
assertThat(query("SELECT array_t FROM test_nested_uuid")) - .matches("VALUES ARRAY[UUID '406caec7-68b9-4778-81b2-a12ece70c8b1', UUID '406caec7-68b9-4778-81b2-a12ece70c8b1']"); + .matches( + "VALUES ARRAY[UUID '406caec7-68b9-4778-81b2-a12ece70c8b1', UUID '406caec7-68b9-4778-81b2-a12ece70c8b1']"); - assertQuery("SELECT row_t.int_t FROM test_nested_uuid WHERE row_t.uuid_t = UUID '406caec7-68b9-4778-81b2-a12ece70c8b1'", "VALUES 1"); - assertQuery("SELECT int_t FROM test_nested_uuid WHERE row_t.uuid_t = UUID '406caec7-68b9-4778-81b2-a12ece70c8b1'", "VALUES 2"); + assertQuery( + "SELECT row_t.int_t FROM test_nested_uuid WHERE row_t.uuid_t = UUID '406caec7-68b9-4778-81b2-a12ece70c8b1'", + "VALUES 1"); + assertQuery( + "SELECT int_t FROM test_nested_uuid WHERE row_t.uuid_t = UUID '406caec7-68b9-4778-81b2-a12ece70c8b1'", + "VALUES 2"); } @Test public void testCreatePartitionedTable() { - assertUpdate("" + - "CREATE TABLE test_partitioned_table (" + - " a_boolean boolean, " + - " an_integer integer, " + - " a_bigint bigint, " + - " a_real real, " + - " a_double double, " + - " a_short_decimal decimal(5,2), " + - " a_long_decimal decimal(38,20), " + - " a_varchar varchar, " + - " a_varbinary varbinary, " + - " a_date date, " + - " a_time time(6), " + - " a_timestamp timestamp(6), " + - " a_timestamptz timestamp(6) with time zone, " + - " a_uuid uuid, " + - " a_row row(id integer , vc varchar), " + - " an_array array(varchar), " + - " a_map map(integer, varchar), " + - " \"a quoted, field\" varchar" + - ") " + - "WITH (" + - "partitioning = ARRAY[" + - " 'a_boolean', " + - " 'an_integer', " + - " 'a_bigint', " + - " 'a_real', " + - " 'a_double', " + - " 'a_short_decimal', " + - " 'a_long_decimal', " + - " 'a_varchar', " + - " 'a_varbinary', " + - " 'a_date', " + - " 'a_time', " + - " 'a_timestamp', " + - " 'a_timestamptz', " + - " 'a_uuid', " + - " '\"a quoted, field\"' " + - // Note: partitioning on non-primitive columns is not allowed in Iceberg - " ]" + - ")"); + assertUpdate( + "" + + "CREATE TABLE test_partitioned_table (" + + " a_boolean boolean, " + + " an_integer integer, " + + " a_bigint bigint, " + + " a_real real, " + + " a_double double, " + + " a_short_decimal decimal(5,2), " + + " a_long_decimal decimal(38,20), " + + " a_varchar varchar, " + + " a_varbinary varbinary, " + + " a_date date, " + + " a_time time(6), " + + " a_timestamp timestamp(6), " + + " a_timestamptz timestamp(6) with time zone, " + + " a_uuid uuid, " + + " a_row row(id integer , vc varchar), " + + " an_array array(varchar), " + + " a_map map(integer, varchar), " + + " \"a quoted, field\" varchar" + + ") " + + "WITH (" + + "partitioning = ARRAY[" + + " 'a_boolean', " + + " 'an_integer', " + + " 'a_bigint', " + + " 'a_real', " + + " 'a_double', " + + " 'a_short_decimal', " + + " 'a_long_decimal', " + + " 'a_varchar', " + + " 'a_varbinary', " + + " 'a_date', " + + " 'a_time', " + + " 'a_timestamp', " + + " 'a_timestamptz', " + + " 'a_uuid', " + + " '\"a quoted, field\"' " + + + // Note: partitioning on non-primitive columns is not allowed in Iceberg + " ]" + + ")"); assertQueryReturnsEmptyResult("SELECT * FROM test_partitioned_table"); - String values = "VALUES (" + - "true, " + - "1, " + - "BIGINT '1', " + - "REAL '1.0', " + - "DOUBLE '1.0', " + - "CAST(1.0 AS decimal(5,2)), " + - "CAST(11.0 AS decimal(38,20)), " + - "VARCHAR 'onefsadfdsf', " + - "X'000102f0feff', " + - "DATE '2021-07-24'," + - "TIME '02:43:57.987654', " + - "TIMESTAMP '2021-07-24 03:43:57.987654'," + - "TIMESTAMP '2021-07-24 04:43:57.987654 UTC', " + - 
"UUID '20050910-1330-11e9-ffff-2a86e4085a59', " + - "CAST(ROW(42, 'this is a random value') AS ROW(id int, vc varchar)), " + - "ARRAY[VARCHAR 'uno', 'dos', 'tres'], " + - "map(ARRAY[1,2], ARRAY['ek', VARCHAR 'one']), " + - "VARCHAR 'tralala')"; - - String nullValues = nCopies(18, "NULL").stream() - .collect(joining(", ", "VALUES (", ")")); + String values = + "VALUES (" + + "true, " + + "1, " + + "BIGINT '1', " + + "REAL '1.0', " + + "DOUBLE '1.0', " + + "CAST(1.0 AS decimal(5,2)), " + + "CAST(11.0 AS decimal(38,20)), " + + "VARCHAR 'onefsadfdsf', " + + "X'000102f0feff', " + + "DATE '2021-07-24'," + + "TIME '02:43:57.987654', " + + "TIMESTAMP '2021-07-24 03:43:57.987654'," + + "TIMESTAMP '2021-07-24 04:43:57.987654 UTC', " + + "UUID '20050910-1330-11e9-ffff-2a86e4085a59', " + + "CAST(ROW(42, 'this is a random value') AS ROW(id int, vc varchar)), " + + "ARRAY[VARCHAR 'uno', 'dos', 'tres'], " + + "map(ARRAY[1,2], ARRAY['ek', VARCHAR 'one']), " + + "VARCHAR 'tralala')"; + + String nullValues = nCopies(18, "NULL").stream().collect(joining(", ", "VALUES (", ")")); assertUpdate("INSERT INTO test_partitioned_table " + values, 1); assertUpdate("INSERT INTO test_partitioned_table " + nullValues, 1); @@ -605,206 +764,222 @@ public void testCreatePartitionedTable() { .matches(values + " UNION ALL " + nullValues); // SELECT with predicates - assertThat(query("SELECT * FROM test_partitioned_table WHERE " + - " a_boolean = true " + - "AND an_integer = 1 " + - "AND a_bigint = BIGINT '1' " + - "AND a_real = REAL '1.0' " + - "AND a_double = DOUBLE '1.0' " + - "AND a_short_decimal = CAST(1.0 AS decimal(5,2)) " + - "AND a_long_decimal = CAST(11.0 AS decimal(38,20)) " + - "AND a_varchar = VARCHAR 'onefsadfdsf' " + - "AND a_varbinary = X'000102f0feff' " + - "AND a_date = DATE '2021-07-24' " + - "AND a_time = TIME '02:43:57.987654' " + - "AND a_timestamp = TIMESTAMP '2021-07-24 03:43:57.987654' " + - "AND a_timestamptz = TIMESTAMP '2021-07-24 04:43:57.987654 UTC' " + - "AND a_uuid = UUID '20050910-1330-11e9-ffff-2a86e4085a59' " + - "AND a_row = CAST(ROW(42, 'this is a random value') AS ROW(id int, vc varchar)) " + - "AND an_array = ARRAY[VARCHAR 'uno', 'dos', 'tres'] " + - "AND a_map = map(ARRAY[1,2], ARRAY['ek', VARCHAR 'one']) " + - "AND \"a quoted, field\" = VARCHAR 'tralala' " + - "")) + assertThat( + query( + "SELECT * FROM test_partitioned_table WHERE " + + " a_boolean = true " + + "AND an_integer = 1 " + + "AND a_bigint = BIGINT '1' " + + "AND a_real = REAL '1.0' " + + "AND a_double = DOUBLE '1.0' " + + "AND a_short_decimal = CAST(1.0 AS decimal(5,2)) " + + "AND a_long_decimal = CAST(11.0 AS decimal(38,20)) " + + "AND a_varchar = VARCHAR 'onefsadfdsf' " + + "AND a_varbinary = X'000102f0feff' " + + "AND a_date = DATE '2021-07-24' " + + "AND a_time = TIME '02:43:57.987654' " + + "AND a_timestamp = TIMESTAMP '2021-07-24 03:43:57.987654' " + + "AND a_timestamptz = TIMESTAMP '2021-07-24 04:43:57.987654 UTC' " + + "AND a_uuid = UUID '20050910-1330-11e9-ffff-2a86e4085a59' " + + "AND a_row = CAST(ROW(42, 'this is a random value') AS ROW(id int, vc varchar)) " + + "AND an_array = ARRAY[VARCHAR 'uno', 'dos', 'tres'] " + + "AND a_map = map(ARRAY[1,2], ARRAY['ek', VARCHAR 'one']) " + + "AND \"a quoted, field\" = VARCHAR 'tralala' " + + "")) .matches(values); - assertThat(query("SELECT * FROM test_partitioned_table WHERE " + - " a_boolean IS NULL " + - "AND an_integer IS NULL " + - "AND a_bigint IS NULL " + - "AND a_real IS NULL " + - "AND a_double IS NULL " + - "AND a_short_decimal IS NULL " + - "AND a_long_decimal 
IS NULL " + - "AND a_varchar IS NULL " + - "AND a_varbinary IS NULL " + - "AND a_date IS NULL " + - "AND a_time IS NULL " + - "AND a_timestamp IS NULL " + - "AND a_timestamptz IS NULL " + - "AND a_uuid IS NULL " + - "AND a_row IS NULL " + - "AND an_array IS NULL " + - "AND a_map IS NULL " + - "AND \"a quoted, field\" IS NULL " + - "")) + assertThat( + query( + "SELECT * FROM test_partitioned_table WHERE " + + " a_boolean IS NULL " + + "AND an_integer IS NULL " + + "AND a_bigint IS NULL " + + "AND a_real IS NULL " + + "AND a_double IS NULL " + + "AND a_short_decimal IS NULL " + + "AND a_long_decimal IS NULL " + + "AND a_varchar IS NULL " + + "AND a_varbinary IS NULL " + + "AND a_date IS NULL " + + "AND a_time IS NULL " + + "AND a_timestamp IS NULL " + + "AND a_timestamptz IS NULL " + + "AND a_uuid IS NULL " + + "AND a_row IS NULL " + + "AND an_array IS NULL " + + "AND a_map IS NULL " + + "AND \"a quoted, field\" IS NULL " + + "")) .skippingTypesCheck() .matches(nullValues); // SHOW STATS switch (format) { case ORC -> { - assertQuery("SHOW STATS FOR test_partitioned_table", - "VALUES " + - " ('a_boolean', NULL, NULL, 0.5, NULL, 'true', 'true'), " + - " ('an_integer', NULL, NULL, 0.5, NULL, '1', '1'), " + - " ('a_bigint', NULL, NULL, 0.5, NULL, '1', '1'), " + - " ('a_real', NULL, NULL, 0.5, NULL, '1.0', '1.0'), " + - " ('a_double', NULL, NULL, 0.5, NULL, '1.0', '1.0'), " + - " ('a_short_decimal', NULL, NULL, 0.5, NULL, '1.0', '1.0'), " + - " ('a_long_decimal', NULL, NULL, 0.5, NULL, '11.0', '11.0'), " + - " ('a_varchar', NULL, NULL, 0.5, NULL, NULL, NULL), " + - " ('a_varbinary', NULL, NULL, 0.5, NULL, NULL, NULL), " + - " ('a_date', NULL, NULL, 0.5, NULL, '2021-07-24', '2021-07-24'), " + - " ('a_time', NULL, NULL, 0.5, NULL, NULL, NULL), " + - " ('a_timestamp', NULL, NULL, 0.5, NULL, '2021-07-24 03:43:57.987654', '2021-07-24 03:43:57.987654'), " + - " ('a_timestamptz', NULL, NULL, 0.5, NULL, '2021-07-24 04:43:57.987 UTC', '2021-07-24 04:43:57.987 UTC'), " + - " ('a_uuid', NULL, NULL, 0.5, NULL, NULL, NULL), " + - " ('a_row', NULL, NULL, 0.5, NULL, NULL, NULL), " + - " ('an_array', NULL, NULL, 0.5, NULL, NULL, NULL), " + - " ('a_map', NULL, NULL, 0.5, NULL, NULL, NULL), " + - " ('a quoted, field', NULL, NULL, 0.5, NULL, NULL, NULL), " + - " (NULL, NULL, NULL, NULL, 2e0, NULL, NULL)"); + assertQuery( + "SHOW STATS FOR test_partitioned_table", + "VALUES " + + " ('a_boolean', NULL, NULL, 0.5, NULL, 'true', 'true'), " + + " ('an_integer', NULL, NULL, 0.5, NULL, '1', '1'), " + + " ('a_bigint', NULL, NULL, 0.5, NULL, '1', '1'), " + + " ('a_real', NULL, NULL, 0.5, NULL, '1.0', '1.0'), " + + " ('a_double', NULL, NULL, 0.5, NULL, '1.0', '1.0'), " + + " ('a_short_decimal', NULL, NULL, 0.5, NULL, '1.0', '1.0'), " + + " ('a_long_decimal', NULL, NULL, 0.5, NULL, '11.0', '11.0'), " + + " ('a_varchar', NULL, NULL, 0.5, NULL, NULL, NULL), " + + " ('a_varbinary', NULL, NULL, 0.5, NULL, NULL, NULL), " + + " ('a_date', NULL, NULL, 0.5, NULL, '2021-07-24', '2021-07-24'), " + + " ('a_time', NULL, NULL, 0.5, NULL, NULL, NULL), " + + " ('a_timestamp', NULL, NULL, 0.5, NULL, '2021-07-24 03:43:57.987654', '2021-07-24 03:43:57.987654'), " + + " ('a_timestamptz', NULL, NULL, 0.5, NULL, '2021-07-24 04:43:57.987 UTC', '2021-07-24 04:43:57.987 UTC'), " + + " ('a_uuid', NULL, NULL, 0.5, NULL, NULL, NULL), " + + " ('a_row', NULL, NULL, 0.5, NULL, NULL, NULL), " + + " ('an_array', NULL, NULL, 0.5, NULL, NULL, NULL), " + + " ('a_map', NULL, NULL, 0.5, NULL, NULL, NULL), " + + " ('a quoted, field', NULL, NULL, 0.5, NULL, NULL, 
NULL), " + + " (NULL, NULL, NULL, NULL, 2e0, NULL, NULL)"); } case PARQUET -> { assertThat(query("SHOW STATS FOR test_partitioned_table")) .skippingTypesCheck() - .matches("VALUES " + - " ('a_boolean', NULL, NULL, 0.5e0, NULL, 'true', 'true'), " + - " ('an_integer', NULL, NULL, 0.5e0, NULL, '1', '1'), " + - " ('a_bigint', NULL, NULL, 0.5e0, NULL, '1', '1'), " + - " ('a_real', NULL, NULL, 0.5e0, NULL, '1.0', '1.0'), " + - " ('a_double', NULL, NULL, 0.5e0, NULL, '1.0', '1.0'), " + - " ('a_short_decimal', NULL, NULL, 0.5e0, NULL, '1.0', '1.0'), " + - " ('a_long_decimal', NULL, NULL, 0.5e0, NULL, '11.0', '11.0'), " + - " ('a_varchar', 234e0, NULL, 0.5e0, NULL, NULL, NULL), " + - " ('a_varbinary', 114e0, NULL, 0.5e0, NULL, NULL, NULL), " + - " ('a_date', NULL, NULL, 0.5e0, NULL, '2021-07-24', '2021-07-24'), " + - " ('a_time', NULL, NULL, 0.5e0, NULL, NULL, NULL), " + - " ('a_timestamp', NULL, NULL, 0.5e0, NULL, '2021-07-24 03:43:57.987654', '2021-07-24 03:43:57.987654'), " + - " ('a_timestamptz', NULL, NULL, 0.5e0, NULL, '2021-07-24 04:43:57.987 UTC', '2021-07-24 04:43:57.987 UTC'), " + - " ('a_uuid', NULL, NULL, 0.5e0, NULL, NULL, NULL), " + - " ('a_row', NULL, NULL, NULL, NULL, NULL, NULL), " + - " ('an_array', NULL, NULL, NULL, NULL, NULL, NULL), " + - " ('a_map', NULL, NULL, NULL, NULL, NULL, NULL), " + - " ('a quoted, field', 224e0, NULL, 0.5e0, NULL, NULL, NULL), " + - " (NULL, NULL, NULL, NULL, 2e0, NULL, NULL)"); + .matches( + "VALUES " + + " ('a_boolean', NULL, NULL, 0.5e0, NULL, 'true', 'true'), " + + " ('an_integer', NULL, NULL, 0.5e0, NULL, '1', '1'), " + + " ('a_bigint', NULL, NULL, 0.5e0, NULL, '1', '1'), " + + " ('a_real', NULL, NULL, 0.5e0, NULL, '1.0', '1.0'), " + + " ('a_double', NULL, NULL, 0.5e0, NULL, '1.0', '1.0'), " + + " ('a_short_decimal', NULL, NULL, 0.5e0, NULL, '1.0', '1.0'), " + + " ('a_long_decimal', NULL, NULL, 0.5e0, NULL, '11.0', '11.0'), " + + " ('a_varchar', 234e0, NULL, 0.5e0, NULL, NULL, NULL), " + + " ('a_varbinary', 114e0, NULL, 0.5e0, NULL, NULL, NULL), " + + " ('a_date', NULL, NULL, 0.5e0, NULL, '2021-07-24', '2021-07-24'), " + + " ('a_time', NULL, NULL, 0.5e0, NULL, NULL, NULL), " + + " ('a_timestamp', NULL, NULL, 0.5e0, NULL, '2021-07-24 03:43:57.987654', '2021-07-24 03:43:57.987654'), " + + " ('a_timestamptz', NULL, NULL, 0.5e0, NULL, '2021-07-24 04:43:57.987 UTC', '2021-07-24 04:43:57.987 UTC'), " + + " ('a_uuid', NULL, NULL, 0.5e0, NULL, NULL, NULL), " + + " ('a_row', NULL, NULL, NULL, NULL, NULL, NULL), " + + " ('an_array', NULL, NULL, NULL, NULL, NULL, NULL), " + + " ('a_map', NULL, NULL, NULL, NULL, NULL, NULL), " + + " ('a quoted, field', 224e0, NULL, 0.5e0, NULL, NULL, NULL), " + + " (NULL, NULL, NULL, NULL, 2e0, NULL, NULL)"); } case AVRO -> { assertThat(query("SHOW STATS FOR test_partitioned_table")) .skippingTypesCheck() - .matches("VALUES " + - " ('a_boolean', NULL, NULL, 0.5e0, NULL, 'true', 'true'), " + - " ('an_integer', NULL, NULL, 0.5e0, NULL, '1', '1'), " + - " ('a_bigint', NULL, NULL, 0.5e0, NULL, '1', '1'), " + - " ('a_real', NULL, NULL, 0.5e0, NULL, '1.0', '1.0'), " + - " ('a_double', NULL, NULL, 0.5e0, NULL, '1.0', '1.0'), " + - " ('a_short_decimal', NULL, NULL, 0.5e0, NULL, '1.0', '1.0'), " + - " ('a_long_decimal', NULL, NULL, 0.5e0, NULL, '11.0', '11.0'), " + - " ('a_varchar', NULL, NULL, 0.5e0, NULL, NULL, NULL), " + - " ('a_varbinary', NULL, NULL, 0.5e0, NULL, NULL, NULL), " + - " ('a_date', NULL, NULL, 0.5e0, NULL, '2021-07-24', '2021-07-24'), " + - " ('a_time', NULL, NULL, 0.5e0, NULL, NULL, NULL), " + - " ('a_timestamp', 
NULL, NULL, 0.5e0, NULL, '2021-07-24 03:43:57.987654', '2021-07-24 03:43:57.987654'), " + - " ('a_timestamptz', NULL, NULL, 0.5e0, NULL, '2021-07-24 04:43:57.987 UTC', '2021-07-24 04:43:57.987 UTC'), " + - " ('a_uuid', NULL, NULL, 0.5e0, NULL, NULL, NULL), " + - " ('a_row', NULL, NULL, NULL, NULL, NULL, NULL), " + - " ('an_array', NULL, NULL, NULL, NULL, NULL, NULL), " + - " ('a_map', NULL, NULL, NULL, NULL, NULL, NULL), " + - " ('a quoted, field', NULL, NULL, 0.5e0, NULL, NULL, NULL), " + - " (NULL, NULL, NULL, NULL, 2e0, NULL, NULL)"); + .matches( + "VALUES " + + " ('a_boolean', NULL, NULL, 0.5e0, NULL, 'true', 'true'), " + + " ('an_integer', NULL, NULL, 0.5e0, NULL, '1', '1'), " + + " ('a_bigint', NULL, NULL, 0.5e0, NULL, '1', '1'), " + + " ('a_real', NULL, NULL, 0.5e0, NULL, '1.0', '1.0'), " + + " ('a_double', NULL, NULL, 0.5e0, NULL, '1.0', '1.0'), " + + " ('a_short_decimal', NULL, NULL, 0.5e0, NULL, '1.0', '1.0'), " + + " ('a_long_decimal', NULL, NULL, 0.5e0, NULL, '11.0', '11.0'), " + + " ('a_varchar', NULL, NULL, 0.5e0, NULL, NULL, NULL), " + + " ('a_varbinary', NULL, NULL, 0.5e0, NULL, NULL, NULL), " + + " ('a_date', NULL, NULL, 0.5e0, NULL, '2021-07-24', '2021-07-24'), " + + " ('a_time', NULL, NULL, 0.5e0, NULL, NULL, NULL), " + + " ('a_timestamp', NULL, NULL, 0.5e0, NULL, '2021-07-24 03:43:57.987654', '2021-07-24 03:43:57.987654'), " + + " ('a_timestamptz', NULL, NULL, 0.5e0, NULL, '2021-07-24 04:43:57.987 UTC', '2021-07-24 04:43:57.987 UTC'), " + + " ('a_uuid', NULL, NULL, 0.5e0, NULL, NULL, NULL), " + + " ('a_row', NULL, NULL, NULL, NULL, NULL, NULL), " + + " ('an_array', NULL, NULL, NULL, NULL, NULL, NULL), " + + " ('a_map', NULL, NULL, NULL, NULL, NULL, NULL), " + + " ('a quoted, field', NULL, NULL, 0.5e0, NULL, NULL, NULL), " + + " (NULL, NULL, NULL, NULL, 2e0, NULL, NULL)"); } } // $partitions String schema = getSession().getSchema().orElseThrow(); - assertThat(query("SELECT column_name FROM information_schema.columns WHERE table_schema = '" + schema + "' AND table_name = 'test_partitioned_table$partitions' ")) + assertThat( + query( + "SELECT column_name FROM information_schema.columns WHERE table_schema = '" + + schema + + "' AND table_name = 'test_partitioned_table$partitions' ")) .skippingTypesCheck() .matches("VALUES 'partition', 'record_count', 'file_count', 'total_size'"); - assertThat(query("SELECT " + - " record_count," + - " file_count, " + - " partition.a_boolean, " + - " partition.an_integer, " + - " partition.a_bigint, " + - " partition.a_real, " + - " partition.a_double, " + - " partition.a_short_decimal, " + - " partition.a_long_decimal, " + - " partition.a_varchar, " + - " partition.a_varbinary, " + - " partition.a_date, " + - " partition.a_time, " + - " partition.a_timestamp, " + - " partition.a_timestamptz, " + - " partition.a_uuid, " + - " partition.\"a quoted, field\" " + - // Note: partitioning on non-primitive columns is not allowed in Iceberg - " FROM \"test_partitioned_table$partitions\" ")) - .matches("" + - "VALUES (" + - " BIGINT '1', " + - " BIGINT '1', " + - " true, " + - " 1, " + - " BIGINT '1', " + - " REAL '1.0', " + - " DOUBLE '1.0', " + - " CAST(1.0 AS decimal(5,2)), " + - " CAST(11.0 AS decimal(38,20)), " + - " VARCHAR 'onefsadfdsf', " + - " X'000102f0feff', " + - " DATE '2021-07-24'," + - " TIME '02:43:57.987654', " + - " TIMESTAMP '2021-07-24 03:43:57.987654'," + - " TIMESTAMP '2021-07-24 04:43:57.987654 UTC', " + - " UUID '20050910-1330-11e9-ffff-2a86e4085a59', " + - " VARCHAR 'tralala' " + - ")" + - "UNION ALL " + - "VALUES (" + - " 
BIGINT '1', " + - " BIGINT '1', " + - " NULL, " + - " NULL, " + - " NULL, " + - " NULL, " + - " NULL, " + - " NULL, " + - " NULL, " + - " NULL, " + - " NULL, " + - " NULL, " + - " NULL, " + - " NULL, " + - " NULL, " + - " NULL, " + - " NULL " + - ")"); + assertThat( + query( + "SELECT " + + " record_count," + + " file_count, " + + " partition.a_boolean, " + + " partition.an_integer, " + + " partition.a_bigint, " + + " partition.a_real, " + + " partition.a_double, " + + " partition.a_short_decimal, " + + " partition.a_long_decimal, " + + " partition.a_varchar, " + + " partition.a_varbinary, " + + " partition.a_date, " + + " partition.a_time, " + + " partition.a_timestamp, " + + " partition.a_timestamptz, " + + " partition.a_uuid, " + + " partition.\"a quoted, field\" " + + + // Note: partitioning on non-primitive columns is not allowed in Iceberg + " FROM \"test_partitioned_table$partitions\" ")) + .matches( + "" + + "VALUES (" + + " BIGINT '1', " + + " BIGINT '1', " + + " true, " + + " 1, " + + " BIGINT '1', " + + " REAL '1.0', " + + " DOUBLE '1.0', " + + " CAST(1.0 AS decimal(5,2)), " + + " CAST(11.0 AS decimal(38,20)), " + + " VARCHAR 'onefsadfdsf', " + + " X'000102f0feff', " + + " DATE '2021-07-24'," + + " TIME '02:43:57.987654', " + + " TIMESTAMP '2021-07-24 03:43:57.987654'," + + " TIMESTAMP '2021-07-24 04:43:57.987654 UTC', " + + " UUID '20050910-1330-11e9-ffff-2a86e4085a59', " + + " VARCHAR 'tralala' " + + ")" + + "UNION ALL " + + "VALUES (" + + " BIGINT '1', " + + " BIGINT '1', " + + " NULL, " + + " NULL, " + + " NULL, " + + " NULL, " + + " NULL, " + + " NULL, " + + " NULL, " + + " NULL, " + + " NULL, " + + " NULL, " + + " NULL, " + + " NULL, " + + " NULL, " + + " NULL, " + + " NULL " + + ")"); assertUpdate("DROP TABLE test_partitioned_table"); } @Test public void testCreatePartitionedTableWithNestedTypes() { - assertUpdate("" + - "CREATE TABLE test_partitioned_table_nested_type (" + - " _string VARCHAR" + - ", _struct ROW(_field1 INT, _field2 VARCHAR)" + - ", _date DATE" + - ") " + - "WITH (" + - " partitioning = ARRAY['_date']" + - ")"); + assertUpdate( + "" + + "CREATE TABLE test_partitioned_table_nested_type (" + + " _string VARCHAR" + + ", _struct ROW(_field1 INT, _field2 VARCHAR)" + + ", _date DATE" + + ") " + + "WITH (" + + " partitioning = ARRAY['_date']" + + ")"); dropTable("test_partitioned_table_nested_type"); } @@ -814,114 +989,134 @@ public void testCreatePartitionedTableAs() { File tempDir = getDistributedQueryRunner().getCoordinator().getBaseDataDir().toFile(); String tempDirPath = tempDir.toURI().toASCIIString() + randomNameSuffix(); assertUpdate( - "CREATE TABLE test_create_partitioned_table_as " + - "WITH (" + - "format_version = 2," + - "partitioning = ARRAY['ORDER_STATUS', 'Ship_Priority', 'Bucket(order_key,9)']" + - ") " + - "AS " + - "SELECT orderkey AS order_key, shippriority AS ship_priority, orderstatus AS order_status " + - "FROM tpch.tiny.orders", + "CREATE TABLE test_create_partitioned_table_as " + + "WITH (" + + "format_version = 2," + + "partitioning = ARRAY['ORDER_STATUS', 'Ship_Priority', 'Bucket(order_key,9)']" + + ") " + + "AS " + + "SELECT orderkey AS order_key, shippriority AS ship_priority, orderstatus AS order_status " + + "FROM tpch.tiny.orders", "SELECT count(*) from orders"); Assert.assertEquals( computeScalar("SHOW CREATE TABLE test_create_partitioned_table_as"), format( - "CREATE TABLE %s.%s.%s (\n" + - " order_key bigint,\n" + - " ship_priority integer,\n" + - " order_status varchar\n" + - ")\n" + - "WITH (\n" + - " format = '%s',\n" + - 
" format_version = 2,\n" + - " partitioning = ARRAY['order_status','ship_priority','bucket(order_key, 9)']\n" + - ")", + "CREATE TABLE %s.%s.%s (\n" + + " order_key bigint,\n" + + " ship_priority integer,\n" + + " order_status varchar\n" + + ")\n" + + "WITH (\n" + + " format = '%s',\n" + + " format_version = 2,\n" + + " partitioning = ARRAY['order_status','ship_priority','bucket(order_key, 9)']\n" + + ")", getSession().getCatalog().orElseThrow(), getSession().getSchema().orElseThrow(), "test_create_partitioned_table_as", format, tempDirPath)); - assertQuery("SELECT * from test_create_partitioned_table_as", "SELECT orderkey, shippriority, orderstatus FROM orders"); + assertQuery( + "SELECT * from test_create_partitioned_table_as", + "SELECT orderkey, shippriority, orderstatus FROM orders"); dropTable("test_create_partitioned_table_as"); } -// @Test -// public void testTableComments() -// { -// File tempDir = getDistributedQueryRunner().getCoordinator().getBaseDataDir().toFile(); -// String tempDirPath = tempDir.toURI().toASCIIString() + randomNameSuffix(); -// String createTableTemplate = "" + -// "CREATE TABLE arctic.tpch.test_table_comments (\n" + -// " _x bigint\n" + -// ")\n" + -// "COMMENT '%s'\n" + -// "WITH (\n" + -// format(" format = '%s',\n", format) + -// " format_version = 2\n" + -//// format(" location = '%s'\n", tempDirPath) + -// ")"; -// String createTableWithoutComment = "" + -// "CREATE TABLE arctic.tpch.test_table_comments (\n" + -// " _x bigint\n" + -// ")\n" + -// "WITH (\n" + -// " format = '" + format + "',\n" + -// " format_version = 2\n" + -//// " location = '" + tempDirPath + "'\n" + -// ")"; -// String createTableSql = format(createTableTemplate, "test table comment", format); -// assertUpdate(createTableSql); -// assertEquals(computeScalar("SHOW CREATE TABLE test_table_comments"), createTableSql); -// -// assertUpdate("COMMENT ON TABLE test_table_comments IS 'different test table comment'"); -// assertEquals(computeScalar("SHOW CREATE TABLE test_table_comments"), format(createTableTemplate, "different test table comment", format)); -// -// assertUpdate("COMMENT ON TABLE test_table_comments IS NULL"); -// assertEquals(computeScalar("SHOW CREATE TABLE test_table_comments"), createTableWithoutComment); -// dropTable("arctic.tpch.test_table_comments"); -// -// assertUpdate(createTableWithoutComment); -// assertEquals(computeScalar("SHOW CREATE TABLE test_table_comments"), createTableWithoutComment); -// -// dropTable("arctic.tpch.test_table_comments"); -// } - -// @Test -// public void testRollbackSnapshot() -// { -// assertUpdate("CREATE TABLE test_rollback (col0 INTEGER, col1 BIGINT)"); -// long afterCreateTableId = getLatestSnapshotId("test_rollback"); -// -// assertUpdate("INSERT INTO test_rollback (col0, col1) VALUES (123, CAST(987 AS BIGINT))", 1); -// long afterFirstInsertId = getLatestSnapshotId("test_rollback"); -// -// assertUpdate("INSERT INTO test_rollback (col0, col1) VALUES (456, CAST(654 AS BIGINT))", 1); -// assertQuery("SELECT * FROM test_rollback ORDER BY col0", "VALUES (123, CAST(987 AS BIGINT)), (456, CAST(654 AS BIGINT))"); -// -// assertUpdate(format("CALL system.rollback_to_snapshot('tpch', 'test_rollback', %s)", afterFirstInsertId)); -// assertQuery("SELECT * FROM test_rollback ORDER BY col0", "VALUES (123, CAST(987 AS BIGINT))"); -// -// assertUpdate(format("CALL system.rollback_to_snapshot('tpch', 'test_rollback', %s)", afterCreateTableId)); -// assertEquals((long) computeActual("SELECT COUNT(*) FROM test_rollback").getOnlyValue(), 0); -// 
-// assertUpdate("INSERT INTO test_rollback (col0, col1) VALUES (789, CAST(987 AS BIGINT))", 1); -// long afterSecondInsertId = getLatestSnapshotId("test_rollback"); -// -// // extra insert which should be dropped on rollback -// assertUpdate("INSERT INTO test_rollback (col0, col1) VALUES (999, CAST(999 AS BIGINT))", 1); -// -// assertUpdate(format("CALL system.rollback_to_snapshot('tpch', 'test_rollback', %s)", afterSecondInsertId)); -// assertQuery("SELECT * FROM test_rollback ORDER BY col0", "VALUES (789, CAST(987 AS BIGINT))"); -// -// dropTable("test_rollback"); -// } + // @Test + // public void testTableComments() + // { + // File tempDir = getDistributedQueryRunner().getCoordinator().getBaseDataDir().toFile(); + // String tempDirPath = tempDir.toURI().toASCIIString() + randomNameSuffix(); + // String createTableTemplate = "" + + // "CREATE TABLE arctic.tpch.test_table_comments (\n" + + // " _x bigint\n" + + // ")\n" + + // "COMMENT '%s'\n" + + // "WITH (\n" + + // format(" format = '%s',\n", format) + + // " format_version = 2\n" + + //// format(" location = '%s'\n", tempDirPath) + + // ")"; + // String createTableWithoutComment = "" + + // "CREATE TABLE arctic.tpch.test_table_comments (\n" + + // " _x bigint\n" + + // ")\n" + + // "WITH (\n" + + // " format = '" + format + "',\n" + + // " format_version = 2\n" + + //// " location = '" + tempDirPath + "'\n" + + // ")"; + // String createTableSql = format(createTableTemplate, "test table comment", format); + // assertUpdate(createTableSql); + // assertEquals(computeScalar("SHOW CREATE TABLE test_table_comments"), createTableSql); + // + // assertUpdate("COMMENT ON TABLE test_table_comments IS 'different test table comment'"); + // assertEquals(computeScalar("SHOW CREATE TABLE test_table_comments"), + // format(createTableTemplate, "different test table comment", format)); + // + // assertUpdate("COMMENT ON TABLE test_table_comments IS NULL"); + // assertEquals(computeScalar("SHOW CREATE TABLE test_table_comments"), + // createTableWithoutComment); + // dropTable("arctic.tpch.test_table_comments"); + // + // assertUpdate(createTableWithoutComment); + // assertEquals(computeScalar("SHOW CREATE TABLE test_table_comments"), + // createTableWithoutComment); + // + // dropTable("arctic.tpch.test_table_comments"); + // } + + // @Test + // public void testRollbackSnapshot() + // { + // assertUpdate("CREATE TABLE test_rollback (col0 INTEGER, col1 BIGINT)"); + // long afterCreateTableId = getLatestSnapshotId("test_rollback"); + // + // assertUpdate("INSERT INTO test_rollback (col0, col1) VALUES (123, CAST(987 AS BIGINT))", + // 1); + // long afterFirstInsertId = getLatestSnapshotId("test_rollback"); + // + // assertUpdate("INSERT INTO test_rollback (col0, col1) VALUES (456, CAST(654 AS BIGINT))", + // 1); + // assertQuery("SELECT * FROM test_rollback ORDER BY col0", "VALUES (123, CAST(987 AS + // BIGINT)), (456, CAST(654 AS BIGINT))"); + // + // assertUpdate(format("CALL system.rollback_to_snapshot('tpch', 'test_rollback', %s)", + // afterFirstInsertId)); + // assertQuery("SELECT * FROM test_rollback ORDER BY col0", "VALUES (123, CAST(987 AS + // BIGINT))"); + // + // assertUpdate(format("CALL system.rollback_to_snapshot('tpch', 'test_rollback', %s)", + // afterCreateTableId)); + // assertEquals((long) computeActual("SELECT COUNT(*) FROM test_rollback").getOnlyValue(), + // 0); + // + // assertUpdate("INSERT INTO test_rollback (col0, col1) VALUES (789, CAST(987 AS BIGINT))", + // 1); + // long afterSecondInsertId = 
getLatestSnapshotId("test_rollback"); + // + // // extra insert which should be dropped on rollback + // assertUpdate("INSERT INTO test_rollback (col0, col1) VALUES (999, CAST(999 AS BIGINT))", + // 1); + // + // assertUpdate(format("CALL system.rollback_to_snapshot('tpch', 'test_rollback', %s)", + // afterSecondInsertId)); + // assertQuery("SELECT * FROM test_rollback ORDER BY col0", "VALUES (789, CAST(987 AS + // BIGINT))"); + // + // dropTable("test_rollback"); + // } private long getLatestSnapshotId(String tableName) { - return (long) computeActual(format("SELECT snapshot_id FROM \"%s$snapshots\" ORDER BY committed_at DESC LIMIT 1", tableName)) - .getOnlyValue(); + return (long) + computeActual( + format( + "SELECT snapshot_id FROM \"%s$snapshots\" ORDER BY committed_at DESC LIMIT 1", + tableName)) + .getOnlyValue(); } @Override @@ -931,7 +1126,8 @@ protected String errorMessageForInsertIntoNotNullColumn(String columnName) { @Test public void testSchemaEvolution() { - assertUpdate("CREATE TABLE test_schema_evolution_drop_end (col0 INTEGER, col1 INTEGER, col2 INTEGER)"); + assertUpdate( + "CREATE TABLE test_schema_evolution_drop_end (col0 INTEGER, col1 INTEGER, col2 INTEGER)"); assertUpdate("INSERT INTO test_schema_evolution_drop_end VALUES (0, 1, 2)", 1); assertQuery("SELECT * FROM test_schema_evolution_drop_end", "VALUES(0, 1, 2)"); assertUpdate("ALTER TABLE test_schema_evolution_drop_end DROP COLUMN col2"); @@ -942,7 +1138,8 @@ public void testSchemaEvolution() { assertQuery("SELECT * FROM test_schema_evolution_drop_end", "VALUES(0, 1, NULL), (3, 4, 5)"); dropTable("test_schema_evolution_drop_end"); - assertUpdate("CREATE TABLE test_schema_evolution_drop_middle (col0 INTEGER, col1 INTEGER, col2 INTEGER)"); + assertUpdate( + "CREATE TABLE test_schema_evolution_drop_middle (col0 INTEGER, col1 INTEGER, col2 INTEGER)"); assertUpdate("INSERT INTO test_schema_evolution_drop_middle VALUES (0, 1, 2)", 1); assertQuery("SELECT * FROM test_schema_evolution_drop_middle", "VALUES(0, 1, 2)"); assertUpdate("ALTER TABLE test_schema_evolution_drop_middle DROP COLUMN col1"); @@ -955,7 +1152,8 @@ public void testSchemaEvolution() { @Test public void testShowStatsAfterAddColumn() { - assertUpdate("CREATE TABLE test_show_stats_after_add_column (col0 INTEGER, col1 INTEGER, col2 INTEGER)"); + assertUpdate( + "CREATE TABLE test_show_stats_after_add_column (col0 INTEGER, col1 INTEGER, col2 INTEGER)"); // Insert separately to ensure the table has multiple data files assertUpdate("INSERT INTO test_show_stats_after_add_column VALUES (1, 2, 3)", 1); assertUpdate("INSERT INTO test_show_stats_after_add_column VALUES (4, 5, 6)", 1); @@ -964,34 +1162,37 @@ public void testShowStatsAfterAddColumn() { assertThat(query("SHOW STATS FOR test_show_stats_after_add_column")) .skippingTypesCheck() - .matches("VALUES " + - " ('col0', NULL, NULL, 25e-2, NULL, '1', '7')," + - " ('col1', NULL, NULL, 25e-2, NULL, '2', '8'), " + - " ('col2', NULL, NULL, 25e-2, NULL, '3', '9'), " + - " (NULL, NULL, NULL, NULL, 4e0, NULL, NULL)"); - - // Columns added after some data files exist will not have valid statistics because not all files have min/max/null count statistics for the new column + .matches( + "VALUES " + + " ('col0', NULL, NULL, 25e-2, NULL, '1', '7')," + + " ('col1', NULL, NULL, 25e-2, NULL, '2', '8'), " + + " ('col2', NULL, NULL, 25e-2, NULL, '3', '9'), " + + " (NULL, NULL, NULL, NULL, 4e0, NULL, NULL)"); + + // Columns added after some data files exist will not have valid statistics because not all + // files have 
min/max/null count statistics for the new column assertUpdate("ALTER TABLE test_show_stats_after_add_column ADD COLUMN col3 INTEGER"); assertUpdate("INSERT INTO test_show_stats_after_add_column VALUES (10, 11, 12, 13)", 1); assertThat(query("SHOW STATS FOR test_show_stats_after_add_column")) .skippingTypesCheck() - .matches("VALUES " + - " ('col0', NULL, NULL, 2e-1, NULL, '1', '10')," + - " ('col1', NULL, NULL, 2e-1, NULL, '2', '11'), " + - " ('col2', NULL, NULL, 2e-1, NULL, '3', '12'), " + - " ('col3', NULL, NULL, NULL, NULL, NULL, NULL), " + - " (NULL, NULL, NULL, NULL, 5e0, NULL, NULL)"); + .matches( + "VALUES " + + " ('col0', NULL, NULL, 2e-1, NULL, '1', '10')," + + " ('col1', NULL, NULL, 2e-1, NULL, '2', '11'), " + + " ('col2', NULL, NULL, 2e-1, NULL, '3', '12'), " + + " ('col3', NULL, NULL, NULL, NULL, NULL, NULL), " + + " (NULL, NULL, NULL, NULL, 5e0, NULL, NULL)"); } @Test public void testLargeInOnPartitionedColumns() { - assertUpdate("CREATE TABLE test_in_predicate_large_set (col1 BIGINT, col2 BIGINT) WITH (partitioning = ARRAY['col2'])"); + assertUpdate( + "CREATE TABLE test_in_predicate_large_set (col1 BIGINT, col2 BIGINT) WITH (partitioning = ARRAY['col2'])"); assertUpdate("INSERT INTO test_in_predicate_large_set VALUES (1, 10)", 1L); assertUpdate("INSERT INTO test_in_predicate_large_set VALUES (2, 20)", 1L); - List predicates = IntStream.range(0, 25_000).boxed() - .map(Object::toString) - .collect(toImmutableList()); + List predicates = + IntStream.range(0, 25_000).boxed().map(Object::toString).collect(toImmutableList()); String filter = format("col2 IN (%s)", join(",", predicates)); assertThat(query("SELECT * FROM test_in_predicate_large_set WHERE " + filter)) .matches("TABLE test_in_predicate_large_set"); @@ -1009,45 +1210,71 @@ private void testCreateTableLikeForFormat(IcebergFileFormat otherFormat) { File tempDir = getDistributedQueryRunner().getCoordinator().getBaseDataDir().toFile(); String tempDirPath = tempDir.toURI().toASCIIString() + randomNameSuffix(); - // LIKE source INCLUDING PROPERTIES copies all the properties of the source table, including the `location`. + // LIKE source INCLUDING PROPERTIES copies all the properties of the source table, including the + // `location`. // For this reason the source and the copied table will share the same directory. - // This test does not drop intentionally the created tables to avoid affecting the source table or the information_schema. 
- assertUpdate(format("CREATE TABLE test_create_table_like_original (col1 INTEGER, aDate DATE) WITH(format = '%s', partitioning = ARRAY['aDate'])", format)); - Assert.assertEquals(getTablePropertiesString("test_create_table_like_original"), "WITH (\n" + - format(" format = '%s',\n", format) + - " format_version = 2,\n" + -// format(" location = '%s',\n", tempDirPath) + - " partitioning = ARRAY['adate']\n" + - ")"); - - assertUpdate("CREATE TABLE test_create_table_like_copy0 (LIKE test_create_table_like_original, col2 INTEGER)"); - assertUpdate("INSERT INTO test_create_table_like_copy0 (col1, aDate, col2) VALUES (1, CAST('1950-06-28' AS DATE), 3)", 1); - assertQuery("SELECT * from test_create_table_like_copy0", "VALUES(1, CAST('1950-06-28' AS DATE), 3)"); - - assertUpdate("CREATE TABLE test_create_table_like_copy1 (LIKE test_create_table_like_original)"); - Assert.assertEquals(getTablePropertiesString("test_create_table_like_copy1"), "WITH (\n" + - format(" format = '%s',\n format_version = 2\n)", format)); - - assertUpdate("CREATE TABLE test_create_table_like_copy2 (LIKE test_create_table_like_original EXCLUDING PROPERTIES)"); - Assert.assertEquals(getTablePropertiesString("test_create_table_like_copy2"), "WITH (\n" + - format(" format = '%s',\n format_version = 2\n)", format)); + // This test does not drop intentionally the created tables to avoid affecting the source table + // or the information_schema. + assertUpdate( + format( + "CREATE TABLE test_create_table_like_original (col1 INTEGER, aDate DATE) WITH(format = '%s', partitioning = ARRAY['aDate'])", + format)); + Assert.assertEquals( + getTablePropertiesString("test_create_table_like_original"), + "WITH (\n" + + format(" format = '%s',\n", format) + + " format_version = 2,\n" + + + // format(" location = '%s',\n", tempDirPath) + + " partitioning = ARRAY['adate']\n" + + ")"); + + assertUpdate( + "CREATE TABLE test_create_table_like_copy0 (LIKE test_create_table_like_original, col2 INTEGER)"); + assertUpdate( + "INSERT INTO test_create_table_like_copy0 (col1, aDate, col2) VALUES (1, CAST('1950-06-28' AS DATE), 3)", + 1); + assertQuery( + "SELECT * from test_create_table_like_copy0", "VALUES(1, CAST('1950-06-28' AS DATE), 3)"); + + assertUpdate( + "CREATE TABLE test_create_table_like_copy1 (LIKE test_create_table_like_original)"); + Assert.assertEquals( + getTablePropertiesString("test_create_table_like_copy1"), + "WITH (\n" + format(" format = '%s',\n format_version = 2\n)", format)); + + assertUpdate( + "CREATE TABLE test_create_table_like_copy2 (LIKE test_create_table_like_original EXCLUDING PROPERTIES)"); + Assert.assertEquals( + getTablePropertiesString("test_create_table_like_copy2"), + "WITH (\n" + format(" format = '%s',\n format_version = 2\n)", format)); dropTable("test_create_table_like_copy2"); - assertUpdate("CREATE TABLE test_create_table_like_copy3 (LIKE test_create_table_like_original INCLUDING PROPERTIES)"); - Assert.assertEquals(getTablePropertiesString("test_create_table_like_copy3"), "WITH (\n" + - format(" format = '%s',\n", format) + - " format_version = 2,\n" + -// format(" location = '%s',\n", tempDirPath) + - " partitioning = ARRAY['adate']\n" + - ")"); + assertUpdate( + "CREATE TABLE test_create_table_like_copy3 (LIKE test_create_table_like_original INCLUDING PROPERTIES)"); + Assert.assertEquals( + getTablePropertiesString("test_create_table_like_copy3"), + "WITH (\n" + + format(" format = '%s',\n", format) + + " format_version = 2,\n" + + + // format(" location = '%s',\n", tempDirPath) + + " partitioning = 
ARRAY['adate']\n" + + ")"); - assertUpdate(format("CREATE TABLE test_create_table_like_copy4 (LIKE test_create_table_like_original INCLUDING PROPERTIES) WITH (format = '%s')", otherFormat)); - Assert.assertEquals(getTablePropertiesString("test_create_table_like_copy4"), "WITH (\n" + - format(" format = '%s',\n", otherFormat) + - " format_version = 2,\n" + -// format(" location = '%s',\n", tempDirPath) + - " partitioning = ARRAY['adate']\n" + - ")"); + assertUpdate( + format( + "CREATE TABLE test_create_table_like_copy4 (LIKE test_create_table_like_original INCLUDING PROPERTIES) WITH (format = '%s')", + otherFormat)); + Assert.assertEquals( + getTablePropertiesString("test_create_table_like_copy4"), + "WITH (\n" + + format(" format = '%s',\n", otherFormat) + + " format_version = 2,\n" + + + // format(" location = '%s',\n", tempDirPath) + + " partitioning = ARRAY['adate']\n" + + ")"); } private String getTablePropertiesString(String tableName) { @@ -1067,409 +1294,495 @@ public void testPredicating() { @Test public void testHourTransform() { - assertUpdate("CREATE TABLE test_hour_transform (d TIMESTAMP(6), b BIGINT) WITH (partitioning = ARRAY['hour(d)'])"); - - @Language("SQL") String values = "VALUES " + - "(TIMESTAMP '1969-12-31 22:22:22.222222', 8)," + - "(TIMESTAMP '1969-12-31 23:33:11.456789', 9)," + - "(TIMESTAMP '1969-12-31 23:44:55.567890', 10)," + - "(TIMESTAMP '1970-01-01 00:55:44.765432', 11)," + - "(TIMESTAMP '2015-01-01 10:01:23.123456', 1)," + - "(TIMESTAMP '2015-01-01 10:10:02.987654', 2)," + - "(TIMESTAMP '2015-01-01 10:55:00.456789', 3)," + - "(TIMESTAMP '2015-05-15 12:05:01.234567', 4)," + - "(TIMESTAMP '2015-05-15 12:21:02.345678', 5)," + - "(TIMESTAMP '2020-02-21 13:11:11.876543', 6)," + - "(TIMESTAMP '2020-02-21 13:12:12.654321', 7)"; + assertUpdate( + "CREATE TABLE test_hour_transform (d TIMESTAMP(6), b BIGINT) WITH (partitioning = ARRAY['hour(d)'])"); + + @Language("SQL") + String values = + "VALUES " + + "(TIMESTAMP '1969-12-31 22:22:22.222222', 8)," + + "(TIMESTAMP '1969-12-31 23:33:11.456789', 9)," + + "(TIMESTAMP '1969-12-31 23:44:55.567890', 10)," + + "(TIMESTAMP '1970-01-01 00:55:44.765432', 11)," + + "(TIMESTAMP '2015-01-01 10:01:23.123456', 1)," + + "(TIMESTAMP '2015-01-01 10:10:02.987654', 2)," + + "(TIMESTAMP '2015-01-01 10:55:00.456789', 3)," + + "(TIMESTAMP '2015-05-15 12:05:01.234567', 4)," + + "(TIMESTAMP '2015-05-15 12:21:02.345678', 5)," + + "(TIMESTAMP '2020-02-21 13:11:11.876543', 6)," + + "(TIMESTAMP '2020-02-21 13:12:12.654321', 7)"; assertUpdate("INSERT INTO test_hour_transform " + values, 11); assertQuery("SELECT * FROM test_hour_transform", values); - @Language("SQL") String expected = "VALUES " + - "(-2, 1, TIMESTAMP '1969-12-31 22:22:22.222222', TIMESTAMP '1969-12-31 22:22:22.222222', 8, 8), " + - "(-1, 2, TIMESTAMP '1969-12-31 23:33:11.456789', TIMESTAMP '1969-12-31 23:44:55.567890', 9, 10), " + - "(0, 1, TIMESTAMP '1970-01-01 00:55:44.765432', TIMESTAMP '1970-01-01 00:55:44.765432', 11, 11), " + - "(394474, 3, TIMESTAMP '2015-01-01 10:01:23.123456', TIMESTAMP '2015-01-01 10:55:00.456789', 1, 3), " + - "(397692, 2, TIMESTAMP '2015-05-15 12:05:01.234567', TIMESTAMP '2015-05-15 12:21:02.345678', 4, 5), " + - "(439525, 2, TIMESTAMP '2020-02-21 13:11:11.876543', TIMESTAMP '2020-02-21 13:12:12.654321', 6, 7)"; + @Language("SQL") + String expected = + "VALUES " + + "(-2, 1, TIMESTAMP '1969-12-31 22:22:22.222222', TIMESTAMP '1969-12-31 22:22:22.222222', 8, 8), " + + "(-1, 2, TIMESTAMP '1969-12-31 23:33:11.456789', TIMESTAMP '1969-12-31 
23:44:55.567890', 9, 10), " + + "(0, 1, TIMESTAMP '1970-01-01 00:55:44.765432', TIMESTAMP '1970-01-01 00:55:44.765432', 11, 11), " + + "(394474, 3, TIMESTAMP '2015-01-01 10:01:23.123456', TIMESTAMP '2015-01-01 10:55:00.456789', 1, 3), " + + "(397692, 2, TIMESTAMP '2015-05-15 12:05:01.234567', TIMESTAMP '2015-05-15 12:21:02.345678', 4, 5), " + + "(439525, 2, TIMESTAMP '2020-02-21 13:11:11.876543', TIMESTAMP '2020-02-21 13:12:12.654321', 6, 7)"; String expectedTimestampStats = "'1969-12-31 22:22:22.222222', '2020-02-21 13:12:12.654321'"; if (format == ORC) { - expected = "VALUES " + - "(-2, 1, TIMESTAMP '1969-12-31 22:22:22.222000', TIMESTAMP '1969-12-31 22:22:22.222999', 8, 8), " + - "(-1, 2, TIMESTAMP '1969-12-31 23:33:11.456000', TIMESTAMP '1969-12-31 23:44:55.567999', 9, 10), " + - "(0, 1, TIMESTAMP '1970-01-01 00:55:44.765000', TIMESTAMP '1970-01-01 00:55:44.765999', 11, 11), " + - "(394474, 3, TIMESTAMP '2015-01-01 10:01:23.123000', TIMESTAMP '2015-01-01 10:55:00.456999', 1, 3), " + - "(397692, 2, TIMESTAMP '2015-05-15 12:05:01.234000', TIMESTAMP '2015-05-15 12:21:02.345999', 4, 5), " + - "(439525, 2, TIMESTAMP '2020-02-21 13:11:11.876000', TIMESTAMP '2020-02-21 13:12:12.654999', 6, 7)"; + expected = + "VALUES " + + "(-2, 1, TIMESTAMP '1969-12-31 22:22:22.222000', TIMESTAMP '1969-12-31 22:22:22.222999', 8, 8), " + + "(-1, 2, TIMESTAMP '1969-12-31 23:33:11.456000', TIMESTAMP '1969-12-31 23:44:55.567999', 9, 10), " + + "(0, 1, TIMESTAMP '1970-01-01 00:55:44.765000', TIMESTAMP '1970-01-01 00:55:44.765999', 11, 11), " + + "(394474, 3, TIMESTAMP '2015-01-01 10:01:23.123000', TIMESTAMP '2015-01-01 10:55:00.456999', 1, 3), " + + "(397692, 2, TIMESTAMP '2015-05-15 12:05:01.234000', TIMESTAMP '2015-05-15 12:21:02.345999', 4, 5), " + + "(439525, 2, TIMESTAMP '2020-02-21 13:11:11.876000', TIMESTAMP '2020-02-21 13:12:12.654999', 6, 7)"; expectedTimestampStats = "'1969-12-31 22:22:22.222000', '2020-02-21 13:12:12.654999'"; } - assertQuery("SELECT partition.d_hour, record_count, data.d.min, data.d.max, data.b.min, data.b.max FROM \"test_hour_transform$partitions\"", expected); + assertQuery( + "SELECT partition.d_hour, record_count, data.d.min, data.d.max, data.b.min, data.b.max FROM \"test_hour_transform$partitions\"", + expected); - // Exercise arcticMetadata.applyFilter with non-empty Constraint.predicate, via non-pushdownable predicates + // Exercise arcticMetadata.applyFilter with non-empty Constraint.predicate, via non-pushdownable + // predicates assertQuery( "SELECT * FROM test_hour_transform WHERE day_of_week(d) = 3 AND b % 7 = 3", "VALUES (TIMESTAMP '1969-12-31 23:44:55.567890', 10)"); assertThat(query("SHOW STATS FOR test_hour_transform")) .skippingTypesCheck() - .matches("VALUES " + - " ('d', NULL, NULL, 0e0, NULL, " + expectedTimestampStats + "), " + - " ('b', NULL, NULL, 0e0, NULL, '1', '11'), " + - " (NULL, NULL, NULL, NULL, 11e0, NULL, NULL)"); + .matches( + "VALUES " + + " ('d', NULL, NULL, 0e0, NULL, " + + expectedTimestampStats + + "), " + + " ('b', NULL, NULL, 0e0, NULL, '1', '11'), " + + " (NULL, NULL, NULL, NULL, 11e0, NULL, NULL)"); dropTable("test_hour_transform"); } @Test public void testDayTransformDate() { - assertUpdate("CREATE TABLE test_day_transform_date (d DATE, b BIGINT) WITH (partitioning = ARRAY['day(d)'])"); - - @Language("SQL") String values = "VALUES " + - "(DATE '1969-01-01', 10), " + - "(DATE '1969-12-31', 11), " + - "(DATE '1970-01-01', 1), " + - "(DATE '1970-03-04', 2), " + - "(DATE '2015-01-01', 3), " + - "(DATE '2015-01-13', 4), " + - "(DATE '2015-01-13', 
5), " + - "(DATE '2015-05-15', 6), " + - "(DATE '2015-05-15', 7), " + - "(DATE '2020-02-21', 8), " + - "(DATE '2020-02-21', 9)"; + assertUpdate( + "CREATE TABLE test_day_transform_date (d DATE, b BIGINT) WITH (partitioning = ARRAY['day(d)'])"); + + @Language("SQL") + String values = + "VALUES " + + "(DATE '1969-01-01', 10), " + + "(DATE '1969-12-31', 11), " + + "(DATE '1970-01-01', 1), " + + "(DATE '1970-03-04', 2), " + + "(DATE '2015-01-01', 3), " + + "(DATE '2015-01-13', 4), " + + "(DATE '2015-01-13', 5), " + + "(DATE '2015-05-15', 6), " + + "(DATE '2015-05-15', 7), " + + "(DATE '2020-02-21', 8), " + + "(DATE '2020-02-21', 9)"; assertUpdate("INSERT INTO test_day_transform_date " + values, 11); assertQuery("SELECT * FROM test_day_transform_date", values); assertQuery( "SELECT partition.d_day, record_count, data.d.min, data.d.max, data.b.min, data.b.max FROM \"test_day_transform_date$partitions\"", - "VALUES " + - "(DATE '1969-01-01', 1, DATE '1969-01-01', DATE '1969-01-01', 10, 10), " + - "(DATE '1969-12-31', 1, DATE '1969-12-31', DATE '1969-12-31', 11, 11), " + - "(DATE '1970-01-01', 1, DATE '1970-01-01', DATE '1970-01-01', 1, 1), " + - "(DATE '1970-03-04', 1, DATE '1970-03-04', DATE '1970-03-04', 2, 2), " + - "(DATE '2015-01-01', 1, DATE '2015-01-01', DATE '2015-01-01', 3, 3), " + - "(DATE '2015-01-13', 2, DATE '2015-01-13', DATE '2015-01-13', 4, 5), " + - "(DATE '2015-05-15', 2, DATE '2015-05-15', DATE '2015-05-15', 6, 7), " + - "(DATE '2020-02-21', 2, DATE '2020-02-21', DATE '2020-02-21', 8, 9)"); - - // Exercise arcticMetadata.applyFilter with non-empty Constraint.predicate, via non-pushdownable predicates + "VALUES " + + "(DATE '1969-01-01', 1, DATE '1969-01-01', DATE '1969-01-01', 10, 10), " + + "(DATE '1969-12-31', 1, DATE '1969-12-31', DATE '1969-12-31', 11, 11), " + + "(DATE '1970-01-01', 1, DATE '1970-01-01', DATE '1970-01-01', 1, 1), " + + "(DATE '1970-03-04', 1, DATE '1970-03-04', DATE '1970-03-04', 2, 2), " + + "(DATE '2015-01-01', 1, DATE '2015-01-01', DATE '2015-01-01', 3, 3), " + + "(DATE '2015-01-13', 2, DATE '2015-01-13', DATE '2015-01-13', 4, 5), " + + "(DATE '2015-05-15', 2, DATE '2015-05-15', DATE '2015-05-15', 6, 7), " + + "(DATE '2020-02-21', 2, DATE '2020-02-21', DATE '2020-02-21', 8, 9)"); + + // Exercise arcticMetadata.applyFilter with non-empty Constraint.predicate, via non-pushdownable + // predicates assertQuery( "SELECT * FROM test_day_transform_date WHERE day_of_week(d) = 3 AND b % 7 = 3", "VALUES (DATE '1969-01-01', 10)"); assertThat(query("SHOW STATS FOR test_day_transform_date")) .skippingTypesCheck() - .matches("VALUES " + - " ('d', NULL, NULL, 0e0, NULL, '1969-01-01', '2020-02-21'), " + - " ('b', NULL, NULL, 0e0, NULL, '1', '11'), " + - " (NULL, NULL, NULL, NULL, 11e0, NULL, NULL)"); + .matches( + "VALUES " + + " ('d', NULL, NULL, 0e0, NULL, '1969-01-01', '2020-02-21'), " + + " ('b', NULL, NULL, 0e0, NULL, '1', '11'), " + + " (NULL, NULL, NULL, NULL, 11e0, NULL, NULL)"); dropTable("test_day_transform_date"); } @Test public void testDayTransformTimestamp() { - assertUpdate("CREATE TABLE test_day_transform_timestamp (d TIMESTAMP(6), b BIGINT) WITH (partitioning = ARRAY['day(d)'])"); - - @Language("SQL") String values = "VALUES " + - "(TIMESTAMP '1969-12-25 15:13:12.876543', 8)," + - "(TIMESTAMP '1969-12-30 18:47:33.345678', 9)," + - "(TIMESTAMP '1969-12-31 00:00:00.000000', 10)," + - "(TIMESTAMP '1969-12-31 05:06:07.234567', 11)," + - "(TIMESTAMP '1970-01-01 12:03:08.456789', 12)," + - "(TIMESTAMP '2015-01-01 10:01:23.123456', 1)," + - "(TIMESTAMP 
'2015-01-01 11:10:02.987654', 2)," + - "(TIMESTAMP '2015-01-01 12:55:00.456789', 3)," + - "(TIMESTAMP '2015-05-15 13:05:01.234567', 4)," + - "(TIMESTAMP '2015-05-15 14:21:02.345678', 5)," + - "(TIMESTAMP '2020-02-21 15:11:11.876543', 6)," + - "(TIMESTAMP '2020-02-21 16:12:12.654321', 7)"; + assertUpdate( + "CREATE TABLE test_day_transform_timestamp (d TIMESTAMP(6), b BIGINT) WITH (partitioning = ARRAY['day(d)'])"); + + @Language("SQL") + String values = + "VALUES " + + "(TIMESTAMP '1969-12-25 15:13:12.876543', 8)," + + "(TIMESTAMP '1969-12-30 18:47:33.345678', 9)," + + "(TIMESTAMP '1969-12-31 00:00:00.000000', 10)," + + "(TIMESTAMP '1969-12-31 05:06:07.234567', 11)," + + "(TIMESTAMP '1970-01-01 12:03:08.456789', 12)," + + "(TIMESTAMP '2015-01-01 10:01:23.123456', 1)," + + "(TIMESTAMP '2015-01-01 11:10:02.987654', 2)," + + "(TIMESTAMP '2015-01-01 12:55:00.456789', 3)," + + "(TIMESTAMP '2015-05-15 13:05:01.234567', 4)," + + "(TIMESTAMP '2015-05-15 14:21:02.345678', 5)," + + "(TIMESTAMP '2020-02-21 15:11:11.876543', 6)," + + "(TIMESTAMP '2020-02-21 16:12:12.654321', 7)"; assertUpdate("INSERT INTO test_day_transform_timestamp " + values, 12); assertQuery("SELECT * FROM test_day_transform_timestamp", values); - @Language("SQL") String expected = "VALUES " + - "(DATE '1969-12-25', 1, TIMESTAMP '1969-12-25 15:13:12.876543', TIMESTAMP '1969-12-25 15:13:12.876543', 8, 8), " + - "(DATE '1969-12-30', 1, TIMESTAMP '1969-12-30 18:47:33.345678', TIMESTAMP '1969-12-30 18:47:33.345678', 9, 9), " + - "(DATE '1969-12-31', 2, TIMESTAMP '1969-12-31 00:00:00.000000', TIMESTAMP '1969-12-31 05:06:07.234567', 10, 11), " + - "(DATE '1970-01-01', 1, TIMESTAMP '1970-01-01 12:03:08.456789', TIMESTAMP '1970-01-01 12:03:08.456789', 12, 12), " + - "(DATE '2015-01-01', 3, TIMESTAMP '2015-01-01 10:01:23.123456', TIMESTAMP '2015-01-01 12:55:00.456789', 1, 3), " + - "(DATE '2015-05-15', 2, TIMESTAMP '2015-05-15 13:05:01.234567', TIMESTAMP '2015-05-15 14:21:02.345678', 4, 5), " + - "(DATE '2020-02-21', 2, TIMESTAMP '2020-02-21 15:11:11.876543', TIMESTAMP '2020-02-21 16:12:12.654321', 6, 7)"; + @Language("SQL") + String expected = + "VALUES " + + "(DATE '1969-12-25', 1, TIMESTAMP '1969-12-25 15:13:12.876543', TIMESTAMP '1969-12-25 15:13:12.876543', 8, 8), " + + "(DATE '1969-12-30', 1, TIMESTAMP '1969-12-30 18:47:33.345678', TIMESTAMP '1969-12-30 18:47:33.345678', 9, 9), " + + "(DATE '1969-12-31', 2, TIMESTAMP '1969-12-31 00:00:00.000000', TIMESTAMP '1969-12-31 05:06:07.234567', 10, 11), " + + "(DATE '1970-01-01', 1, TIMESTAMP '1970-01-01 12:03:08.456789', TIMESTAMP '1970-01-01 12:03:08.456789', 12, 12), " + + "(DATE '2015-01-01', 3, TIMESTAMP '2015-01-01 10:01:23.123456', TIMESTAMP '2015-01-01 12:55:00.456789', 1, 3), " + + "(DATE '2015-05-15', 2, TIMESTAMP '2015-05-15 13:05:01.234567', TIMESTAMP '2015-05-15 14:21:02.345678', 4, 5), " + + "(DATE '2020-02-21', 2, TIMESTAMP '2020-02-21 15:11:11.876543', TIMESTAMP '2020-02-21 16:12:12.654321', 6, 7)"; String expectedTimestampStats = "'1969-12-25 15:13:12.876543', '2020-02-21 16:12:12.654321'"; if (format == ORC) { - expected = "VALUES " + - "(DATE '1969-12-25', 1, TIMESTAMP '1969-12-25 15:13:12.876000', TIMESTAMP '1969-12-25 15:13:12.876999', 8, 8), " + - "(DATE '1969-12-30', 1, TIMESTAMP '1969-12-30 18:47:33.345000', TIMESTAMP '1969-12-30 18:47:33.345999', 9, 9), " + - "(DATE '1969-12-31', 2, TIMESTAMP '1969-12-31 00:00:00.000000', TIMESTAMP '1969-12-31 05:06:07.234999', 10, 11), " + - "(DATE '1970-01-01', 1, TIMESTAMP '1970-01-01 12:03:08.456000', TIMESTAMP '1970-01-01 
12:03:08.456999', 12, 12), " + - "(DATE '2015-01-01', 3, TIMESTAMP '2015-01-01 10:01:23.123000', TIMESTAMP '2015-01-01 12:55:00.456999', 1, 3), " + - "(DATE '2015-05-15', 2, TIMESTAMP '2015-05-15 13:05:01.234000', TIMESTAMP '2015-05-15 14:21:02.345999', 4, 5), " + - "(DATE '2020-02-21', 2, TIMESTAMP '2020-02-21 15:11:11.876000', TIMESTAMP '2020-02-21 16:12:12.654999', 6, 7)"; + expected = + "VALUES " + + "(DATE '1969-12-25', 1, TIMESTAMP '1969-12-25 15:13:12.876000', TIMESTAMP '1969-12-25 15:13:12.876999', 8, 8), " + + "(DATE '1969-12-30', 1, TIMESTAMP '1969-12-30 18:47:33.345000', TIMESTAMP '1969-12-30 18:47:33.345999', 9, 9), " + + "(DATE '1969-12-31', 2, TIMESTAMP '1969-12-31 00:00:00.000000', TIMESTAMP '1969-12-31 05:06:07.234999', 10, 11), " + + "(DATE '1970-01-01', 1, TIMESTAMP '1970-01-01 12:03:08.456000', TIMESTAMP '1970-01-01 12:03:08.456999', 12, 12), " + + "(DATE '2015-01-01', 3, TIMESTAMP '2015-01-01 10:01:23.123000', TIMESTAMP '2015-01-01 12:55:00.456999', 1, 3), " + + "(DATE '2015-05-15', 2, TIMESTAMP '2015-05-15 13:05:01.234000', TIMESTAMP '2015-05-15 14:21:02.345999', 4, 5), " + + "(DATE '2020-02-21', 2, TIMESTAMP '2020-02-21 15:11:11.876000', TIMESTAMP '2020-02-21 16:12:12.654999', 6, 7)"; expectedTimestampStats = "'1969-12-25 15:13:12.876000', '2020-02-21 16:12:12.654999'"; } - assertQuery("SELECT partition.d_day, record_count, data.d.min, data.d.max, data.b.min, data.b.max FROM \"test_day_transform_timestamp$partitions\"", expected); + assertQuery( + "SELECT partition.d_day, record_count, data.d.min, data.d.max, data.b.min, data.b.max FROM \"test_day_transform_timestamp$partitions\"", + expected); - // Exercise arcticMetadata.applyFilter with non-empty Constraint.predicate, via non-pushdownable predicates + // Exercise arcticMetadata.applyFilter with non-empty Constraint.predicate, via non-pushdownable + // predicates assertQuery( "SELECT * FROM test_day_transform_timestamp WHERE day_of_week(d) = 3 AND b % 7 = 3", "VALUES (TIMESTAMP '1969-12-31 00:00:00.000000', 10)"); assertThat(query("SHOW STATS FOR test_day_transform_timestamp")) .skippingTypesCheck() - .matches("VALUES " + - " ('d', NULL, NULL, 0e0, NULL, " + expectedTimestampStats + "), " + - " ('b', NULL, NULL, 0e0, NULL, '1', '12'), " + - " (NULL, NULL, NULL, NULL, 12e0, NULL, NULL)"); + .matches( + "VALUES " + + " ('d', NULL, NULL, 0e0, NULL, " + + expectedTimestampStats + + "), " + + " ('b', NULL, NULL, 0e0, NULL, '1', '12'), " + + " (NULL, NULL, NULL, NULL, 12e0, NULL, NULL)"); dropTable("test_day_transform_timestamp"); } @Test public void testMonthTransformDate() { - assertUpdate("CREATE TABLE test_month_transform_date (d DATE, b BIGINT) WITH (partitioning = ARRAY['month(d)'])"); - - @Language("SQL") String values = "VALUES " + - "(DATE '1969-11-13', 1)," + - "(DATE '1969-12-01', 2)," + - "(DATE '1969-12-02', 3)," + - "(DATE '1969-12-31', 4)," + - "(DATE '1970-01-01', 5), " + - "(DATE '1970-05-13', 6), " + - "(DATE '1970-12-31', 7), " + - "(DATE '2020-01-01', 8), " + - "(DATE '2020-06-16', 9), " + - "(DATE '2020-06-28', 10), " + - "(DATE '2020-06-06', 11), " + - "(DATE '2020-07-18', 12), " + - "(DATE '2020-07-28', 13), " + - "(DATE '2020-12-31', 14)"; + assertUpdate( + "CREATE TABLE test_month_transform_date (d DATE, b BIGINT) WITH (partitioning = ARRAY['month(d)'])"); + + @Language("SQL") + String values = + "VALUES " + + "(DATE '1969-11-13', 1)," + + "(DATE '1969-12-01', 2)," + + "(DATE '1969-12-02', 3)," + + "(DATE '1969-12-31', 4)," + + "(DATE '1970-01-01', 5), " + + "(DATE '1970-05-13', 6), " + + "(DATE 
'1970-12-31', 7), " + + "(DATE '2020-01-01', 8), " + + "(DATE '2020-06-16', 9), " + + "(DATE '2020-06-28', 10), " + + "(DATE '2020-06-06', 11), " + + "(DATE '2020-07-18', 12), " + + "(DATE '2020-07-28', 13), " + + "(DATE '2020-12-31', 14)"; assertUpdate("INSERT INTO test_month_transform_date " + values, 14); assertQuery("SELECT * FROM test_month_transform_date", values); assertQuery( "SELECT partition.d_month, record_count, data.d.min, data.d.max, data.b.min, data.b.max FROM \"test_month_transform_date$partitions\"", - "VALUES " + - "(-2, 1, DATE '1969-11-13', DATE '1969-11-13', 1, 1), " + - "(-1, 3, DATE '1969-12-01', DATE '1969-12-31', 2, 4), " + - "(0, 1, DATE '1970-01-01', DATE '1970-01-01', 5, 5), " + - "(4, 1, DATE '1970-05-13', DATE '1970-05-13', 6, 6), " + - "(11, 1, DATE '1970-12-31', DATE '1970-12-31', 7, 7), " + - "(600, 1, DATE '2020-01-01', DATE '2020-01-01', 8, 8), " + - "(605, 3, DATE '2020-06-06', DATE '2020-06-28', 9, 11), " + - "(606, 2, DATE '2020-07-18', DATE '2020-07-28', 12, 13), " + - "(611, 1, DATE '2020-12-31', DATE '2020-12-31', 14, 14)"); - - // Exercise IcebergMetadata.applyFilter with non-empty Constraint.predicate, via non-pushdownable predicates + "VALUES " + + "(-2, 1, DATE '1969-11-13', DATE '1969-11-13', 1, 1), " + + "(-1, 3, DATE '1969-12-01', DATE '1969-12-31', 2, 4), " + + "(0, 1, DATE '1970-01-01', DATE '1970-01-01', 5, 5), " + + "(4, 1, DATE '1970-05-13', DATE '1970-05-13', 6, 6), " + + "(11, 1, DATE '1970-12-31', DATE '1970-12-31', 7, 7), " + + "(600, 1, DATE '2020-01-01', DATE '2020-01-01', 8, 8), " + + "(605, 3, DATE '2020-06-06', DATE '2020-06-28', 9, 11), " + + "(606, 2, DATE '2020-07-18', DATE '2020-07-28', 12, 13), " + + "(611, 1, DATE '2020-12-31', DATE '2020-12-31', 14, 14)"); + + // Exercise IcebergMetadata.applyFilter with non-empty Constraint.predicate, via + // non-pushdownable predicates assertQuery( "SELECT * FROM test_month_transform_date WHERE day_of_week(d) = 7 AND b % 7 = 3", "VALUES (DATE '2020-06-28', 10)"); assertThat(query("SHOW STATS FOR test_month_transform_date")) .skippingTypesCheck() - .matches("VALUES " + - " ('d', NULL, NULL, 0e0, NULL, '1969-11-13', '2020-12-31'), " + - " ('b', NULL, NULL, 0e0, NULL, '1', '14'), " + - " (NULL, NULL, NULL, NULL, 14e0, NULL, NULL)"); + .matches( + "VALUES " + + " ('d', NULL, NULL, 0e0, NULL, '1969-11-13', '2020-12-31'), " + + " ('b', NULL, NULL, 0e0, NULL, '1', '14'), " + + " (NULL, NULL, NULL, NULL, 14e0, NULL, NULL)"); dropTable("test_month_transform_date"); } @Test public void testMonthTransformTimestamp() { - assertUpdate("CREATE TABLE test_month_transform_timestamp (d TIMESTAMP(6), b BIGINT) WITH (partitioning = ARRAY['month(d)'])"); - - @Language("SQL") String values = "VALUES " + - "(TIMESTAMP '1969-11-15 15:13:12.876543', 8)," + - "(TIMESTAMP '1969-11-19 18:47:33.345678', 9)," + - "(TIMESTAMP '1969-12-01 00:00:00.000000', 10)," + - "(TIMESTAMP '1969-12-01 05:06:07.234567', 11)," + - "(TIMESTAMP '1970-01-01 12:03:08.456789', 12)," + - "(TIMESTAMP '2015-01-01 10:01:23.123456', 1)," + - "(TIMESTAMP '2015-01-01 11:10:02.987654', 2)," + - "(TIMESTAMP '2015-01-01 12:55:00.456789', 3)," + - "(TIMESTAMP '2015-05-15 13:05:01.234567', 4)," + - "(TIMESTAMP '2015-05-15 14:21:02.345678', 5)," + - "(TIMESTAMP '2020-02-21 15:11:11.876543', 6)," + - "(TIMESTAMP '2020-02-21 16:12:12.654321', 7)"; + assertUpdate( + "CREATE TABLE test_month_transform_timestamp (d TIMESTAMP(6), b BIGINT) WITH (partitioning = ARRAY['month(d)'])"); + + @Language("SQL") + String values = + "VALUES " + + "(TIMESTAMP 
'1969-11-15 15:13:12.876543', 8)," + + "(TIMESTAMP '1969-11-19 18:47:33.345678', 9)," + + "(TIMESTAMP '1969-12-01 00:00:00.000000', 10)," + + "(TIMESTAMP '1969-12-01 05:06:07.234567', 11)," + + "(TIMESTAMP '1970-01-01 12:03:08.456789', 12)," + + "(TIMESTAMP '2015-01-01 10:01:23.123456', 1)," + + "(TIMESTAMP '2015-01-01 11:10:02.987654', 2)," + + "(TIMESTAMP '2015-01-01 12:55:00.456789', 3)," + + "(TIMESTAMP '2015-05-15 13:05:01.234567', 4)," + + "(TIMESTAMP '2015-05-15 14:21:02.345678', 5)," + + "(TIMESTAMP '2020-02-21 15:11:11.876543', 6)," + + "(TIMESTAMP '2020-02-21 16:12:12.654321', 7)"; assertUpdate("INSERT INTO test_month_transform_timestamp " + values, 12); assertQuery("SELECT * FROM test_month_transform_timestamp", values); - @Language("SQL") String expected = "VALUES " + - "(-2, 2, TIMESTAMP '1969-11-15 15:13:12.876543', TIMESTAMP '1969-11-19 18:47:33.345678', 8, 9), " + - "(-1, 2, TIMESTAMP '1969-12-01 00:00:00.000000', TIMESTAMP '1969-12-01 05:06:07.234567', 10, 11), " + - "(0, 1, TIMESTAMP '1970-01-01 12:03:08.456789', TIMESTAMP '1970-01-01 12:03:08.456789', 12, 12), " + - "(540, 3, TIMESTAMP '2015-01-01 10:01:23.123456', TIMESTAMP '2015-01-01 12:55:00.456789', 1, 3), " + - "(544, 2, TIMESTAMP '2015-05-15 13:05:01.234567', TIMESTAMP '2015-05-15 14:21:02.345678', 4, 5), " + - "(601, 2, TIMESTAMP '2020-02-21 15:11:11.876543', TIMESTAMP '2020-02-21 16:12:12.654321', 6, 7)"; + @Language("SQL") + String expected = + "VALUES " + + "(-2, 2, TIMESTAMP '1969-11-15 15:13:12.876543', TIMESTAMP '1969-11-19 18:47:33.345678', 8, 9), " + + "(-1, 2, TIMESTAMP '1969-12-01 00:00:00.000000', TIMESTAMP '1969-12-01 05:06:07.234567', 10, 11), " + + "(0, 1, TIMESTAMP '1970-01-01 12:03:08.456789', TIMESTAMP '1970-01-01 12:03:08.456789', 12, 12), " + + "(540, 3, TIMESTAMP '2015-01-01 10:01:23.123456', TIMESTAMP '2015-01-01 12:55:00.456789', 1, 3), " + + "(544, 2, TIMESTAMP '2015-05-15 13:05:01.234567', TIMESTAMP '2015-05-15 14:21:02.345678', 4, 5), " + + "(601, 2, TIMESTAMP '2020-02-21 15:11:11.876543', TIMESTAMP '2020-02-21 16:12:12.654321', 6, 7)"; String expectedTimestampStats = "'1969-11-15 15:13:12.876543', '2020-02-21 16:12:12.654321'"; if (format == ORC) { - expected = "VALUES " + - "(-2, 2, TIMESTAMP '1969-11-15 15:13:12.876000', TIMESTAMP '1969-11-19 18:47:33.345999', 8, 9), " + - "(-1, 2, TIMESTAMP '1969-12-01 00:00:00.000000', TIMESTAMP '1969-12-01 05:06:07.234999', 10, 11), " + - "(0, 1, TIMESTAMP '1970-01-01 12:03:08.456000', TIMESTAMP '1970-01-01 12:03:08.456999', 12, 12), " + - "(540, 3, TIMESTAMP '2015-01-01 10:01:23.123000', TIMESTAMP '2015-01-01 12:55:00.456999', 1, 3), " + - "(544, 2, TIMESTAMP '2015-05-15 13:05:01.234000', TIMESTAMP '2015-05-15 14:21:02.345999', 4, 5), " + - "(601, 2, TIMESTAMP '2020-02-21 15:11:11.876000', TIMESTAMP '2020-02-21 16:12:12.654999', 6, 7)"; + expected = + "VALUES " + + "(-2, 2, TIMESTAMP '1969-11-15 15:13:12.876000', TIMESTAMP '1969-11-19 18:47:33.345999', 8, 9), " + + "(-1, 2, TIMESTAMP '1969-12-01 00:00:00.000000', TIMESTAMP '1969-12-01 05:06:07.234999', 10, 11), " + + "(0, 1, TIMESTAMP '1970-01-01 12:03:08.456000', TIMESTAMP '1970-01-01 12:03:08.456999', 12, 12), " + + "(540, 3, TIMESTAMP '2015-01-01 10:01:23.123000', TIMESTAMP '2015-01-01 12:55:00.456999', 1, 3), " + + "(544, 2, TIMESTAMP '2015-05-15 13:05:01.234000', TIMESTAMP '2015-05-15 14:21:02.345999', 4, 5), " + + "(601, 2, TIMESTAMP '2020-02-21 15:11:11.876000', TIMESTAMP '2020-02-21 16:12:12.654999', 6, 7)"; expectedTimestampStats = "'1969-11-15 15:13:12.876000', '2020-02-21 
16:12:12.654999'"; } - assertQuery("SELECT partition.d_month, record_count, data.d.min, data.d.max, data.b.min, data.b.max FROM \"test_month_transform_timestamp$partitions\"", expected); + assertQuery( + "SELECT partition.d_month, record_count, data.d.min, data.d.max, data.b.min, data.b.max FROM \"test_month_transform_timestamp$partitions\"", + expected); - // Exercise IcebergMetadata.applyFilter with non-empty Constraint.predicate, via non-pushdownable predicates + // Exercise IcebergMetadata.applyFilter with non-empty Constraint.predicate, via + // non-pushdownable predicates assertQuery( "SELECT * FROM test_month_transform_timestamp WHERE day_of_week(d) = 1 AND b % 7 = 3", "VALUES (TIMESTAMP '1969-12-01 00:00:00.000000', 10)"); assertThat(query("SHOW STATS FOR test_month_transform_timestamp")) .skippingTypesCheck() - .matches("VALUES " + - " ('d', NULL, NULL, 0e0, NULL, " + expectedTimestampStats + "), " + - " ('b', NULL, NULL, 0e0, NULL, '1', '12'), " + - " (NULL, NULL, NULL, NULL, 12e0, NULL, NULL)"); + .matches( + "VALUES " + + " ('d', NULL, NULL, 0e0, NULL, " + + expectedTimestampStats + + "), " + + " ('b', NULL, NULL, 0e0, NULL, '1', '12'), " + + " (NULL, NULL, NULL, NULL, 12e0, NULL, NULL)"); dropTable("test_month_transform_timestamp"); } @Test public void testYearTransformDate() { - assertUpdate("CREATE TABLE test_year_transform_date (d DATE, b BIGINT) WITH (partitioning = ARRAY['year(d)'])"); - - @Language("SQL") String values = "VALUES " + - "(DATE '1968-10-13', 1), " + - "(DATE '1969-01-01', 2), " + - "(DATE '1969-03-15', 3), " + - "(DATE '1970-01-01', 4), " + - "(DATE '1970-03-05', 5), " + - "(DATE '2015-01-01', 6), " + - "(DATE '2015-06-16', 7), " + - "(DATE '2015-07-28', 8), " + - "(DATE '2016-05-15', 9), " + - "(DATE '2016-06-06', 10), " + - "(DATE '2020-02-21', 11), " + - "(DATE '2020-11-10', 12)"; + assertUpdate( + "CREATE TABLE test_year_transform_date (d DATE, b BIGINT) WITH (partitioning = ARRAY['year(d)'])"); + + @Language("SQL") + String values = + "VALUES " + + "(DATE '1968-10-13', 1), " + + "(DATE '1969-01-01', 2), " + + "(DATE '1969-03-15', 3), " + + "(DATE '1970-01-01', 4), " + + "(DATE '1970-03-05', 5), " + + "(DATE '2015-01-01', 6), " + + "(DATE '2015-06-16', 7), " + + "(DATE '2015-07-28', 8), " + + "(DATE '2016-05-15', 9), " + + "(DATE '2016-06-06', 10), " + + "(DATE '2020-02-21', 11), " + + "(DATE '2020-11-10', 12)"; assertUpdate("INSERT INTO test_year_transform_date " + values, 12); assertQuery("SELECT * FROM test_year_transform_date", values); assertQuery( "SELECT partition.d_year, record_count, data.d.min, data.d.max, data.b.min, data.b.max FROM \"test_year_transform_date$partitions\"", - "VALUES " + - "(-2, 1, DATE '1968-10-13', DATE '1968-10-13', 1, 1), " + - "(-1, 2, DATE '1969-01-01', DATE '1969-03-15', 2, 3), " + - "(0, 2, DATE '1970-01-01', DATE '1970-03-05', 4, 5), " + - "(45, 3, DATE '2015-01-01', DATE '2015-07-28', 6, 8), " + - "(46, 2, DATE '2016-05-15', DATE '2016-06-06', 9, 10), " + - "(50, 2, DATE '2020-02-21', DATE '2020-11-10', 11, 12)"); - - // Exercise IcebergMetadata.applyFilter with non-empty Constraint.predicate, via non-pushdownable predicates + "VALUES " + + "(-2, 1, DATE '1968-10-13', DATE '1968-10-13', 1, 1), " + + "(-1, 2, DATE '1969-01-01', DATE '1969-03-15', 2, 3), " + + "(0, 2, DATE '1970-01-01', DATE '1970-03-05', 4, 5), " + + "(45, 3, DATE '2015-01-01', DATE '2015-07-28', 6, 8), " + + "(46, 2, DATE '2016-05-15', DATE '2016-06-06', 9, 10), " + + "(50, 2, DATE '2020-02-21', DATE '2020-11-10', 11, 12)"); + + // Exercise 
IcebergMetadata.applyFilter with non-empty Constraint.predicate, via + // non-pushdownable predicates assertQuery( "SELECT * FROM test_year_transform_date WHERE day_of_week(d) = 1 AND b % 7 = 3", "VALUES (DATE '2016-06-06', 10)"); assertThat(query("SHOW STATS FOR test_year_transform_date")) .skippingTypesCheck() - .matches("VALUES " + - " ('d', NULL, NULL, 0e0, NULL, '1968-10-13', '2020-11-10'), " + - " ('b', NULL, NULL, 0e0, NULL, '1', '12'), " + - " (NULL, NULL, NULL, NULL, 12e0, NULL, NULL)"); + .matches( + "VALUES " + + " ('d', NULL, NULL, 0e0, NULL, '1968-10-13', '2020-11-10'), " + + " ('b', NULL, NULL, 0e0, NULL, '1', '12'), " + + " (NULL, NULL, NULL, NULL, 12e0, NULL, NULL)"); dropTable("test_year_transform_date"); } @Test public void testYearTransformTimestamp() { - assertUpdate("CREATE TABLE test_year_transform_timestamp (d TIMESTAMP(6), b BIGINT) WITH (partitioning = ARRAY['year(d)'])"); - - @Language("SQL") String values = "VALUES " + - "(TIMESTAMP '1968-03-15 15:13:12.876543', 1)," + - "(TIMESTAMP '1968-11-19 18:47:33.345678', 2)," + - "(TIMESTAMP '1969-01-01 00:00:00.000000', 3)," + - "(TIMESTAMP '1969-01-01 05:06:07.234567', 4)," + - "(TIMESTAMP '1970-01-18 12:03:08.456789', 5)," + - "(TIMESTAMP '1970-03-14 10:01:23.123456', 6)," + - "(TIMESTAMP '1970-08-19 11:10:02.987654', 7)," + - "(TIMESTAMP '1970-12-31 12:55:00.456789', 8)," + - "(TIMESTAMP '2015-05-15 13:05:01.234567', 9)," + - "(TIMESTAMP '2015-09-15 14:21:02.345678', 10)," + - "(TIMESTAMP '2020-02-21 15:11:11.876543', 11)," + - "(TIMESTAMP '2020-08-21 16:12:12.654321', 12)"; + assertUpdate( + "CREATE TABLE test_year_transform_timestamp (d TIMESTAMP(6), b BIGINT) WITH (partitioning = ARRAY['year(d)'])"); + + @Language("SQL") + String values = + "VALUES " + + "(TIMESTAMP '1968-03-15 15:13:12.876543', 1)," + + "(TIMESTAMP '1968-11-19 18:47:33.345678', 2)," + + "(TIMESTAMP '1969-01-01 00:00:00.000000', 3)," + + "(TIMESTAMP '1969-01-01 05:06:07.234567', 4)," + + "(TIMESTAMP '1970-01-18 12:03:08.456789', 5)," + + "(TIMESTAMP '1970-03-14 10:01:23.123456', 6)," + + "(TIMESTAMP '1970-08-19 11:10:02.987654', 7)," + + "(TIMESTAMP '1970-12-31 12:55:00.456789', 8)," + + "(TIMESTAMP '2015-05-15 13:05:01.234567', 9)," + + "(TIMESTAMP '2015-09-15 14:21:02.345678', 10)," + + "(TIMESTAMP '2020-02-21 15:11:11.876543', 11)," + + "(TIMESTAMP '2020-08-21 16:12:12.654321', 12)"; assertUpdate("INSERT INTO test_year_transform_timestamp " + values, 12); assertQuery("SELECT * FROM test_year_transform_timestamp", values); - @Language("SQL") String expected = "VALUES " + - "(-2, 2, TIMESTAMP '1968-03-15 15:13:12.876543', TIMESTAMP '1968-11-19 18:47:33.345678', 1, 2), " + - "(-1, 2, TIMESTAMP '1969-01-01 00:00:00.000000', TIMESTAMP '1969-01-01 05:06:07.234567', 3, 4), " + - "(0, 4, TIMESTAMP '1970-01-18 12:03:08.456789', TIMESTAMP '1970-12-31 12:55:00.456789', 5, 8), " + - "(45, 2, TIMESTAMP '2015-05-15 13:05:01.234567', TIMESTAMP '2015-09-15 14:21:02.345678', 9, 10), " + - "(50, 2, TIMESTAMP '2020-02-21 15:11:11.876543', TIMESTAMP '2020-08-21 16:12:12.654321', 11, 12)"; + @Language("SQL") + String expected = + "VALUES " + + "(-2, 2, TIMESTAMP '1968-03-15 15:13:12.876543', TIMESTAMP '1968-11-19 18:47:33.345678', 1, 2), " + + "(-1, 2, TIMESTAMP '1969-01-01 00:00:00.000000', TIMESTAMP '1969-01-01 05:06:07.234567', 3, 4), " + + "(0, 4, TIMESTAMP '1970-01-18 12:03:08.456789', TIMESTAMP '1970-12-31 12:55:00.456789', 5, 8), " + + "(45, 2, TIMESTAMP '2015-05-15 13:05:01.234567', TIMESTAMP '2015-09-15 14:21:02.345678', 9, 10), " + + "(50, 2, TIMESTAMP 
'2020-02-21 15:11:11.876543', TIMESTAMP '2020-08-21 16:12:12.654321', 11, 12)"; String expectedTimestampStats = "'1968-03-15 15:13:12.876543', '2020-08-21 16:12:12.654321'"; if (format == ORC) { - expected = "VALUES " + - "(-2, 2, TIMESTAMP '1968-03-15 15:13:12.876000', TIMESTAMP '1968-11-19 18:47:33.345999', 1, 2), " + - "(-1, 2, TIMESTAMP '1969-01-01 00:00:00.000000', TIMESTAMP '1969-01-01 05:06:07.234999', 3, 4), " + - "(0, 4, TIMESTAMP '1970-01-18 12:03:08.456000', TIMESTAMP '1970-12-31 12:55:00.456999', 5, 8), " + - "(45, 2, TIMESTAMP '2015-05-15 13:05:01.234000', TIMESTAMP '2015-09-15 14:21:02.345999', 9, 10), " + - "(50, 2, TIMESTAMP '2020-02-21 15:11:11.876000', TIMESTAMP '2020-08-21 16:12:12.654999', 11, 12)"; + expected = + "VALUES " + + "(-2, 2, TIMESTAMP '1968-03-15 15:13:12.876000', TIMESTAMP '1968-11-19 18:47:33.345999', 1, 2), " + + "(-1, 2, TIMESTAMP '1969-01-01 00:00:00.000000', TIMESTAMP '1969-01-01 05:06:07.234999', 3, 4), " + + "(0, 4, TIMESTAMP '1970-01-18 12:03:08.456000', TIMESTAMP '1970-12-31 12:55:00.456999', 5, 8), " + + "(45, 2, TIMESTAMP '2015-05-15 13:05:01.234000', TIMESTAMP '2015-09-15 14:21:02.345999', 9, 10), " + + "(50, 2, TIMESTAMP '2020-02-21 15:11:11.876000', TIMESTAMP '2020-08-21 16:12:12.654999', 11, 12)"; expectedTimestampStats = "'1968-03-15 15:13:12.876000', '2020-08-21 16:12:12.654999'"; } - assertQuery("SELECT partition.d_year, record_count, data.d.min, data.d.max, data.b.min, data.b.max FROM \"test_year_transform_timestamp$partitions\"", expected); + assertQuery( + "SELECT partition.d_year, record_count, data.d.min, data.d.max, data.b.min, data.b.max FROM \"test_year_transform_timestamp$partitions\"", + expected); - // Exercise IcebergMetadata.applyFilter with non-empty Constraint.predicate, via non-pushdownable predicates + // Exercise IcebergMetadata.applyFilter with non-empty Constraint.predicate, via + // non-pushdownable predicates assertQuery( "SELECT * FROM test_year_transform_timestamp WHERE day_of_week(d) = 2 AND b % 7 = 3", "VALUES (TIMESTAMP '2015-09-15 14:21:02.345678', 10)"); assertThat(query("SHOW STATS FOR test_year_transform_timestamp")) .skippingTypesCheck() - .matches("VALUES " + - " ('d', NULL, NULL, 0e0, NULL, " + expectedTimestampStats + "), " + - " ('b', NULL, NULL, 0e0, NULL, '1', '12'), " + - " (NULL, NULL, NULL, NULL, 12e0, NULL, NULL)"); + .matches( + "VALUES " + + " ('d', NULL, NULL, 0e0, NULL, " + + expectedTimestampStats + + "), " + + " ('b', NULL, NULL, 0e0, NULL, '1', '12'), " + + " (NULL, NULL, NULL, NULL, 12e0, NULL, NULL)"); dropTable("test_year_transform_timestamp"); } @Test public void testTruncateTextTransform() { - assertUpdate("CREATE TABLE test_truncate_text_transform (d VARCHAR, b BIGINT) WITH (partitioning = ARRAY['truncate(d, 2)'])"); - String select = "SELECT partition.d_trunc, record_count, data.d.min AS d_min, data.d.max AS d_max, data.b.min AS b_min, data.b.max AS b_max FROM \"test_truncate_text_transform$partitions\""; + assertUpdate( + "CREATE TABLE test_truncate_text_transform (d VARCHAR, b BIGINT) WITH (partitioning = ARRAY['truncate(d, 2)'])"); + String select = + "SELECT partition.d_trunc, record_count, data.d.min AS d_min, data.d.max AS d_max, data.b.min AS b_min, data.b.max AS b_max FROM \"test_truncate_text_transform$partitions\""; - assertUpdate("INSERT INTO test_truncate_text_transform VALUES" + - "(NULL, 101)," + - "('abcd', 1)," + - "('abxy', 2)," + - "('ab598', 3)," + - "('mommy', 4)," + - "('moscow', 5)," + - "('Greece', 6)," + - "('Grozny', 7)", 8); + assertUpdate( + "INSERT INTO 
test_truncate_text_transform VALUES" + + "(NULL, 101)," + + "('abcd', 1)," + + "('abxy', 2)," + + "('ab598', 3)," + + "('mommy', 4)," + + "('moscow', 5)," + + "('Greece', 6)," + + "('Grozny', 7)", + 8); - assertQuery("SELECT partition.d_trunc FROM \"test_truncate_text_transform$partitions\"", "VALUES NULL, 'ab', 'mo', 'Gr'"); + assertQuery( + "SELECT partition.d_trunc FROM \"test_truncate_text_transform$partitions\"", + "VALUES NULL, 'ab', 'mo', 'Gr'"); - assertQuery("SELECT b FROM test_truncate_text_transform WHERE substring(d, 1, 2) = 'ab'", "VALUES 1, 2, 3"); - assertQuery(select + " WHERE partition.d_trunc = 'ab'", "VALUES ('ab', 3, 'ab598', 'abxy', 1, 3)"); + assertQuery( + "SELECT b FROM test_truncate_text_transform WHERE substring(d, 1, 2) = 'ab'", + "VALUES 1, 2, 3"); + assertQuery( + select + " WHERE partition.d_trunc = 'ab'", "VALUES ('ab', 3, 'ab598', 'abxy', 1, 3)"); - assertQuery("SELECT b FROM test_truncate_text_transform WHERE substring(d, 1, 2) = 'mo'", "VALUES 4, 5"); - assertQuery(select + " WHERE partition.d_trunc = 'mo'", "VALUES ('mo', 2, 'mommy', 'moscow', 4, 5)"); + assertQuery( + "SELECT b FROM test_truncate_text_transform WHERE substring(d, 1, 2) = 'mo'", + "VALUES 4, 5"); + assertQuery( + select + " WHERE partition.d_trunc = 'mo'", "VALUES ('mo', 2, 'mommy', 'moscow', 4, 5)"); - assertQuery("SELECT b FROM test_truncate_text_transform WHERE substring(d, 1, 2) = 'Gr'", "VALUES 6, 7"); - assertQuery(select + " WHERE partition.d_trunc = 'Gr'", "VALUES ('Gr', 2, 'Greece', 'Grozny', 6, 7)"); + assertQuery( + "SELECT b FROM test_truncate_text_transform WHERE substring(d, 1, 2) = 'Gr'", + "VALUES 6, 7"); + assertQuery( + select + " WHERE partition.d_trunc = 'Gr'", "VALUES ('Gr', 2, 'Greece', 'Grozny', 6, 7)"); - // Exercise IcebergMetadata.applyFilter with non-empty Constraint.predicate, via non-pushdownable predicates + // Exercise IcebergMetadata.applyFilter with non-empty Constraint.predicate, via + // non-pushdownable predicates assertQuery( "SELECT * FROM test_truncate_text_transform WHERE length(d) = 4 AND b % 7 = 2", "VALUES ('abxy', 2)"); assertThat(query("SHOW STATS FOR test_truncate_text_transform")) .skippingTypesCheck() - .matches("VALUES " + - " ('d', " + (format == PARQUET ? "553e0" : "NULL") + ", NULL, " + (format == AVRO ? "NULL" : "0.125e0") + ", NULL, NULL, NULL), " + - (format == AVRO ? " ('b', NULL, NULL, NULL, NULL, NULL, NULL), " : " ('b', NULL, NULL, 0e0, NULL, '1', '101'), ") + - " (NULL, NULL, NULL, NULL, 8e0, NULL, NULL)"); + .matches( + "VALUES " + + " ('d', " + + (format == PARQUET ? "553e0" : "NULL") + + ", NULL, " + + (format == AVRO ? "NULL" : "0.125e0") + + ", NULL, NULL, NULL), " + + (format == AVRO + ? 
" ('b', NULL, NULL, NULL, NULL, NULL, NULL), " + : " ('b', NULL, NULL, 0e0, NULL, '1', '101'), ") + + " (NULL, NULL, NULL, NULL, 8e0, NULL, NULL)"); dropTable("test_truncate_text_transform"); } @@ -1477,27 +1790,41 @@ public void testTruncateTextTransform() { @Test(dataProvider = "truncateNumberTypesProvider") public void testTruncateIntegerTransform(String dataType) { String table = format("test_truncate_%s_transform", dataType); - assertUpdate(format("CREATE TABLE " + table + " (d %s, b BIGINT) WITH (partitioning = ARRAY['truncate(d, 10)'])", dataType)); - String select = "SELECT partition.d_trunc, record_count, data.d.min AS d_min, data.d.max AS d_max, data.b.min AS b_min, data.b.max AS b_max FROM \"" + table + "$partitions\""; - - assertUpdate("INSERT INTO " + table + " VALUES" + - "(0, 1)," + - "(1, 2)," + - "(5, 3)," + - "(9, 4)," + - "(10, 5)," + - "(11, 6)," + - "(120, 7)," + - "(121, 8)," + - "(123, 9)," + - "(-1, 10)," + - "(-5, 11)," + - "(-10, 12)," + - "(-11, 13)," + - "(-123, 14)," + - "(-130, 15)", 15); - - assertQuery("SELECT partition.d_trunc FROM \"" + table + "$partitions\"", "VALUES 0, 10, 120, -10, -20, -130"); + assertUpdate( + format( + "CREATE TABLE " + + table + + " (d %s, b BIGINT) WITH (partitioning = ARRAY['truncate(d, 10)'])", + dataType)); + String select = + "SELECT partition.d_trunc, record_count, data.d.min AS d_min, data.d.max AS d_max, data.b.min AS b_min, data.b.max AS b_max FROM \"" + + table + + "$partitions\""; + + assertUpdate( + "INSERT INTO " + + table + + " VALUES" + + "(0, 1)," + + "(1, 2)," + + "(5, 3)," + + "(9, 4)," + + "(10, 5)," + + "(11, 6)," + + "(120, 7)," + + "(121, 8)," + + "(123, 9)," + + "(-1, 10)," + + "(-5, 11)," + + "(-10, 12)," + + "(-11, 13)," + + "(-123, 14)," + + "(-130, 15)", + 15); + + assertQuery( + "SELECT partition.d_trunc FROM \"" + table + "$partitions\"", + "VALUES 0, 10, 120, -10, -20, -130"); assertQuery("SELECT b FROM " + table + " WHERE d IN (0, 1, 5, 9)", "VALUES 1, 2, 3, 4"); assertQuery(select + " WHERE partition.d_trunc = 0", "VALUES (0, 4, 0, 9, 1, 4)"); @@ -1517,82 +1844,94 @@ public void testTruncateIntegerTransform(String dataType) { assertQuery("SELECT b FROM " + table + " WHERE d IN (-123, -130)", "VALUES 14, 15"); assertQuery(select + " WHERE partition.d_trunc = -130", "VALUES (-130, 2, -130, -123, 14, 15)"); - // Exercise IcebergMetadata.applyFilter with non-empty Constraint.predicate, via non-pushdownable predicates - assertQuery( - "SELECT * FROM " + table + " WHERE d % 10 = -1 AND b % 7 = 3", - "VALUES (-1, 10)"); + // Exercise IcebergMetadata.applyFilter with non-empty Constraint.predicate, via + // non-pushdownable predicates + assertQuery("SELECT * FROM " + table + " WHERE d % 10 = -1 AND b % 7 = 3", "VALUES (-1, 10)"); assertThat(query("SHOW STATS FOR " + table)) .skippingTypesCheck() - .matches("VALUES " + - " ('d', NULL, NULL, 0e0, NULL, '-130', '123'), " + - " ('b', NULL, NULL, 0e0, NULL, '1', '15'), " + - " (NULL, NULL, NULL, NULL, 15e0, NULL, NULL)"); + .matches( + "VALUES " + + " ('d', NULL, NULL, 0e0, NULL, '-130', '123'), " + + " ('b', NULL, NULL, 0e0, NULL, '1', '15'), " + + " (NULL, NULL, NULL, NULL, 15e0, NULL, NULL)"); dropTable(table); } @DataProvider public Object[][] truncateNumberTypesProvider() { - return new Object[][]{ - {"integer"}, - {"bigint"}, + return new Object[][] { + {"integer"}, {"bigint"}, }; } @Test public void testTruncateDecimalTransform() { - assertUpdate("CREATE TABLE test_truncate_decimal_transform (d DECIMAL(9, 2), b BIGINT) WITH (partitioning = 
ARRAY['truncate(d, 10)'])"); - String select = "SELECT partition.d_trunc, record_count, data.d.min AS d_min, data.d.max AS d_max, data.b.min AS b_min, data.b.max AS b_max FROM \"test_truncate_decimal_transform$partitions\""; + assertUpdate( + "CREATE TABLE test_truncate_decimal_transform (d DECIMAL(9, 2), b BIGINT) WITH (partitioning = ARRAY['truncate(d, 10)'])"); + String select = + "SELECT partition.d_trunc, record_count, data.d.min AS d_min, data.d.max AS d_max, data.b.min AS b_min, data.b.max AS b_max FROM \"test_truncate_decimal_transform$partitions\""; - assertUpdate("INSERT INTO test_truncate_decimal_transform VALUES" + - "(12.34, 1)," + - "(12.30, 2)," + - "(12.29, 3)," + - "(0.05, 4)," + - "(-0.05, 5)", 5); + assertUpdate( + "INSERT INTO test_truncate_decimal_transform VALUES" + + "(12.34, 1)," + + "(12.30, 2)," + + "(12.29, 3)," + + "(0.05, 4)," + + "(-0.05, 5)", + 5); - assertQuery("SELECT partition.d_trunc FROM \"test_truncate_decimal_transform$partitions\"", "VALUES 12.30, 12.20, 0.00, -0.10"); + assertQuery( + "SELECT partition.d_trunc FROM \"test_truncate_decimal_transform$partitions\"", + "VALUES 12.30, 12.20, 0.00, -0.10"); - assertQuery("SELECT b FROM test_truncate_decimal_transform WHERE d IN (12.34, 12.30)", "VALUES 1, 2"); - assertQuery(select + " WHERE partition.d_trunc = 12.30", "VALUES (12.30, 2, 12.30, 12.34, 1, 2)"); + assertQuery( + "SELECT b FROM test_truncate_decimal_transform WHERE d IN (12.34, 12.30)", "VALUES 1, 2"); + assertQuery( + select + " WHERE partition.d_trunc = 12.30", "VALUES (12.30, 2, 12.30, 12.34, 1, 2)"); assertQuery("SELECT b FROM test_truncate_decimal_transform WHERE d = 12.29", "VALUES 3"); - assertQuery(select + " WHERE partition.d_trunc = 12.20", "VALUES (12.20, 1, 12.29, 12.29, 3, 3)"); + assertQuery( + select + " WHERE partition.d_trunc = 12.20", "VALUES (12.20, 1, 12.29, 12.29, 3, 3)"); assertQuery("SELECT b FROM test_truncate_decimal_transform WHERE d = 0.05", "VALUES 4"); assertQuery(select + " WHERE partition.d_trunc = 0.00", "VALUES (0.00, 1, 0.05, 0.05, 4, 4)"); assertQuery("SELECT b FROM test_truncate_decimal_transform WHERE d = -0.05", "VALUES 5"); - assertQuery(select + " WHERE partition.d_trunc = -0.10", "VALUES (-0.10, 1, -0.05, -0.05, 5, 5)"); + assertQuery( + select + " WHERE partition.d_trunc = -0.10", "VALUES (-0.10, 1, -0.05, -0.05, 5, 5)"); - // Exercise IcebergMetadata.applyFilter with non-empty Constraint.predicate, via non-pushdownable predicates + // Exercise IcebergMetadata.applyFilter with non-empty Constraint.predicate, via + // non-pushdownable predicates assertQuery( "SELECT * FROM test_truncate_decimal_transform WHERE d * 100 % 10 = 9 AND b % 7 = 3", "VALUES (12.29, 3)"); assertThat(query("SHOW STATS FOR test_truncate_decimal_transform")) .skippingTypesCheck() - .matches("VALUES " + - " ('d', NULL, NULL, 0e0, NULL, '-0.05', '12.34'), " + - " ('b', NULL, NULL, 0e0, NULL, '1', '5'), " + - " (NULL, NULL, NULL, NULL, 5e0, NULL, NULL)"); + .matches( + "VALUES " + + " ('d', NULL, NULL, 0e0, NULL, '-0.05', '12.34'), " + + " ('b', NULL, NULL, 0e0, NULL, '1', '5'), " + + " (NULL, NULL, NULL, NULL, 5e0, NULL, NULL)"); dropTable("test_truncate_decimal_transform"); } @Test public void testApplyFilterWithNonEmptyConstraintPredicate() { - assertUpdate("CREATE TABLE test_apply_functional_constraint (d VARCHAR, b BIGINT) WITH (partitioning = ARRAY['bucket(d, 2)'])"); - assertUpdate( - "INSERT INTO test_apply_functional_constraint VALUES" + - "('abcd', 1)," + - "('abxy', 2)," + - "('ab598', 3)," + - "('mommy', 4)," + - 
"('moscow', 5)," + - "('Greece', 6)," + - "('Grozny', 7)", + assertUpdate( + "CREATE TABLE test_apply_functional_constraint (d VARCHAR, b BIGINT) WITH (partitioning = ARRAY['bucket(d, 2)'])"); + assertUpdate( + "INSERT INTO test_apply_functional_constraint VALUES" + + "('abcd', 1)," + + "('abxy', 2)," + + "('ab598', 3)," + + "('mommy', 4)," + + "('moscow', 5)," + + "('Greece', 6)," + + "('Grozny', 7)", 7); assertQuery( @@ -1601,21 +1940,24 @@ public void testApplyFilterWithNonEmptyConstraintPredicate() { String expected = null; if (format == ORC) { - expected = "VALUES " + - " ('d', NULL, NULL, 0e0, NULL, NULL, NULL), " + - " ('b', NULL, NULL, 0e0, NULL, '1', '7'), " + - " (NULL, NULL, NULL, NULL, 7e0, NULL, NULL)"; + expected = + "VALUES " + + " ('d', NULL, NULL, 0e0, NULL, NULL, NULL), " + + " ('b', NULL, NULL, 0e0, NULL, '1', '7'), " + + " (NULL, NULL, NULL, NULL, 7e0, NULL, NULL)"; } if (format == PARQUET) { - expected = "VALUES " + - " ('d', 367e0, NULL, 0e0, NULL, NULL, NULL), " + - " ('b', NULL, NULL, 0e0, NULL, '1', '7'), " + - " (NULL, NULL, NULL, NULL, 7e0, NULL, NULL)"; + expected = + "VALUES " + + " ('d', 367e0, NULL, 0e0, NULL, NULL, NULL), " + + " ('b', NULL, NULL, 0e0, NULL, '1', '7'), " + + " (NULL, NULL, NULL, NULL, 7e0, NULL, NULL)"; } else if (format == AVRO) { - expected = "VALUES " + - " ('d', NULL, NULL, NULL, NULL, NULL, NULL), " + - " ('b', NULL, NULL, NULL, NULL, NULL, NULL), " + - " (NULL, NULL, NULL, NULL, 7e0, NULL, NULL)"; + expected = + "VALUES " + + " ('d', NULL, NULL, NULL, NULL, NULL, NULL), " + + " ('b', NULL, NULL, NULL, NULL, NULL, NULL), " + + " (NULL, NULL, NULL, NULL, 7e0, NULL, NULL)"; } assertThat(query("SHOW STATS FOR test_apply_functional_constraint")) .skippingTypesCheck() @@ -1626,15 +1968,17 @@ public void testApplyFilterWithNonEmptyConstraintPredicate() { @Test public void testVoidTransform() { - assertUpdate("CREATE TABLE test_void_transform (d VARCHAR, b BIGINT) WITH (partitioning = ARRAY['void(d)'])"); - String values = "VALUES " + - "('abcd', 1)," + - "('abxy', 2)," + - "('ab598', 3)," + - "('mommy', 4)," + - "('Warsaw', 5)," + - "(NULL, 6)," + - "(NULL, 7)"; + assertUpdate( + "CREATE TABLE test_void_transform (d VARCHAR, b BIGINT) WITH (partitioning = ARRAY['void(d)'])"); + String values = + "VALUES " + + "('abcd', 1)," + + "('abxy', 2)," + + "('ab598', 3)," + + "('mommy', 4)," + + "('Warsaw', 5)," + + "(NULL, 6)," + + "(NULL, 7)"; assertUpdate("INSERT INTO test_void_transform " + values, 7); assertQuery("SELECT * FROM test_void_transform", values); @@ -1651,29 +1995,33 @@ public void testVoidTransform() { assertQuery( "SELECT d, b FROM test_void_transform WHERE d IS NOT NULL", - "VALUES " + - "('abcd', 1)," + - "('abxy', 2)," + - "('ab598', 3)," + - "('mommy', 4)," + - "('Warsaw', 5)"); + "VALUES " + + "('abcd', 1)," + + "('abxy', 2)," + + "('ab598', 3)," + + "('mommy', 4)," + + "('Warsaw', 5)"); assertQuery("SELECT b FROM test_void_transform WHERE d IS NULL", "VALUES 6, 7"); if (format != AVRO) { assertThat(query("SHOW STATS FOR test_void_transform")) .skippingTypesCheck() - .matches("VALUES " + - " ('d', " + (format == PARQUET ? "205e0" : "NULL") + ", NULL, 0.2857142857142857, NULL, NULL, NULL), " + - " ('b', NULL, NULL, 0e0, NULL, '1', '7'), " + - " (NULL, NULL, NULL, NULL, 7e0, NULL, NULL)"); + .matches( + "VALUES " + + " ('d', " + + (format == PARQUET ? 
"205e0" : "NULL") + + ", NULL, 0.2857142857142857, NULL, NULL, NULL), " + + " ('b', NULL, NULL, 0e0, NULL, '1', '7'), " + + " (NULL, NULL, NULL, NULL, 7e0, NULL, NULL)"); } else { assertThat(query("SHOW STATS FOR test_void_transform")) .skippingTypesCheck() - .matches("VALUES " + - " ('d', NULL, NULL, NULL, NULL, NULL, NULL), " + - " ('b', NULL, NULL, NULL, NULL, NULL, NULL), " + - " (NULL, NULL, NULL, NULL, 7e0, NULL, NULL)"); + .matches( + "VALUES " + + " ('d', NULL, NULL, NULL, NULL, NULL, NULL), " + + " ('b', NULL, NULL, NULL, NULL, NULL, NULL), " + + " (NULL, NULL, NULL, NULL, 7e0, NULL, NULL)"); } // Void transform doesn't allow filter elimination @@ -1689,8 +2037,11 @@ public void testVoidTransform() { @Test public void testMetadataDeleteSimple() { - assertUpdate("CREATE TABLE test_metadata_delete_simple (col1 BIGINT, col2 BIGINT) WITH (partitioning = ARRAY['col1'])"); - assertUpdate("INSERT INTO test_metadata_delete_simple VALUES(1, 100), (1, 101), (1, 102), (2, 200), (2, 201), (3, 300)", 6); + assertUpdate( + "CREATE TABLE test_metadata_delete_simple (col1 BIGINT, col2 BIGINT) WITH (partitioning = ARRAY['col1'])"); + assertUpdate( + "INSERT INTO test_metadata_delete_simple VALUES(1, 100), (1, 101), (1, 102), (2, 200), (2, 201), (3, 300)", + 6); assertQuery("SELECT sum(col2) FROM test_metadata_delete_simple", "SELECT 1004"); assertQuery("SELECT count(*) FROM \"test_metadata_delete_simple$partitions\"", "SELECT 3"); assertUpdate("DELETE FROM test_metadata_delete_simple WHERE col1 = 1", 3); @@ -1701,31 +2052,37 @@ public void testMetadataDeleteSimple() { @Test public void testMetadataDelete() { - assertUpdate("CREATE TABLE test_metadata_delete (" + - " orderkey BIGINT," + - " linenumber INTEGER," + - " linestatus VARCHAR" + - ") " + - "WITH (" + - " partitioning = ARRAY[ 'linenumber', 'linestatus' ]" + - ")"); - - assertUpdate( - "" + - "INSERT INTO test_metadata_delete " + - "SELECT orderkey, linenumber, linestatus " + - "FROM tpch.tiny.lineitem", + assertUpdate( + "CREATE TABLE test_metadata_delete (" + + " orderkey BIGINT," + + " linenumber INTEGER," + + " linestatus VARCHAR" + + ") " + + "WITH (" + + " partitioning = ARRAY[ 'linenumber', 'linestatus' ]" + + ")"); + + assertUpdate( + "" + + "INSERT INTO test_metadata_delete " + + "SELECT orderkey, linenumber, linestatus " + + "FROM tpch.tiny.lineitem", "SELECT count(*) FROM lineitem"); assertQuery("SELECT COUNT(*) FROM \"test_metadata_delete$partitions\"", "SELECT 14"); - assertUpdate("DELETE FROM test_metadata_delete WHERE linestatus = 'F' AND linenumber = 3", 5378); - assertQuery("SELECT * FROM test_metadata_delete", "SELECT orderkey, linenumber, linestatus FROM lineitem WHERE linestatus <> 'F' or linenumber <> 3"); + assertUpdate( + "DELETE FROM test_metadata_delete WHERE linestatus = 'F' AND linenumber = 3", 5378); + assertQuery( + "SELECT * FROM test_metadata_delete", + "SELECT orderkey, linenumber, linestatus FROM lineitem WHERE linestatus <> 'F' or linenumber <> 3"); assertQuery("SELECT count(*) FROM \"test_metadata_delete$partitions\"", "SELECT 13"); assertUpdate("DELETE FROM test_metadata_delete WHERE linestatus='O'", 30049); assertQuery("SELECT count(*) FROM \"test_metadata_delete$partitions\"", "SELECT 6"); - assertQuery("SELECT * FROM test_metadata_delete", "SELECT orderkey, linenumber, linestatus FROM lineitem WHERE linestatus <> 'O' AND linenumber <> 3"); + assertQuery( + "SELECT * FROM test_metadata_delete", + "SELECT orderkey, linenumber, linestatus FROM lineitem WHERE linestatus <> 'O' AND linenumber <> 3"); 
dropTable("test_metadata_delete"); } @@ -1737,12 +2094,9 @@ public void testInSet() { } private void testInSet(int inCount) { - String values = range(1, inCount + 1) - .mapToObj(n -> format("(%s, %s)", n, n + 10)) - .collect(joining(", ")); - String inList = range(1, inCount + 1) - .mapToObj(Integer::toString) - .collect(joining(", ")); + String values = + range(1, inCount + 1).mapToObj(n -> format("(%s, %s)", n, n + 10)).collect(joining(", ")); + String inList = range(1, inCount + 1).mapToObj(Integer::toString).collect(joining(", ")); assertUpdate("CREATE TABLE test_in_set (col1 INTEGER, col2 BIGINT)"); assertUpdate(format("INSERT INTO test_in_set VALUES %s", values), inCount); @@ -1759,7 +2113,8 @@ public void testBasicTableStatistics() { assertUpdate(insertStart + " VALUES -10", 1); assertUpdate(insertStart + " VALUES 100", 1); - // SHOW STATS returns rows of the form: column_name, data_size, distinct_values_count, nulls_fractions, row_count, low_value, high_value + // SHOW STATS returns rows of the form: column_name, data_size, distinct_values_count, + // nulls_fractions, row_count, low_value, high_value MaterializedResult result = computeActual("SHOW STATS FOR " + tableName); MaterializedResult expectedStatistics = @@ -1811,13 +2166,23 @@ public void testMultipleColumnTableStatistics() { .build(); assertEquals(result, expectedStatistics); - assertUpdate("INSERT INTO " + tableName + " VALUES " + IntStream.rangeClosed(21, 25) - .mapToObj(i -> format("(200, %d, DATE '2020-07-%d')", i, i)) - .collect(joining(", ")), 5); + assertUpdate( + "INSERT INTO " + + tableName + + " VALUES " + + IntStream.rangeClosed(21, 25) + .mapToObj(i -> format("(200, %d, DATE '2020-07-%d')", i, i)) + .collect(joining(", ")), + 5); - assertUpdate("INSERT INTO " + tableName + " VALUES " + IntStream.rangeClosed(26, 30) - .mapToObj(i -> format("(NULL, %d, DATE '2020-06-%d')", i, i)) - .collect(joining(", ")), 5); + assertUpdate( + "INSERT INTO " + + tableName + + " VALUES " + + IntStream.rangeClosed(26, 30) + .mapToObj(i -> format("(NULL, %d, DATE '2020-06-%d')", i, i)) + .collect(joining(", ")), + 5); result = computeActual("SHOW STATS FOR " + tableName); @@ -1835,12 +2200,14 @@ public void testMultipleColumnTableStatistics() { @Test public void testPartitionedTableStatistics() { - assertUpdate("CREATE TABLE arctic.tpch.test_partitioned_table_statistics (col1 REAL, col2 BIGINT) WITH (partitioning = ARRAY['col2'])"); + assertUpdate( + "CREATE TABLE arctic.tpch.test_partitioned_table_statistics (col1 REAL, col2 BIGINT) WITH (partitioning = ARRAY['col2'])"); assertUpdate("INSERT INTO test_partitioned_table_statistics VALUES (-10, -1)", 1); assertUpdate("INSERT INTO test_partitioned_table_statistics VALUES (100, 10)", 1); - MaterializedResult result = computeActual("SHOW STATS FOR arctic.tpch.test_partitioned_table_statistics"); + MaterializedResult result = + computeActual("SHOW STATS FOR arctic.tpch.test_partitioned_table_statistics"); Assert.assertEquals(result.getRowCount(), 3); MaterializedRow row0 = result.getMaterializedRows().get(0); @@ -1858,13 +2225,17 @@ public void testPartitionedTableStatistics() { MaterializedRow row2 = result.getMaterializedRows().get(2); Assert.assertEquals(row2.getField(4), 2.0); - assertUpdate("INSERT INTO test_partitioned_table_statistics VALUES " + IntStream.rangeClosed(1, 5) - .mapToObj(i -> format("(%d, 10)", i + 100)) - .collect(joining(", ")), 5); + assertUpdate( + "INSERT INTO test_partitioned_table_statistics VALUES " + + IntStream.rangeClosed(1, 5) + .mapToObj(i -> 
format("(%d, 10)", i + 100)) + .collect(joining(", ")), + 5); - assertUpdate("INSERT INTO test_partitioned_table_statistics VALUES " + IntStream.rangeClosed(6, 10) - .mapToObj(i -> "(NULL, 10)") - .collect(joining(", ")), 5); + assertUpdate( + "INSERT INTO test_partitioned_table_statistics VALUES " + + IntStream.rangeClosed(6, 10).mapToObj(i -> "(NULL, 10)").collect(joining(", ")), + 5); result = computeActual("SHOW STATS FOR arctic.tpch.test_partitioned_table_statistics"); Assert.assertEquals(result.getRowCount(), 3); @@ -1883,9 +2254,10 @@ public void testPartitionedTableStatistics() { row2 = result.getMaterializedRows().get(2); Assert.assertEquals(row2.getField(4), 12.0); - assertUpdate("INSERT INTO test_partitioned_table_statistics VALUES " + IntStream.rangeClosed(6, 10) - .mapToObj(i -> "(100, NULL)") - .collect(joining(", ")), 5); + assertUpdate( + "INSERT INTO test_partitioned_table_statistics VALUES " + + IntStream.rangeClosed(6, 10).mapToObj(i -> "(100, NULL)").collect(joining(", ")), + 5); result = computeActual("SHOW STATS FOR arctic.tpch.test_partitioned_table_statistics"); row0 = result.getMaterializedRows().get(0); @@ -1909,7 +2281,10 @@ public void testPartitionedTableStatistics() { @Test public void testPredicatePushdown() { QualifiedObjectName tableName = new QualifiedObjectName("arctic", "tpch", "test_predicate"); - assertUpdate(format("CREATE TABLE %s (col1 BIGINT, col2 BIGINT, col3 BIGINT) WITH (partitioning = ARRAY['col2', 'col3'])", tableName)); + assertUpdate( + format( + "CREATE TABLE %s (col1 BIGINT, col2 BIGINT, col3 BIGINT) WITH (partitioning = ARRAY['col2', 'col3'])", + tableName)); assertUpdate(format("INSERT INTO %s VALUES (1, 10, 100)", tableName), 1L); assertUpdate(format("INSERT INTO %s VALUES (2, 20, 200)", tableName), 1L); @@ -1927,7 +2302,8 @@ public void testPredicatePushdown() { ImmutableMap.of("col2", singleValue(BIGINT, 10L)), ImmutableMap.of()); - assertQuery(format("SELECT * FROM %s WHERE col1 = 1 AND col2 = 10", tableName), "VALUES (1, 10, 100)"); + assertQuery( + format("SELECT * FROM %s WHERE col1 = 1 AND col2 = 10", tableName), "VALUES (1, 10, 100)"); assertFilterPushdown( tableName, ImmutableMap.of("col1", singleValue(BIGINT, 1L), "col2", singleValue(BIGINT, 10L)), @@ -1935,19 +2311,25 @@ public void testPredicatePushdown() { ImmutableMap.of("col1", singleValue(BIGINT, 1L))); // Assert pushdown for an IN predicate with value count above the default compaction threshold - List values = LongStream.range(1L, 1010L).boxed() - .filter(index -> index != 20L) - .collect(toImmutableList()); + List values = + LongStream.range(1L, 1010L) + .boxed() + .filter(index -> index != 20L) + .collect(toImmutableList()); assertThat(values).hasSizeGreaterThan(ICEBERG_DOMAIN_COMPACTION_THRESHOLD); - String valuesString = join(",", values.stream().map(Object::toString).collect(toImmutableList())); + String valuesString = + join(",", values.stream().map(Object::toString).collect(toImmutableList())); String inPredicate = "%s IN (" + valuesString + ")"; assertQuery( - format("SELECT * FROM %s WHERE %s AND %s", tableName, format(inPredicate, "col1"), format(inPredicate, "col2")), + format( + "SELECT * FROM %s WHERE %s AND %s", + tableName, format(inPredicate, "col1"), format(inPredicate, "col2")), "VALUES (1, 10, 100)"); assertFilterPushdown( tableName, - ImmutableMap.of("col1", multipleValues(BIGINT, values), "col2", multipleValues(BIGINT, values)), + ImmutableMap.of( + "col1", multipleValues(BIGINT, values), "col2", multipleValues(BIGINT, values)), 
ImmutableMap.of("col2", multipleValues(BIGINT, values)), // Unenforced predicate is simplified during split generation, but not reflected here ImmutableMap.of("col1", multipleValues(BIGINT, values))); @@ -1958,18 +2340,30 @@ public void testPredicatePushdown() { @Test public void testPredicatesWithStructuralTypes() { String tableName = "test_predicate_with_structural_types"; - assertUpdate("CREATE TABLE " + tableName + " (id INT, array_t ARRAY(BIGINT), map_t MAP(BIGINT, BIGINT), struct_t ROW(f1 BIGINT, f2 BIGINT))"); - assertUpdate("INSERT INTO " + tableName + " VALUES " + - "(1, ARRAY[1, 2, 3], MAP(ARRAY[1,3], ARRAY[2,4]), ROW(1, 2)), " + - "(11, ARRAY[11, 12, 13], MAP(ARRAY[11, 13], ARRAY[12, 14]), ROW(11, 12)), " + - "(11, ARRAY[111, 112, 113], MAP(ARRAY[111, 13], ARRAY[112, 114]), ROW(111, 112)), " + - "(21, ARRAY[21, 22, 23], MAP(ARRAY[21, 23], ARRAY[22, 24]), ROW(21, 22))", + assertUpdate( + "CREATE TABLE " + + tableName + + " (id INT, array_t ARRAY(BIGINT), map_t MAP(BIGINT, BIGINT), struct_t ROW(f1 BIGINT, f2 BIGINT))"); + assertUpdate( + "INSERT INTO " + + tableName + + " VALUES " + + "(1, ARRAY[1, 2, 3], MAP(ARRAY[1,3], ARRAY[2,4]), ROW(1, 2)), " + + "(11, ARRAY[11, 12, 13], MAP(ARRAY[11, 13], ARRAY[12, 14]), ROW(11, 12)), " + + "(11, ARRAY[111, 112, 113], MAP(ARRAY[111, 13], ARRAY[112, 114]), ROW(111, 112)), " + + "(21, ARRAY[21, 22, 23], MAP(ARRAY[21, 23], ARRAY[22, 24]), ROW(21, 22))", 4); assertQuery("SELECT id FROM " + tableName + " WHERE array_t = ARRAY[1, 2, 3]", "VALUES 1"); - assertQuery("SELECT id FROM " + tableName + " WHERE map_t = MAP(ARRAY[11, 13], ARRAY[12, 14])", "VALUES 11"); + assertQuery( + "SELECT id FROM " + tableName + " WHERE map_t = MAP(ARRAY[11, 13], ARRAY[12, 14])", + "VALUES 11"); assertQuery("SELECT id FROM " + tableName + " WHERE struct_t = ROW(21, 22)", "VALUES 21"); - assertQuery("SELECT struct_t.f1 FROM " + tableName + " WHERE id = 11 AND map_t = MAP(ARRAY[11, 13], ARRAY[12, 14])", "VALUES 11"); + assertQuery( + "SELECT struct_t.f1 FROM " + + tableName + + " WHERE id = 11 AND map_t = MAP(ARRAY[11, 13], ARRAY[12, 14])", + "VALUES 11"); dropTable(tableName); } @@ -1977,14 +2371,15 @@ public void testPredicatesWithStructuralTypes() { @Test(dataProviderClass = DataProviders.class, dataProvider = "trueFalse") public void testPartitionsTableWithColumnNameConflict(boolean partitioned) { assertUpdate("DROP TABLE IF EXISTS test_partitions_with_conflict"); - assertUpdate("CREATE TABLE test_partitions_with_conflict (" + - " p integer, " + - " row_count integer, " + - " record_count integer, " + - " file_count integer, " + - " total_size integer " + - ") " + - (partitioned ? "WITH(partitioning = ARRAY['p'])" : "")); + assertUpdate( + "CREATE TABLE test_partitions_with_conflict (" + + " p integer, " + + " row_count integer, " + + " record_count integer, " + + " file_count integer, " + + " total_size integer " + + ") " + + (partitioned ? "WITH(partitioning = ARRAY['p'])" : "")); assertUpdate("INSERT INTO test_partitions_with_conflict VALUES (11, 12, 13, 14, 15)", 1); @@ -1994,28 +2389,32 @@ public void testPartitionsTableWithColumnNameConflict(boolean partitioned) { // test $partitions assertThat(query("SELECT * FROM \"test_partitions_with_conflict$partitions\"")) - .matches("SELECT " + - (partitioned ? 
"CAST(ROW(11) AS row(p integer)), " : "") + - "BIGINT '1', " + - "BIGINT '1', " + - // total_size is not exactly deterministic, so grab whatever value there is - "(SELECT total_size FROM \"test_partitions_with_conflict$partitions\"), " + - "CAST(" + - " ROW (" + - (partitioned ? "" : " ROW(11, 11, 0, NULL), ") + - " ROW(12, 12, 0, NULL), " + - " ROW(13, 13, 0, NULL), " + - " ROW(14, 14, 0, NULL), " + - " ROW(15, 15, 0, NULL) " + - " ) " + - " AS row(" + - (partitioned ? "" : " p row(min integer, max integer, null_count bigint, nan_count bigint), ") + - " row_count row(min integer, max integer, null_count bigint, nan_count bigint), " + - " record_count row(min integer, max integer, null_count bigint, nan_count bigint), " + - " file_count row(min integer, max integer, null_count bigint, nan_count bigint), " + - " total_size row(min integer, max integer, null_count bigint, nan_count bigint) " + - " )" + - ")"); + .matches( + "SELECT " + + (partitioned ? "CAST(ROW(11) AS row(p integer)), " : "") + + "BIGINT '1', " + + "BIGINT '1', " + + + // total_size is not exactly deterministic, so grab whatever value there is + "(SELECT total_size FROM \"test_partitions_with_conflict$partitions\"), " + + "CAST(" + + " ROW (" + + (partitioned ? "" : " ROW(11, 11, 0, NULL), ") + + " ROW(12, 12, 0, NULL), " + + " ROW(13, 13, 0, NULL), " + + " ROW(14, 14, 0, NULL), " + + " ROW(15, 15, 0, NULL) " + + " ) " + + " AS row(" + + (partitioned + ? "" + : " p row(min integer, max integer, null_count bigint, nan_count bigint), ") + + " row_count row(min integer, max integer, null_count bigint, nan_count bigint), " + + " record_count row(min integer, max integer, null_count bigint, nan_count bigint), " + + " file_count row(min integer, max integer, null_count bigint, nan_count bigint), " + + " total_size row(min integer, max integer, null_count bigint, nan_count bigint) " + + " )" + + ")"); assertUpdate("DROP TABLE test_partitions_with_conflict"); } @@ -2027,160 +2426,209 @@ private void assertFilterPushdown( Map expectedUnenforcedPredicate) { Metadata metadata = getQueryRunner().getMetadata(); - newTransaction().execute(getSession(), session -> { - TableHandle table = metadata.getTableHandle(session, tableName) - .orElseThrow(() -> new TableNotFoundException(tableName.asSchemaTableName())); - - Map columns = metadata.getColumnHandles(session, table); - TupleDomain domains = TupleDomain.withColumnDomains( - filter.entrySet().stream() - .collect(toImmutableMap(entry -> columns.get(entry.getKey()), Map.Entry::getValue))); - - Optional> result = metadata.applyFilter(session, table, new Constraint(domains)); - - assertTrue(result.isEmpty() == (expectedUnenforcedPredicate == null && expectedEnforcedPredicate == null)); - - if (result.isPresent()) { - IcebergTableHandle newTable = (IcebergTableHandle) result.get().getHandle().getConnectorHandle(); - - Assert.assertEquals( - newTable.getEnforcedPredicate(), - TupleDomain.withColumnDomains(expectedEnforcedPredicate.entrySet().stream() - .collect(toImmutableMap(entry -> columns.get(entry.getKey()), Map.Entry::getValue)))); - - Assert.assertEquals( - newTable.getUnenforcedPredicate(), - TupleDomain.withColumnDomains(expectedUnenforcedPredicate.entrySet().stream() - .collect(toImmutableMap(entry -> columns.get(entry.getKey()), Map.Entry::getValue)))); - } - }); + newTransaction() + .execute( + getSession(), + session -> { + TableHandle table = + metadata + .getTableHandle(session, tableName) + .orElseThrow(() -> new TableNotFoundException(tableName.asSchemaTableName())); + + Map 
columns = metadata.getColumnHandles(session, table); + TupleDomain domains = + TupleDomain.withColumnDomains( + filter.entrySet().stream() + .collect( + toImmutableMap( + entry -> columns.get(entry.getKey()), Map.Entry::getValue))); + + Optional> result = + metadata.applyFilter(session, table, new Constraint(domains)); + + assertTrue( + result.isEmpty() + == (expectedUnenforcedPredicate == null + && expectedEnforcedPredicate == null)); + + if (result.isPresent()) { + IcebergTableHandle newTable = + (IcebergTableHandle) result.get().getHandle().getConnectorHandle(); + + Assert.assertEquals( + newTable.getEnforcedPredicate(), + TupleDomain.withColumnDomains( + expectedEnforcedPredicate.entrySet().stream() + .collect( + toImmutableMap( + entry -> columns.get(entry.getKey()), Map.Entry::getValue)))); + + Assert.assertEquals( + newTable.getUnenforcedPredicate(), + TupleDomain.withColumnDomains( + expectedUnenforcedPredicate.entrySet().stream() + .collect( + toImmutableMap( + entry -> columns.get(entry.getKey()), Map.Entry::getValue)))); + } + }); } @Test public void testCreateNestedPartitionedTable() { - assertUpdate("CREATE TABLE test_nested_table_1 (" + - " bool BOOLEAN" + - ", int INTEGER" + - ", arr ARRAY(VARCHAR)" + - ", big BIGINT" + - ", rl REAL" + - ", dbl DOUBLE" + - ", mp MAP(INTEGER, VARCHAR)" + - ", dec DECIMAL(5,2)" + - ", vc VARCHAR" + - ", vb VARBINARY" + - ", ts TIMESTAMP(6)" + - ", tstz TIMESTAMP(6) WITH TIME ZONE" + - ", str ROW(id INTEGER , vc VARCHAR)" + - ", dt DATE)" + - " WITH (partitioning = ARRAY['int'])"); - - assertUpdate( - "INSERT INTO test_nested_table_1 " + - " select true, 1, array['uno', 'dos', 'tres'], BIGINT '1', REAL '1.0', DOUBLE '1.0', map(array[1,2,3,4], array['ek','don','teen','char'])," + - " CAST(1.0 as DECIMAL(5,2))," + - " 'one', VARBINARY 'binary0/1values',\n" + - " TIMESTAMP '2021-07-24 02:43:57.348000'," + - " TIMESTAMP '2021-07-24 02:43:57.348000 UTC'," + - " (CAST(ROW(null, 'this is a random value') AS ROW(int, varchar))), " + - " DATE '2021-07-24'", + assertUpdate( + "CREATE TABLE test_nested_table_1 (" + + " bool BOOLEAN" + + ", int INTEGER" + + ", arr ARRAY(VARCHAR)" + + ", big BIGINT" + + ", rl REAL" + + ", dbl DOUBLE" + + ", mp MAP(INTEGER, VARCHAR)" + + ", dec DECIMAL(5,2)" + + ", vc VARCHAR" + + ", vb VARBINARY" + + ", ts TIMESTAMP(6)" + + ", tstz TIMESTAMP(6) WITH TIME ZONE" + + ", str ROW(id INTEGER , vc VARCHAR)" + + ", dt DATE)" + + " WITH (partitioning = ARRAY['int'])"); + + assertUpdate( + "INSERT INTO test_nested_table_1 " + + " select true, 1, array['uno', 'dos', 'tres'], BIGINT '1', REAL '1.0', DOUBLE '1.0', map(array[1,2,3,4], array['ek','don','teen','char'])," + + " CAST(1.0 as DECIMAL(5,2))," + + " 'one', VARBINARY 'binary0/1values',\n" + + " TIMESTAMP '2021-07-24 02:43:57.348000'," + + " TIMESTAMP '2021-07-24 02:43:57.348000 UTC'," + + " (CAST(ROW(null, 'this is a random value') AS ROW(int, varchar))), " + + " DATE '2021-07-24'", 1); assertEquals(computeActual("SELECT * from test_nested_table_1").getRowCount(), 1); if (format != AVRO) { assertThat(query("SHOW STATS FOR test_nested_table_1")) .skippingTypesCheck() - .matches("VALUES " + - " ('bool', NULL, NULL, 0e0, NULL, 'true', 'true'), " + - " ('int', NULL, NULL, 0e0, NULL, '1', '1'), " + - " ('arr', NULL, NULL, " + (format == ORC ? "0e0" : "NULL") + ", NULL, NULL, NULL), " + - " ('big', NULL, NULL, 0e0, NULL, '1', '1'), " + - " ('rl', NULL, NULL, 0e0, NULL, '1.0', '1.0'), " + - " ('dbl', NULL, NULL, 0e0, NULL, '1.0', '1.0'), " + - " ('mp', NULL, NULL, " + (format == ORC ? 
"0e0" : "NULL") + ", NULL, NULL, NULL), " + - " ('dec', NULL, NULL, 0e0, NULL, '1.0', '1.0'), " + - " ('vc', " + (format == PARQUET ? "116e0" : "NULL") + ", NULL, 0e0, NULL, NULL, NULL), " + - " ('vb', " + (format == PARQUET ? "77e0" : "NULL") + ", NULL, 0e0, NULL, NULL, NULL), " + - " ('ts', NULL, NULL, 0e0, NULL, '2021-07-24 02:43:57.348000', " + (format == ORC ? "'2021-07-24 02:43:57.348999'" : "'2021-07-24 02:43:57.348000'") + "), " + - " ('tstz', NULL, NULL, 0e0, NULL, '2021-07-24 02:43:57.348 UTC', '2021-07-24 02:43:57.348 UTC'), " + - " ('str', NULL, NULL, " + (format == ORC ? "0e0" : "NULL") + ", NULL, NULL, NULL), " + - " ('dt', NULL, NULL, 0e0, NULL, '2021-07-24', '2021-07-24'), " + - " (NULL, NULL, NULL, NULL, 1e0, NULL, NULL)"); + .matches( + "VALUES " + + " ('bool', NULL, NULL, 0e0, NULL, 'true', 'true'), " + + " ('int', NULL, NULL, 0e0, NULL, '1', '1'), " + + " ('arr', NULL, NULL, " + + (format == ORC ? "0e0" : "NULL") + + ", NULL, NULL, NULL), " + + " ('big', NULL, NULL, 0e0, NULL, '1', '1'), " + + " ('rl', NULL, NULL, 0e0, NULL, '1.0', '1.0'), " + + " ('dbl', NULL, NULL, 0e0, NULL, '1.0', '1.0'), " + + " ('mp', NULL, NULL, " + + (format == ORC ? "0e0" : "NULL") + + ", NULL, NULL, NULL), " + + " ('dec', NULL, NULL, 0e0, NULL, '1.0', '1.0'), " + + " ('vc', " + + (format == PARQUET ? "116e0" : "NULL") + + ", NULL, 0e0, NULL, NULL, NULL), " + + " ('vb', " + + (format == PARQUET ? "77e0" : "NULL") + + ", NULL, 0e0, NULL, NULL, NULL), " + + " ('ts', NULL, NULL, 0e0, NULL, '2021-07-24 02:43:57.348000', " + + (format == ORC + ? "'2021-07-24 02:43:57.348999'" + : "'2021-07-24 02:43:57.348000'") + + "), " + + " ('tstz', NULL, NULL, 0e0, NULL, '2021-07-24 02:43:57.348 UTC', '2021-07-24 02:43:57.348 UTC'), " + + " ('str', NULL, NULL, " + + (format == ORC ? 
"0e0" : "NULL") + + ", NULL, NULL, NULL), " + + " ('dt', NULL, NULL, 0e0, NULL, '2021-07-24', '2021-07-24'), " + + " (NULL, NULL, NULL, NULL, 1e0, NULL, NULL)"); } else { assertThat(query("SHOW STATS FOR test_nested_table_1")) .skippingTypesCheck() - .matches("VALUES " + - " ('bool', NULL, NULL, NULL, NULL, NULL, NULL), " + - " ('int', NULL, NULL, 0e0, NULL, '1', '1'), " + - " ('arr', NULL, NULL, NULL, NULL, NULL, NULL), " + - " ('big', NULL, NULL, NULL, NULL, NULL, NULL), " + - " ('rl', NULL, NULL, NULL, NULL, NULL, NULL), " + - " ('dbl', NULL, NULL, NULL, NULL, NULL, NULL), " + - " ('mp', NULL, NULL, NULL, NULL, NULL, NULL), " + - " ('dec', NULL, NULL, NULL, NULL, NULL, NULL), " + - " ('vc', NULL, NULL, NULL, NULL, NULL, NULL), " + - " ('vb', NULL, NULL, NULL, NULL, NULL, NULL), " + - " ('ts', NULL, NULL, NULL, NULL, NULL, NULL), " + - " ('tstz', NULL, NULL, NULL, NULL, NULL, NULL), " + - " ('str', NULL, NULL, NULL, NULL, NULL, NULL), " + - " ('dt', NULL, NULL, NULL, NULL, NULL, NULL), " + - " (NULL, NULL, NULL, NULL, 1e0, NULL, NULL)"); + .matches( + "VALUES " + + " ('bool', NULL, NULL, NULL, NULL, NULL, NULL), " + + " ('int', NULL, NULL, 0e0, NULL, '1', '1'), " + + " ('arr', NULL, NULL, NULL, NULL, NULL, NULL), " + + " ('big', NULL, NULL, NULL, NULL, NULL, NULL), " + + " ('rl', NULL, NULL, NULL, NULL, NULL, NULL), " + + " ('dbl', NULL, NULL, NULL, NULL, NULL, NULL), " + + " ('mp', NULL, NULL, NULL, NULL, NULL, NULL), " + + " ('dec', NULL, NULL, NULL, NULL, NULL, NULL), " + + " ('vc', NULL, NULL, NULL, NULL, NULL, NULL), " + + " ('vb', NULL, NULL, NULL, NULL, NULL, NULL), " + + " ('ts', NULL, NULL, NULL, NULL, NULL, NULL), " + + " ('tstz', NULL, NULL, NULL, NULL, NULL, NULL), " + + " ('str', NULL, NULL, NULL, NULL, NULL, NULL), " + + " ('dt', NULL, NULL, NULL, NULL, NULL, NULL), " + + " (NULL, NULL, NULL, NULL, 1e0, NULL, NULL)"); } dropTable("test_nested_table_1"); - assertUpdate("" + - "CREATE TABLE test_nested_table_2 (" + - " int INTEGER" + - ", arr ARRAY(ROW(id INTEGER, vc VARCHAR))" + - ", big BIGINT" + - ", rl REAL" + - ", dbl DOUBLE" + - ", mp MAP(INTEGER, ARRAY(VARCHAR))" + - ", dec DECIMAL(5,2)" + - ", str ROW(id INTEGER, vc VARCHAR, arr ARRAY(INTEGER))" + - ", vc VARCHAR)" + - " WITH (partitioning = ARRAY['int'])"); - - assertUpdate( - "INSERT INTO test_nested_table_2 " + - " select 1, array[cast(row(1, null) as row(int, varchar)), cast(row(2, 'dos') as row(int, varchar))], BIGINT '1', REAL '1.0', DOUBLE '1.0', " + - "map(array[1,2], array[array['ek', 'one'], array['don', 'do', 'two']]), CAST(1.0 as DECIMAL(5,2)), " + - "CAST(ROW(1, 'this is a random value', null) AS ROW(int, varchar, array(int))), 'one'", + assertUpdate( + "" + + "CREATE TABLE test_nested_table_2 (" + + " int INTEGER" + + ", arr ARRAY(ROW(id INTEGER, vc VARCHAR))" + + ", big BIGINT" + + ", rl REAL" + + ", dbl DOUBLE" + + ", mp MAP(INTEGER, ARRAY(VARCHAR))" + + ", dec DECIMAL(5,2)" + + ", str ROW(id INTEGER, vc VARCHAR, arr ARRAY(INTEGER))" + + ", vc VARCHAR)" + + " WITH (partitioning = ARRAY['int'])"); + + assertUpdate( + "INSERT INTO test_nested_table_2 " + + " select 1, array[cast(row(1, null) as row(int, varchar)), cast(row(2, 'dos') as row(int, varchar))], BIGINT '1', REAL '1.0', DOUBLE '1.0', " + + "map(array[1,2], array[array['ek', 'one'], array['don', 'do', 'two']]), CAST(1.0 as DECIMAL(5,2)), " + + "CAST(ROW(1, 'this is a random value', null) AS ROW(int, varchar, array(int))), 'one'", 1); assertEquals(computeActual("SELECT * from test_nested_table_2").getRowCount(), 1); if (format != AVRO) { 
assertThat(query("SHOW STATS FOR test_nested_table_2")) .skippingTypesCheck() - .matches("VALUES " + - " ('int', NULL, NULL, 0e0, NULL, '1', '1'), " + - " ('arr', NULL, NULL, " + (format == ORC ? "0e0" : "NULL") + ", NULL, NULL, NULL), " + - " ('big', NULL, NULL, 0e0, NULL, '1', '1'), " + - " ('rl', NULL, NULL, 0e0, NULL, '1.0', '1.0'), " + - " ('dbl', NULL, NULL, 0e0, NULL, '1.0', '1.0'), " + - " ('mp', NULL, NULL, " + (format == ORC ? "0e0" : "NULL") + ", NULL, NULL, NULL), " + - " ('dec', NULL, NULL, 0e0, NULL, '1.0', '1.0'), " + - " ('vc', " + (format == PARQUET ? "116e0" : "NULL") + ", NULL, 0e0, NULL, NULL, NULL), " + - " ('str', NULL, NULL, " + (format == ORC ? "0e0" : "NULL") + ", NULL, NULL, NULL), " + - " (NULL, NULL, NULL, NULL, 1e0, NULL, NULL)"); + .matches( + "VALUES " + + " ('int', NULL, NULL, 0e0, NULL, '1', '1'), " + + " ('arr', NULL, NULL, " + + (format == ORC ? "0e0" : "NULL") + + ", NULL, NULL, NULL), " + + " ('big', NULL, NULL, 0e0, NULL, '1', '1'), " + + " ('rl', NULL, NULL, 0e0, NULL, '1.0', '1.0'), " + + " ('dbl', NULL, NULL, 0e0, NULL, '1.0', '1.0'), " + + " ('mp', NULL, NULL, " + + (format == ORC ? "0e0" : "NULL") + + ", NULL, NULL, NULL), " + + " ('dec', NULL, NULL, 0e0, NULL, '1.0', '1.0'), " + + " ('vc', " + + (format == PARQUET ? "116e0" : "NULL") + + ", NULL, 0e0, NULL, NULL, NULL), " + + " ('str', NULL, NULL, " + + (format == ORC ? "0e0" : "NULL") + + ", NULL, NULL, NULL), " + + " (NULL, NULL, NULL, NULL, 1e0, NULL, NULL)"); } else { assertThat(query("SHOW STATS FOR test_nested_table_2")) .skippingTypesCheck() - .matches("VALUES " + - " ('int', NULL, NULL, 0e0, NULL, '1', '1'), " + - " ('arr', NULL, NULL, NULL, NULL, NULL, NULL), " + - " ('big', NULL, NULL, NULL, NULL, NULL, NULL), " + - " ('rl', NULL, NULL, NULL, NULL, NULL, NULL), " + - " ('dbl', NULL, NULL, NULL, NULL, NULL, NULL), " + - " ('mp', NULL, NULL, NULL, NULL, NULL, NULL), " + - " ('dec', NULL, NULL, NULL, NULL, NULL, NULL), " + - " ('vc', NULL, NULL, NULL, NULL, NULL, NULL), " + - " ('str', NULL, NULL, NULL, NULL, NULL, NULL), " + - " (NULL, NULL, NULL, NULL, 1e0, NULL, NULL)"); + .matches( + "VALUES " + + " ('int', NULL, NULL, 0e0, NULL, '1', '1'), " + + " ('arr', NULL, NULL, NULL, NULL, NULL, NULL), " + + " ('big', NULL, NULL, NULL, NULL, NULL, NULL), " + + " ('rl', NULL, NULL, NULL, NULL, NULL, NULL), " + + " ('dbl', NULL, NULL, NULL, NULL, NULL, NULL), " + + " ('mp', NULL, NULL, NULL, NULL, NULL, NULL), " + + " ('dec', NULL, NULL, NULL, NULL, NULL, NULL), " + + " ('vc', NULL, NULL, NULL, NULL, NULL, NULL), " + + " ('str', NULL, NULL, NULL, NULL, NULL, NULL), " + + " (NULL, NULL, NULL, NULL, 1e0, NULL, NULL)"); } - assertUpdate("CREATE TABLE test_nested_table_3 WITH (partitioning = ARRAY['int']) AS SELECT * FROM test_nested_table_2", 1); + assertUpdate( + "CREATE TABLE test_nested_table_3 WITH (partitioning = ARRAY['int']) AS SELECT * FROM test_nested_table_2", + 1); assertEquals(computeActual("SELECT * FROM test_nested_table_3").getRowCount(), 1); @@ -2191,25 +2639,25 @@ public void testCreateNestedPartitionedTable() { dropTable("test_nested_table_3"); } -// @Test -// public void testSerializableReadIsolation() -// { -// assertUpdate("CREATE TABLE test_read_isolation (x int)"); -// assertUpdate("INSERT INTO test_read_isolation VALUES 123, 456", 2); -// -// withTransaction(session -> { -// assertQuery(session, "SELECT * FROM test_read_isolation", "VALUES 123, 456"); -// -// assertUpdate("INSERT INTO test_read_isolation VALUES 789", 1); -// assertQuery("SELECT * FROM test_read_isolation", 
"VALUES 123, 456, 789"); -// -// assertQuery(session, "SELECT * FROM test_read_isolation", "VALUES 123, 456"); -// }); -// -// assertQuery("SELECT * FROM test_read_isolation", "VALUES 123, 456, 789"); -// -// dropTable("test_read_isolation"); -// } + // @Test + // public void testSerializableReadIsolation() + // { + // assertUpdate("CREATE TABLE test_read_isolation (x int)"); + // assertUpdate("INSERT INTO test_read_isolation VALUES 123, 456", 2); + // + // withTransaction(session -> { + // assertQuery(session, "SELECT * FROM test_read_isolation", "VALUES 123, 456"); + // + // assertUpdate("INSERT INTO test_read_isolation VALUES 789", 1); + // assertQuery("SELECT * FROM test_read_isolation", "VALUES 123, 456, 789"); + // + // assertQuery(session, "SELECT * FROM test_read_isolation", "VALUES 123, 456"); + // }); + // + // assertQuery("SELECT * FROM test_read_isolation", "VALUES 123, 456, 789"); + // + // dropTable("test_read_isolation"); + // } private void withTransaction(Consumer consumer) { transaction(getQueryRunner().getTransactionManager(), getQueryRunner().getAccessControl()) @@ -2225,45 +2673,53 @@ private void dropTable(String table) { @Test public void testOptimizedMetadataQueries() { - Session session = Session.builder(getSession()) - .setSystemProperty("optimize_metadata_queries", "true") - .build(); + Session session = + Session.builder(getSession()) + .setSystemProperty("optimize_metadata_queries", "true") + .build(); - assertUpdate("CREATE TABLE test_metadata_optimization (a BIGINT, b BIGINT, c BIGINT) WITH (PARTITIONING = ARRAY['b', 'c'])"); + assertUpdate( + "CREATE TABLE test_metadata_optimization (a BIGINT, b BIGINT, c BIGINT) WITH (PARTITIONING = ARRAY['b', 'c'])"); assertUpdate("INSERT INTO test_metadata_optimization VALUES (5, 6, 7), (8, 9, 10)", 2); assertQuery(session, "SELECT DISTINCT b FROM test_metadata_optimization", "VALUES (6), (9)"); - assertQuery(session, "SELECT DISTINCT b, c FROM test_metadata_optimization", "VALUES (6, 7), (9, 10)"); - assertQuery(session, "SELECT DISTINCT b FROM test_metadata_optimization WHERE b < 7", "VALUES (6)"); - assertQuery(session, "SELECT DISTINCT b FROM test_metadata_optimization WHERE c > 8", "VALUES (9)"); + assertQuery( + session, "SELECT DISTINCT b, c FROM test_metadata_optimization", "VALUES (6, 7), (9, 10)"); + assertQuery( + session, "SELECT DISTINCT b FROM test_metadata_optimization WHERE b < 7", "VALUES (6)"); + assertQuery( + session, "SELECT DISTINCT b FROM test_metadata_optimization WHERE c > 8", "VALUES (9)"); // Assert behavior after metadata delete assertUpdate("DELETE FROM test_metadata_optimization WHERE b = 6", 1); assertQuery(session, "SELECT DISTINCT b FROM test_metadata_optimization", "VALUES (9)"); - // TODO: assert behavior after deleting the last row of a partition, once row-level deletes are supported. + // TODO: assert behavior after deleting the last row of a partition, once row-level deletes are + // supported. // i.e. 
a query like 'DELETE FROM test_metadata_optimization WHERE b = 6 AND a = 5' dropTable("test_metadata_optimization"); } @Test - public void testFileSizeInManifest() - throws Exception { - assertUpdate("CREATE TABLE test_file_size_in_manifest (" + - "a_bigint bigint, " + - "a_varchar varchar, " + - "a_long_decimal decimal(38,20), " + - "a_map map(varchar, integer))"); - - assertUpdate( - "INSERT INTO test_file_size_in_manifest VALUES " + - "(NULL, NULL, NULL, NULL), " + - "(42, 'some varchar value', DECIMAL '123456789123456789.123456789123456789', map(ARRAY['abc', 'def'], ARRAY[113, -237843832]))", + public void testFileSizeInManifest() throws Exception { + assertUpdate( + "CREATE TABLE test_file_size_in_manifest (" + + "a_bigint bigint, " + + "a_varchar varchar, " + + "a_long_decimal decimal(38,20), " + + "a_map map(varchar, integer))"); + + assertUpdate( + "INSERT INTO test_file_size_in_manifest VALUES " + + "(NULL, NULL, NULL, NULL), " + + "(42, 'some varchar value', DECIMAL '123456789123456789.123456789123456789', map(ARRAY['abc', 'def'], ARRAY[113, -237843832]))", 2); - MaterializedResult files = computeActual("SELECT file_path, record_count, file_size_in_bytes FROM \"test_file_size_in_manifest$files\""); + MaterializedResult files = + computeActual( + "SELECT file_path, record_count, file_size_in_bytes FROM \"test_file_size_in_manifest$files\""); long totalRecordCount = 0; for (MaterializedRow row : files.getMaterializedRows()) { String path = (String) row.getField(0); @@ -2277,305 +2733,357 @@ public void testFileSizeInManifest() assertThat(totalRecordCount).isEqualTo(2); } -// @Test -// public void testSplitPruningForFilterOnPartitionColumn() -// { -// String tableName = "nation_partitioned_pruning"; -// -// assertUpdate("DROP TABLE IF EXISTS " + tableName); -// -// // disable writes redistribution to have predictable number of files written per partition (one). -// Session noRedistributeWrites = Session.builder(getSession()) -// .setSystemProperty("redistribute_writes", "false") -// .build(); -// -// assertUpdate(noRedistributeWrites, "CREATE TABLE " + tableName + " WITH (partitioning = ARRAY['regionkey']) AS SELECT * FROM nation", 25); -// -// // sanity check that table contains exactly 5 files -// assertThat(query("SELECT count(*) FROM \"" + tableName + "$files\"")).matches("VALUES CAST(5 AS BIGINT)"); -// -// verifySplitCount("SELECT * FROM " + tableName, 5); -// verifySplitCount("SELECT * FROM " + tableName + " WHERE regionkey = 3", 1); -// verifySplitCount("SELECT * FROM " + tableName + " WHERE regionkey < 2", 2); -// verifySplitCount("SELECT * FROM " + tableName + " WHERE regionkey < 0", 0); -// verifySplitCount("SELECT * FROM " + tableName + " WHERE regionkey > 1 AND regionkey < 4", 2); -// verifySplitCount("SELECT * FROM " + tableName + " WHERE regionkey % 5 = 3", 1); -// -// assertUpdate("DROP TABLE " + tableName); -// } + // @Test + // public void testSplitPruningForFilterOnPartitionColumn() + // { + // String tableName = "nation_partitioned_pruning"; + // + // assertUpdate("DROP TABLE IF EXISTS " + tableName); + // + // // disable writes redistribution to have predictable number of files written per + // partition (one). 
+ // Session noRedistributeWrites = Session.builder(getSession()) + // .setSystemProperty("redistribute_writes", "false") + // .build(); + // + // assertUpdate(noRedistributeWrites, "CREATE TABLE " + tableName + " WITH (partitioning = + // ARRAY['regionkey']) AS SELECT * FROM nation", 25); + // + // // sanity check that table contains exactly 5 files + // assertThat(query("SELECT count(*) FROM \"" + tableName + "$files\"")).matches("VALUES + // CAST(5 AS BIGINT)"); + // + // verifySplitCount("SELECT * FROM " + tableName, 5); + // verifySplitCount("SELECT * FROM " + tableName + " WHERE regionkey = 3", 1); + // verifySplitCount("SELECT * FROM " + tableName + " WHERE regionkey < 2", 2); + // verifySplitCount("SELECT * FROM " + tableName + " WHERE regionkey < 0", 0); + // verifySplitCount("SELECT * FROM " + tableName + " WHERE regionkey > 1 AND regionkey < + // 4", 2); + // verifySplitCount("SELECT * FROM " + tableName + " WHERE regionkey % 5 = 3", 1); + // + // assertUpdate("DROP TABLE " + tableName); + // } @Test public void testAllAvailableTypes() { - assertUpdate("CREATE TABLE test_all_types (" + - " a_boolean boolean, " + - " an_integer integer, " + - " a_bigint bigint, " + - " a_real real, " + - " a_double double, " + - " a_short_decimal decimal(5,2), " + - " a_long_decimal decimal(38,20), " + - " a_varchar varchar, " + - " a_varbinary varbinary, " + - " a_date date, " + - " a_time time(6), " + - " a_timestamp timestamp(6), " + - " a_timestamptz timestamp(6) with time zone, " + - " a_uuid uuid, " + - " a_row row(id integer , vc varchar), " + - " an_array array(varchar), " + - " a_map map(integer, varchar) " + - ")"); - - String values = "VALUES (" + - "true, " + - "1, " + - "BIGINT '1', " + - "REAL '1.0', " + - "DOUBLE '1.0', " + - "CAST(1.0 AS decimal(5,2)), " + - "CAST(11.0 AS decimal(38,20)), " + - "VARCHAR 'onefsadfdsf', " + - "X'000102f0feff', " + - "DATE '2021-07-24'," + - "TIME '02:43:57.987654', " + - "TIMESTAMP '2021-07-24 03:43:57.987654'," + - "TIMESTAMP '2021-07-24 04:43:57.987654 UTC', " + - "UUID '20050910-1330-11e9-ffff-2a86e4085a59', " + - "CAST(ROW(42, 'this is a random value') AS ROW(id int, vc varchar)), " + - "ARRAY[VARCHAR 'uno', 'dos', 'tres'], " + - "map(ARRAY[1,2], ARRAY['ek', VARCHAR 'one'])) "; - - String nullValues = nCopies(17, "NULL").stream() - .collect(joining(", ", "VALUES (", ")")); + assertUpdate( + "CREATE TABLE test_all_types (" + + " a_boolean boolean, " + + " an_integer integer, " + + " a_bigint bigint, " + + " a_real real, " + + " a_double double, " + + " a_short_decimal decimal(5,2), " + + " a_long_decimal decimal(38,20), " + + " a_varchar varchar, " + + " a_varbinary varbinary, " + + " a_date date, " + + " a_time time(6), " + + " a_timestamp timestamp(6), " + + " a_timestamptz timestamp(6) with time zone, " + + " a_uuid uuid, " + + " a_row row(id integer , vc varchar), " + + " an_array array(varchar), " + + " a_map map(integer, varchar) " + + ")"); + + String values = + "VALUES (" + + "true, " + + "1, " + + "BIGINT '1', " + + "REAL '1.0', " + + "DOUBLE '1.0', " + + "CAST(1.0 AS decimal(5,2)), " + + "CAST(11.0 AS decimal(38,20)), " + + "VARCHAR 'onefsadfdsf', " + + "X'000102f0feff', " + + "DATE '2021-07-24'," + + "TIME '02:43:57.987654', " + + "TIMESTAMP '2021-07-24 03:43:57.987654'," + + "TIMESTAMP '2021-07-24 04:43:57.987654 UTC', " + + "UUID '20050910-1330-11e9-ffff-2a86e4085a59', " + + "CAST(ROW(42, 'this is a random value') AS ROW(id int, vc varchar)), " + + "ARRAY[VARCHAR 'uno', 'dos', 'tres'], " + + "map(ARRAY[1,2], ARRAY['ek', VARCHAR 
'one'])) "; + + String nullValues = nCopies(17, "NULL").stream().collect(joining(", ", "VALUES (", ")")); assertUpdate("INSERT INTO test_all_types " + values, 1); assertUpdate("INSERT INTO test_all_types " + nullValues, 1); // SELECT - assertThat(query("SELECT * FROM test_all_types")) - .matches(values + " UNION ALL " + nullValues); + assertThat(query("SELECT * FROM test_all_types")).matches(values + " UNION ALL " + nullValues); // SELECT with predicates - assertThat(query("SELECT * FROM test_all_types WHERE " + - " a_boolean = true " + - "AND an_integer = 1 " + - "AND a_bigint = BIGINT '1' " + - "AND a_real = REAL '1.0' " + - "AND a_double = DOUBLE '1.0' " + - "AND a_short_decimal = CAST(1.0 AS decimal(5,2)) " + - "AND a_long_decimal = CAST(11.0 AS decimal(38,20)) " + - "AND a_varchar = VARCHAR 'onefsadfdsf' " + - "AND a_varbinary = X'000102f0feff' " + - "AND a_date = DATE '2021-07-24' " + - "AND a_time = TIME '02:43:57.987654' " + - "AND a_timestamp = TIMESTAMP '2021-07-24 03:43:57.987654' " + - "AND a_timestamptz = TIMESTAMP '2021-07-24 04:43:57.987654 UTC' " + - "AND a_uuid = UUID '20050910-1330-11e9-ffff-2a86e4085a59' " + - "AND a_row = CAST(ROW(42, 'this is a random value') AS ROW(id int, vc varchar)) " + - "AND an_array = ARRAY[VARCHAR 'uno', 'dos', 'tres'] " + - "AND a_map = map(ARRAY[1,2], ARRAY['ek', VARCHAR 'one']) " + - "")) + assertThat( + query( + "SELECT * FROM test_all_types WHERE " + + " a_boolean = true " + + "AND an_integer = 1 " + + "AND a_bigint = BIGINT '1' " + + "AND a_real = REAL '1.0' " + + "AND a_double = DOUBLE '1.0' " + + "AND a_short_decimal = CAST(1.0 AS decimal(5,2)) " + + "AND a_long_decimal = CAST(11.0 AS decimal(38,20)) " + + "AND a_varchar = VARCHAR 'onefsadfdsf' " + + "AND a_varbinary = X'000102f0feff' " + + "AND a_date = DATE '2021-07-24' " + + "AND a_time = TIME '02:43:57.987654' " + + "AND a_timestamp = TIMESTAMP '2021-07-24 03:43:57.987654' " + + "AND a_timestamptz = TIMESTAMP '2021-07-24 04:43:57.987654 UTC' " + + "AND a_uuid = UUID '20050910-1330-11e9-ffff-2a86e4085a59' " + + "AND a_row = CAST(ROW(42, 'this is a random value') AS ROW(id int, vc varchar)) " + + "AND an_array = ARRAY[VARCHAR 'uno', 'dos', 'tres'] " + + "AND a_map = map(ARRAY[1,2], ARRAY['ek', VARCHAR 'one']) " + + "")) .matches(values); - assertThat(query("SELECT * FROM test_all_types WHERE " + - " a_boolean IS NULL " + - "AND an_integer IS NULL " + - "AND a_bigint IS NULL " + - "AND a_real IS NULL " + - "AND a_double IS NULL " + - "AND a_short_decimal IS NULL " + - "AND a_long_decimal IS NULL " + - "AND a_varchar IS NULL " + - "AND a_varbinary IS NULL " + - "AND a_date IS NULL " + - "AND a_time IS NULL " + - "AND a_timestamp IS NULL " + - "AND a_timestamptz IS NULL " + - "AND a_uuid IS NULL " + - "AND a_row IS NULL " + - "AND an_array IS NULL " + - "AND a_map IS NULL " + - "")) + assertThat( + query( + "SELECT * FROM test_all_types WHERE " + + " a_boolean IS NULL " + + "AND an_integer IS NULL " + + "AND a_bigint IS NULL " + + "AND a_real IS NULL " + + "AND a_double IS NULL " + + "AND a_short_decimal IS NULL " + + "AND a_long_decimal IS NULL " + + "AND a_varchar IS NULL " + + "AND a_varbinary IS NULL " + + "AND a_date IS NULL " + + "AND a_time IS NULL " + + "AND a_timestamp IS NULL " + + "AND a_timestamptz IS NULL " + + "AND a_uuid IS NULL " + + "AND a_row IS NULL " + + "AND an_array IS NULL " + + "AND a_map IS NULL " + + "")) .skippingTypesCheck() .matches(nullValues); // SHOW STATS assertThat(query("SHOW STATS FOR test_all_types")) .skippingTypesCheck() - .matches("VALUES " + - " 
('a_boolean', NULL, NULL, 0.5e0, NULL, 'true', 'true'), " + - " ('an_integer', NULL, NULL, 0.5e0, NULL, '1', '1'), " + - " ('a_bigint', NULL, NULL, 0.5e0, NULL, '1', '1'), " + - " ('a_real', NULL, NULL, 0.5e0, NULL, '1.0', '1.0'), " + - " ('a_double', NULL, NULL, 0.5e0, NULL, '1.0', '1.0'), " + - " ('a_short_decimal', NULL, NULL, 0.5e0, NULL, '1.0', '1.0'), " + - " ('a_long_decimal', NULL, NULL, 0.5e0, NULL, '11.0', '11.0'), " + - " ('a_varchar', " + (format == PARQUET ? "234e0" : "NULL") + ", NULL, 0.5e0, NULL, NULL, NULL), " + - " ('a_varbinary', " + (format == PARQUET ? "114e0" : "NULL") + ", NULL, 0.5e0, NULL, NULL, NULL), " + - " ('a_date', NULL, NULL, 0.5e0, NULL, '2021-07-24', '2021-07-24'), " + - " ('a_time', NULL, NULL, 0.5e0, NULL, NULL, NULL), " + - " ('a_timestamp', NULL, NULL, 0.5e0, NULL, " + (format == ORC ? "'2021-07-24 03:43:57.987000', '2021-07-24 03:43:57.987999'" : "'2021-07-24 03:43:57.987654', '2021-07-24 03:43:57.987654'") + "), " + - " ('a_timestamptz', NULL, NULL, 0.5e0, NULL, '2021-07-24 04:43:57.987 UTC', '2021-07-24 04:43:57.987 UTC'), " + - " ('a_uuid', NULL, NULL, 0.5e0, NULL, NULL, NULL), " + - " ('a_row', NULL, NULL, " + (format == ORC ? "0.5" : "NULL") + ", NULL, NULL, NULL), " + - " ('an_array', NULL, NULL, " + (format == ORC ? "0.5" : "NULL") + ", NULL, NULL, NULL), " + - " ('a_map', NULL, NULL, " + (format == ORC ? "0.5" : "NULL") + ", NULL, NULL, NULL), " + - " (NULL, NULL, NULL, NULL, 2e0, NULL, NULL)"); + .matches( + "VALUES " + + " ('a_boolean', NULL, NULL, 0.5e0, NULL, 'true', 'true'), " + + " ('an_integer', NULL, NULL, 0.5e0, NULL, '1', '1'), " + + " ('a_bigint', NULL, NULL, 0.5e0, NULL, '1', '1'), " + + " ('a_real', NULL, NULL, 0.5e0, NULL, '1.0', '1.0'), " + + " ('a_double', NULL, NULL, 0.5e0, NULL, '1.0', '1.0'), " + + " ('a_short_decimal', NULL, NULL, 0.5e0, NULL, '1.0', '1.0'), " + + " ('a_long_decimal', NULL, NULL, 0.5e0, NULL, '11.0', '11.0'), " + + " ('a_varchar', " + + (format == PARQUET ? "234e0" : "NULL") + + ", NULL, 0.5e0, NULL, NULL, NULL), " + + " ('a_varbinary', " + + (format == PARQUET ? "114e0" : "NULL") + + ", NULL, 0.5e0, NULL, NULL, NULL), " + + " ('a_date', NULL, NULL, 0.5e0, NULL, '2021-07-24', '2021-07-24'), " + + " ('a_time', NULL, NULL, 0.5e0, NULL, NULL, NULL), " + + " ('a_timestamp', NULL, NULL, 0.5e0, NULL, " + + (format == ORC + ? "'2021-07-24 03:43:57.987000', '2021-07-24 03:43:57.987999'" + : "'2021-07-24 03:43:57.987654', '2021-07-24 03:43:57.987654'") + + "), " + + " ('a_timestamptz', NULL, NULL, 0.5e0, NULL, '2021-07-24 04:43:57.987 UTC', '2021-07-24 04:43:57.987 UTC'), " + + " ('a_uuid', NULL, NULL, 0.5e0, NULL, NULL, NULL), " + + " ('a_row', NULL, NULL, " + + (format == ORC ? "0.5" : "NULL") + + ", NULL, NULL, NULL), " + + " ('an_array', NULL, NULL, " + + (format == ORC ? "0.5" : "NULL") + + ", NULL, NULL, NULL), " + + " ('a_map', NULL, NULL, " + + (format == ORC ? 
"0.5" : "NULL") + + ", NULL, NULL, NULL), " + + " (NULL, NULL, NULL, NULL, 2e0, NULL, NULL)"); // $partitions String schema = getSession().getSchema().orElseThrow(); - assertThat(query("SELECT column_name FROM information_schema.columns WHERE table_schema = '" + schema + "' AND table_name = 'test_all_types$partitions' ")) + assertThat( + query( + "SELECT column_name FROM information_schema.columns WHERE table_schema = '" + + schema + + "' AND table_name = 'test_all_types$partitions' ")) .skippingTypesCheck() .matches("VALUES 'record_count', 'file_count', 'total_size', 'data'"); - assertThat(query("SELECT " + - " record_count," + - " file_count, " + - " data.a_boolean, " + - " data.an_integer, " + - " data.a_bigint, " + - " data.a_real, " + - " data.a_double, " + - " data.a_short_decimal, " + - " data.a_long_decimal, " + - " data.a_varchar, " + - " data.a_varbinary, " + - " data.a_date, " + - " data.a_time, " + - " data.a_timestamp, " + - " data.a_timestamptz, " + - " data.a_uuid " + - " FROM \"test_all_types$partitions\" ")) + assertThat( + query( + "SELECT " + + " record_count," + + " file_count, " + + " data.a_boolean, " + + " data.an_integer, " + + " data.a_bigint, " + + " data.a_real, " + + " data.a_double, " + + " data.a_short_decimal, " + + " data.a_long_decimal, " + + " data.a_varchar, " + + " data.a_varbinary, " + + " data.a_date, " + + " data.a_time, " + + " data.a_timestamp, " + + " data.a_timestamptz, " + + " data.a_uuid " + + " FROM \"test_all_types$partitions\" ")) .matches( - "VALUES (" + - " BIGINT '2', " + - " BIGINT '2', " + - " CAST(ROW(true, true, 1, NULL) AS ROW(min boolean, max boolean, null_count bigint, nan_count bigint)), " + - " CAST(ROW(1, 1, 1, NULL) AS ROW(min integer, max integer, null_count bigint, nan_count bigint)), " + - " CAST(ROW(1, 1, 1, NULL) AS ROW(min bigint, max bigint, null_count bigint, nan_count bigint)), " + - " CAST(ROW(1, 1, 1, NULL) AS ROW(min real, max real, null_count bigint, nan_count bigint)), " + - " CAST(ROW(1, 1, 1, NULL) AS ROW(min double, max double, null_count bigint, nan_count bigint)), " + - " CAST(ROW(1, 1, 1, NULL) AS ROW(min decimal(5,2), max decimal(5,2), null_count bigint, nan_count bigint)), " + - " CAST(ROW(11, 11, 1, NULL) AS ROW(min decimal(38,20), max decimal(38,20), null_count bigint, nan_count bigint)), " + - " CAST(ROW('onefsadfdsf', 'onefsadfdsf', 1, NULL) AS ROW(min varchar, max varchar, null_count bigint, nan_count bigint)), " + - (format == ORC ? - " CAST(ROW(NULL, NULL, 1, NULL) AS ROW(min varbinary, max varbinary, null_count bigint, nan_count bigint)), " : - " CAST(ROW(X'000102f0feff', X'000102f0feff', 1, NULL) AS ROW(min varbinary, max varbinary, null_count bigint, nan_count bigint)), ") + - " CAST(ROW(DATE '2021-07-24', DATE '2021-07-24', 1, NULL) AS ROW(min date, max date, null_count bigint, nan_count bigint)), " + - " CAST(ROW(TIME '02:43:57.987654', TIME '02:43:57.987654', 1, NULL) AS ROW(min time(6), max time(6), null_count bigint, nan_count bigint)), " + - (format == ORC ? - " CAST(ROW(TIMESTAMP '2021-07-24 03:43:57.987000', TIMESTAMP '2021-07-24 03:43:57.987999', 1, NULL) AS ROW(min timestamp(6), max timestamp(6), null_count bigint, nan_count bigint)), " : - " CAST(ROW(TIMESTAMP '2021-07-24 03:43:57.987654', TIMESTAMP '2021-07-24 03:43:57.987654', 1, NULL) AS ROW(min timestamp(6), max timestamp(6), null_count bigint, nan_count bigint)), ") + - (format == ORC ? 
- " CAST(ROW(TIMESTAMP '2021-07-24 04:43:57.987000 UTC', TIMESTAMP '2021-07-24 04:43:57.987999 UTC', 1, NULL) AS ROW(min timestamp(6) with time zone, max timestamp(6) with time zone, null_count bigint, nan_count bigint)), " : - " CAST(ROW(TIMESTAMP '2021-07-24 04:43:57.987654 UTC', TIMESTAMP '2021-07-24 04:43:57.987654 UTC', 1, NULL) AS ROW(min timestamp(6) with time zone, max timestamp(6) with time zone, null_count bigint, nan_count bigint)), ") + - (format == ORC ? - " CAST(ROW(NULL, NULL, 1, NULL) AS ROW(min uuid, max uuid, null_count bigint, nan_count bigint)) " : - " CAST(ROW(UUID '20050910-1330-11e9-ffff-2a86e4085a59', UUID '20050910-1330-11e9-ffff-2a86e4085a59', 1, NULL) AS ROW(min uuid, max uuid, null_count bigint, nan_count bigint)) " - ) + - ")"); + "VALUES (" + + " BIGINT '2', " + + " BIGINT '2', " + + " CAST(ROW(true, true, 1, NULL) AS ROW(min boolean, max boolean, null_count bigint, nan_count bigint)), " + + " CAST(ROW(1, 1, 1, NULL) AS ROW(min integer, max integer, null_count bigint, nan_count bigint)), " + + " CAST(ROW(1, 1, 1, NULL) AS ROW(min bigint, max bigint, null_count bigint, nan_count bigint)), " + + " CAST(ROW(1, 1, 1, NULL) AS ROW(min real, max real, null_count bigint, nan_count bigint)), " + + " CAST(ROW(1, 1, 1, NULL) AS ROW(min double, max double, null_count bigint, nan_count bigint)), " + + " CAST(ROW(1, 1, 1, NULL) AS ROW(min decimal(5,2), max decimal(5,2), null_count bigint, nan_count bigint)), " + + " CAST(ROW(11, 11, 1, NULL) AS ROW(min decimal(38,20), max decimal(38,20), null_count bigint, nan_count bigint)), " + + " CAST(ROW('onefsadfdsf', 'onefsadfdsf', 1, NULL) AS ROW(min varchar, max varchar, null_count bigint, nan_count bigint)), " + + (format == ORC + ? " CAST(ROW(NULL, NULL, 1, NULL) AS ROW(min varbinary, max varbinary, null_count bigint, nan_count bigint)), " + : " CAST(ROW(X'000102f0feff', X'000102f0feff', 1, NULL) AS ROW(min varbinary, max varbinary, null_count bigint, nan_count bigint)), ") + + " CAST(ROW(DATE '2021-07-24', DATE '2021-07-24', 1, NULL) AS ROW(min date, max date, null_count bigint, nan_count bigint)), " + + " CAST(ROW(TIME '02:43:57.987654', TIME '02:43:57.987654', 1, NULL) AS ROW(min time(6), max time(6), null_count bigint, nan_count bigint)), " + + (format == ORC + ? " CAST(ROW(TIMESTAMP '2021-07-24 03:43:57.987000', TIMESTAMP '2021-07-24 03:43:57.987999', 1, NULL) AS ROW(min timestamp(6), max timestamp(6), null_count bigint, nan_count bigint)), " + : " CAST(ROW(TIMESTAMP '2021-07-24 03:43:57.987654', TIMESTAMP '2021-07-24 03:43:57.987654', 1, NULL) AS ROW(min timestamp(6), max timestamp(6), null_count bigint, nan_count bigint)), ") + + (format == ORC + ? " CAST(ROW(TIMESTAMP '2021-07-24 04:43:57.987000 UTC', TIMESTAMP '2021-07-24 04:43:57.987999 UTC', 1, NULL) AS ROW(min timestamp(6) with time zone, max timestamp(6) with time zone, null_count bigint, nan_count bigint)), " + : " CAST(ROW(TIMESTAMP '2021-07-24 04:43:57.987654 UTC', TIMESTAMP '2021-07-24 04:43:57.987654 UTC', 1, NULL) AS ROW(min timestamp(6) with time zone, max timestamp(6) with time zone, null_count bigint, nan_count bigint)), ") + + (format == ORC + ? 
" CAST(ROW(NULL, NULL, 1, NULL) AS ROW(min uuid, max uuid, null_count bigint, nan_count bigint)) " + : " CAST(ROW(UUID '20050910-1330-11e9-ffff-2a86e4085a59', UUID '20050910-1330-11e9-ffff-2a86e4085a59', 1, NULL) AS ROW(min uuid, max uuid, null_count bigint, nan_count bigint)) ") + + ")"); assertUpdate("DROP TABLE test_all_types"); } -// @Test(dataProvider = "repartitioningDataProvider") -// public void testRepartitionDataOnCtas(Session session, String partitioning, int expectedFiles) -// { -// testRepartitionData(session, "tpch.tiny.orders", true, partitioning, expectedFiles); -// } -// -// @Test(dataProvider = "repartitioningDataProvider") -// public void testRepartitionDataOnInsert(Session session, String partitioning, int expectedFiles) -// { -// testRepartitionData(session, "tpch.tiny.orders", false, partitioning, expectedFiles); -// } + // @Test(dataProvider = "repartitioningDataProvider") + // public void testRepartitionDataOnCtas(Session session, String partitioning, int + // expectedFiles) + // { + // testRepartitionData(session, "tpch.tiny.orders", true, partitioning, expectedFiles); + // } + // + // @Test(dataProvider = "repartitioningDataProvider") + // public void testRepartitionDataOnInsert(Session session, String partitioning, int + // expectedFiles) + // { + // testRepartitionData(session, "tpch.tiny.orders", false, partitioning, expectedFiles); + // } @DataProvider public Object[][] repartitioningDataProvider() { Session defaultSession = getSession(); - // For identity-only partitioning, Iceberg connector returns ConnectorTableLayout with partitionColumns set, but without partitioning. - // This is treated by engine as "preferred", but not mandatory partitioning, and gets ignored if stats suggest number of partitions - // written is low. Without partitioning, number of files created is nondeterministic, as a writer (worker node) may or may not receive data. - Session obeyConnectorPartitioning = Session.builder(defaultSession) - .setSystemProperty(PREFERRED_WRITE_PARTITIONING_MIN_NUMBER_OF_PARTITIONS, "1") - .build(); + // For identity-only partitioning, Iceberg connector returns ConnectorTableLayout with + // partitionColumns set, but without partitioning. + // This is treated by engine as "preferred", but not mandatory partitioning, and gets ignored if + // stats suggest number of partitions + // written is low. Without partitioning, number of files created is nondeterministic, as a + // writer (worker node) may or may not receive data. 
+ Session obeyConnectorPartitioning = + Session.builder(defaultSession) + .setSystemProperty(PREFERRED_WRITE_PARTITIONING_MIN_NUMBER_OF_PARTITIONS, "1") + .build(); - return new Object[][]{ - // identity partitioning column - {obeyConnectorPartitioning, "'orderstatus'", 3}, - // bucketing - {defaultSession, "'bucket(custkey, 13)'", 13}, - // varchar-based - {defaultSession, "'truncate(comment, 1)'", 35}, - // complex; would exceed 100 open writers limit in IcebergPageSink without write repartitioning - {defaultSession, "'bucket(custkey, 4)', 'truncate(comment, 1)'", 131}, - // same column multiple times - {defaultSession, "'truncate(comment, 1)', 'orderstatus', 'bucket(comment, 2)'", 180}, + return new Object[][] { + // identity partitioning column + {obeyConnectorPartitioning, "'orderstatus'", 3}, + // bucketing + {defaultSession, "'bucket(custkey, 13)'", 13}, + // varchar-based + {defaultSession, "'truncate(comment, 1)'", 35}, + // complex; would exceed 100 open writers limit in IcebergPageSink without write + // repartitioning + {defaultSession, "'bucket(custkey, 4)', 'truncate(comment, 1)'", 131}, + // same column multiple times + {defaultSession, "'truncate(comment, 1)', 'orderstatus', 'bucket(comment, 2)'", 180}, }; } -// @Test -// public void testStatsBasedRepartitionDataOnCtas() -// { -// testStatsBasedRepartitionData(true); -// } -// -// @Test -// public void testStatsBasedRepartitionDataOnInsert() -// { -// testStatsBasedRepartitionData(false); -// } + // @Test + // public void testStatsBasedRepartitionDataOnCtas() + // { + // testStatsBasedRepartitionData(true); + // } + // + // @Test + // public void testStatsBasedRepartitionDataOnInsert() + // { + // testStatsBasedRepartitionData(false); + // } private void testStatsBasedRepartitionData(boolean ctas) { - Session sessionRepartitionSmall = Session.builder(getSession()) - .setSystemProperty(PREFERRED_WRITE_PARTITIONING_MIN_NUMBER_OF_PARTITIONS, "2") - .build(); - Session sessionRepartitionMany = Session.builder(getSession()) - .setSystemProperty(PREFERRED_WRITE_PARTITIONING_MIN_NUMBER_OF_PARTITIONS, "5") - .setSystemProperty(SCALE_WRITERS, "false") - .build(); - // Use DISTINCT to add data redistribution between source table and the writer. This makes it more likely that all writers get some data. - String sourceRelation = "(SELECT DISTINCT orderkey, custkey, orderstatus FROM tpch.tiny.orders)"; - testRepartitionData( - sessionRepartitionSmall, - sourceRelation, - ctas, - "'orderstatus'", - 3); - // Test uses relatively small table (60K rows). When engine doesn't redistribute data for writes, + Session sessionRepartitionSmall = + Session.builder(getSession()) + .setSystemProperty(PREFERRED_WRITE_PARTITIONING_MIN_NUMBER_OF_PARTITIONS, "2") + .build(); + Session sessionRepartitionMany = + Session.builder(getSession()) + .setSystemProperty(PREFERRED_WRITE_PARTITIONING_MIN_NUMBER_OF_PARTITIONS, "5") + .setSystemProperty(SCALE_WRITERS, "false") + .build(); + // Use DISTINCT to add data redistribution between source table and the writer. This makes it + // more likely that all writers get some data. + String sourceRelation = + "(SELECT DISTINCT orderkey, custkey, orderstatus FROM tpch.tiny.orders)"; + testRepartitionData(sessionRepartitionSmall, sourceRelation, ctas, "'orderstatus'", 3); + // Test uses relatively small table (60K rows). When engine doesn't redistribute data for + // writes, // occasionally a worker node doesn't get any data and fewer files get created. 
- assertEventually(() -> { - testRepartitionData( - sessionRepartitionMany, - sourceRelation, - ctas, - "'orderstatus'", - 9); - }); - } - - private void testRepartitionData(Session session, String sourceRelation, boolean ctas, String partitioning, int expectedFiles) { - String tableName = "repartition" + - "_" + sourceRelation.replaceAll("[^a-zA-Z0-9]", "") + - (ctas ? "ctas" : "insert") + - "_" + partitioning.replaceAll("[^a-zA-Z0-9]", "") + - "_" + randomNameSuffix(); + assertEventually( + () -> { + testRepartitionData(sessionRepartitionMany, sourceRelation, ctas, "'orderstatus'", 9); + }); + } + + private void testRepartitionData( + Session session, + String sourceRelation, + boolean ctas, + String partitioning, + int expectedFiles) { + String tableName = + "repartition" + + "_" + + sourceRelation.replaceAll("[^a-zA-Z0-9]", "") + + (ctas ? "ctas" : "insert") + + "_" + + partitioning.replaceAll("[^a-zA-Z0-9]", "") + + "_" + + randomNameSuffix(); long rowCount = (long) computeScalar(session, "SELECT count(*) FROM " + sourceRelation); if (ctas) { assertUpdate( session, - "CREATE TABLE " + tableName + " WITH (partitioning = ARRAY[" + partitioning + "]) " + - "AS SELECT * FROM " + sourceRelation, + "CREATE TABLE " + + tableName + + " WITH (partitioning = ARRAY[" + + partitioning + + "]) " + + "AS SELECT * FROM " + + sourceRelation, rowCount); } else { assertUpdate( session, - "CREATE TABLE " + tableName + " WITH (partitioning = ARRAY[" + partitioning + "]) " + - "AS SELECT * FROM " + sourceRelation + " WITH NO DATA", + "CREATE TABLE " + + tableName + + " WITH (partitioning = ARRAY[" + + partitioning + + "]) " + + "AS SELECT * FROM " + + sourceRelation + + " WITH NO DATA", 0); // Use source table big enough so that there will be multiple pages being written. 
- assertUpdate(session, "INSERT INTO " + tableName + " SELECT * FROM " + sourceRelation, rowCount); + assertUpdate( + session, "INSERT INTO " + tableName + " SELECT * FROM " + sourceRelation, rowCount); } // verify written data @@ -2599,19 +3107,26 @@ public void testGetIcebergTableProperties() { } private void verifySplitCount(String query, int expectedSplitCount) { - MaterializedResultWithQueryId selectAllPartitionsResult = getDistributedQueryRunner().executeWithQueryId(getSession(), query); - assertEqualsIgnoreOrder(selectAllPartitionsResult.getResult().getMaterializedRows(), computeActual(withoutPredicatePushdown(getSession()), query).getMaterializedRows()); + MaterializedResultWithQueryId selectAllPartitionsResult = + getDistributedQueryRunner().executeWithQueryId(getSession(), query); + assertEqualsIgnoreOrder( + selectAllPartitionsResult.getResult().getMaterializedRows(), + computeActual(withoutPredicatePushdown(getSession()), query).getMaterializedRows()); verifySplitCount(selectAllPartitionsResult.getQueryId(), expectedSplitCount); } - private void verifyPredicatePushdownDataRead(@Language("SQL") String query, boolean supportsPushdown) { - MaterializedResultWithQueryId resultWithPredicatePushdown = getDistributedQueryRunner().executeWithQueryId(getSession(), query); - MaterializedResultWithQueryId resultWithoutPredicatePushdown = getDistributedQueryRunner().executeWithQueryId( - withoutPredicatePushdown(getSession()), - query); + private void verifyPredicatePushdownDataRead( + @Language("SQL") String query, boolean supportsPushdown) { + MaterializedResultWithQueryId resultWithPredicatePushdown = + getDistributedQueryRunner().executeWithQueryId(getSession(), query); + MaterializedResultWithQueryId resultWithoutPredicatePushdown = + getDistributedQueryRunner() + .executeWithQueryId(withoutPredicatePushdown(getSession()), query); - DataSize withPushdownDataSize = getOperatorStats(resultWithPredicatePushdown.getQueryId()).getInputDataSize(); - DataSize withoutPushdownDataSize = getOperatorStats(resultWithoutPredicatePushdown.getQueryId()).getInputDataSize(); + DataSize withPushdownDataSize = + getOperatorStats(resultWithPredicatePushdown.getQueryId()).getInputDataSize(); + DataSize withoutPushdownDataSize = + getOperatorStats(resultWithoutPredicatePushdown.getQueryId()).getInputDataSize(); if (supportsPushdown) { assertThat(withPushdownDataSize).isLessThan(withoutPushdownDataSize); } else { @@ -2640,16 +3155,21 @@ private void verifySplitCount(QueryId queryId, long expectedSplitCount) { private OperatorStats getOperatorStats(QueryId queryId) { try { - return getDistributedQueryRunner().getCoordinator() + return getDistributedQueryRunner() + .getCoordinator() .getQueryManager() .getFullQueryInfo(queryId) .getQueryStats() .getOperatorSummaries() .stream() - .filter(summary -> summary.getOperatorType().startsWith("TableScan") || summary.getOperatorType().startsWith("Scan")) + .filter( + summary -> + summary.getOperatorType().startsWith("TableScan") + || summary.getOperatorType().startsWith("Scan")) .collect(onlyElement()); } catch (NoSuchElementException e) { - throw new RuntimeException("Couldn't find operator summary, probably due to query statistic collection error", e); + throw new RuntimeException( + "Couldn't find operator summary, probably due to query statistic collection error", e); } } @@ -2659,33 +3179,43 @@ protected TestTable createTableWithDefaultColumns() { } @Override - protected Optional filterDataMappingSmokeTestData(DataMappingTestSetup dataMappingTestSetup) { + 
protected Optional filterDataMappingSmokeTestData( + DataMappingTestSetup dataMappingTestSetup) { String typeName = dataMappingTestSetup.getTrinoTypeName(); - if (typeName.equals("tinyint") - || typeName.equals("smallint") - || typeName.startsWith("char(")) { + if (typeName.equals("tinyint") || typeName.equals("smallint") || typeName.startsWith("char(")) { // These types are not supported by arctic return Optional.of(dataMappingTestSetup.asUnsupported()); } - // According to arctic specification all time and timestamp values are stored with microsecond precision. + // According to arctic specification all time and timestamp values are stored with microsecond + // precision. if (typeName.equals("time")) { - return Optional.of(new DataMappingTestSetup("time(6)", "TIME '15:03:00'", "TIME '23:59:59.999999'")); + return Optional.of( + new DataMappingTestSetup("time(6)", "TIME '15:03:00'", "TIME '23:59:59.999999'")); } if (typeName.equals("timestamp")) { - return Optional.of(new DataMappingTestSetup("timestamp(6)", "TIMESTAMP '2020-02-12 15:03:00'", "TIMESTAMP '2199-12-31 23:59:59.999999'")); + return Optional.of( + new DataMappingTestSetup( + "timestamp(6)", + "TIMESTAMP '2020-02-12 15:03:00'", + "TIMESTAMP '2199-12-31 23:59:59.999999'")); } if (typeName.equals("timestamp(3) with time zone")) { - return Optional.of(new DataMappingTestSetup("timestamp(6) with time zone", "TIMESTAMP '2020-02-12 15:03:00 +01:00'", "TIMESTAMP '9999-12-31 23:59:59.999999 +12:00'")); + return Optional.of( + new DataMappingTestSetup( + "timestamp(6) with time zone", + "TIMESTAMP '2020-02-12 15:03:00 +01:00'", + "TIMESTAMP '9999-12-31 23:59:59.999999 +12:00'")); } return Optional.of(dataMappingTestSetup); } @Override - protected Optional filterCaseSensitiveDataMappingTestData(DataMappingTestSetup dataMappingTestSetup) { + protected Optional filterCaseSensitiveDataMappingTestData( + DataMappingTestSetup dataMappingTestSetup) { String typeName = dataMappingTestSetup.getTrinoTypeName(); if (typeName.equals("char(1)")) { return Optional.of(dataMappingTestSetup.asUnsupported()); @@ -2693,26 +3223,30 @@ protected Optional filterCaseSensitiveDataMappingTestData( return Optional.of(dataMappingTestSetup); } -// @Test -// public void testAmbiguousColumnsWithDots() -// { -// assertThatThrownBy(() -> assertUpdate("CREATE TABLE ambiguous (\"a.cow\" BIGINT, a ROW(cow BIGINT))")) -// .hasMessage("Invalid schema: multiple fields for name a.cow: 1 and 3"); -// -// assertUpdate("CREATE TABLE ambiguous (\"a.cow\" BIGINT, b ROW(cow BIGINT))"); -// assertThatThrownBy(() -> assertUpdate("ALTER TABLE ambiguous RENAME COLUMN b TO a")) -// .hasMessage("Invalid schema: multiple fields for name a.cow: 1 and 3"); -// assertUpdate("DROP TABLE ambiguous"); -// -// assertUpdate("CREATE TABLE ambiguous (a ROW(cow BIGINT))"); -// assertThatThrownBy(() -> assertUpdate("ALTER TABLE ambiguous ADD COLUMN \"a.cow\" BIGINT")) -// .hasMessage("Cannot add column with ambiguous name: a.cow, use addColumn(parent, name, type)"); -// assertUpdate("DROP TABLE ambiguous"); -// } + // @Test + // public void testAmbiguousColumnsWithDots() + // { + // assertThatThrownBy(() -> assertUpdate("CREATE TABLE ambiguous (\"a.cow\" BIGINT, a + // ROW(cow BIGINT))")) + // .hasMessage("Invalid schema: multiple fields for name a.cow: 1 and 3"); + // + // assertUpdate("CREATE TABLE ambiguous (\"a.cow\" BIGINT, b ROW(cow BIGINT))"); + // assertThatThrownBy(() -> assertUpdate("ALTER TABLE ambiguous RENAME COLUMN b TO a")) + // .hasMessage("Invalid schema: multiple fields for name 
a.cow: 1 and 3"); + // assertUpdate("DROP TABLE ambiguous"); + // + // assertUpdate("CREATE TABLE ambiguous (a ROW(cow BIGINT))"); + // assertThatThrownBy(() -> assertUpdate("ALTER TABLE ambiguous ADD COLUMN \"a.cow\" + // BIGINT")) + // .hasMessage("Cannot add column with ambiguous name: a.cow, use addColumn(parent, + // name, type)"); + // assertUpdate("DROP TABLE ambiguous"); + // } @Test public void testSchemaEvolutionWithDereferenceProjections() { - // Fields are identified uniquely based on unique id's. If a column is dropped and recreated with the same name it should not return dropped data. + // Fields are identified uniquely based on unique id's. If a column is dropped and recreated + // with the same name it should not return dropped data. assertUpdate("CREATE TABLE evolve_test (dummy BIGINT, a row(b BIGINT, c VARCHAR))"); assertUpdate("INSERT INTO evolve_test VALUES (1, ROW(1, 'abc'))", 1); assertUpdate("ALTER TABLE evolve_test DROP COLUMN a"); @@ -2721,7 +3255,8 @@ public void testSchemaEvolutionWithDereferenceProjections() { assertUpdate("DROP TABLE evolve_test"); // Very changing subfield ordering does not revive dropped data - assertUpdate("CREATE TABLE evolve_test (dummy BIGINT, a ROW(b BIGINT, c VARCHAR), d BIGINT) with (partitioning = ARRAY['d'])"); + assertUpdate( + "CREATE TABLE evolve_test (dummy BIGINT, a ROW(b BIGINT, c VARCHAR), d BIGINT) with (partitioning = ARRAY['d'])"); assertUpdate("INSERT INTO evolve_test VALUES (1, ROW(2, 'abc'), 3)", 1); assertUpdate("ALTER TABLE evolve_test DROP COLUMN a"); assertUpdate("ALTER TABLE evolve_test ADD COLUMN a ROW(c VARCHAR, b BIGINT)"); @@ -2733,155 +3268,221 @@ public void testSchemaEvolutionWithDereferenceProjections() { @Test public void testHighlyNestedData() { - assertUpdate("CREATE TABLE nested_data (id INT, row_t ROW(f1 INT, f2 INT, row_t ROW (f1 INT, f2 INT, row_t ROW(f1 INT, f2 INT))))"); - assertUpdate("INSERT INTO nested_data VALUES (1, ROW(2, 3, ROW(4, 5, ROW(6, 7)))), (11, ROW(12, 13, ROW(14, 15, ROW(16, 17))))", 2); + assertUpdate( + "CREATE TABLE nested_data (id INT, row_t ROW(f1 INT, f2 INT, row_t ROW (f1 INT, f2 INT, row_t ROW(f1 INT, f2 INT))))"); + assertUpdate( + "INSERT INTO nested_data VALUES (1, ROW(2, 3, ROW(4, 5, ROW(6, 7)))), (11, ROW(12, 13, ROW(14, 15, ROW(16, 17))))", + 2); assertUpdate("INSERT INTO nested_data VALUES (21, ROW(22, 23, ROW(24, 25, ROW(26, 27))))", 1); // Test select projected columns, with and without their parent column - assertQuery("SELECT id, row_t.row_t.row_t.f2 FROM nested_data", "VALUES (1, 7), (11, 17), (21, 27)"); - assertQuery("SELECT id, row_t.row_t.row_t.f2, CAST(row_t AS JSON) FROM nested_data", - "VALUES (1, 7, '{\"f1\":2,\"f2\":3,\"row_t\":{\"f1\":4,\"f2\":5,\"row_t\":{\"f1\":6,\"f2\":7}}}'), " + - "(11, 17, '{\"f1\":12,\"f2\":13,\"row_t\":{\"f1\":14,\"f2\":15,\"row_t\":{\"f1\":16,\"f2\":17}}}'), " + - "(21, 27, '{\"f1\":22,\"f2\":23,\"row_t\":{\"f1\":24,\"f2\":25,\"row_t\":{\"f1\":26,\"f2\":27}}}')"); + assertQuery( + "SELECT id, row_t.row_t.row_t.f2 FROM nested_data", "VALUES (1, 7), (11, 17), (21, 27)"); + assertQuery( + "SELECT id, row_t.row_t.row_t.f2, CAST(row_t AS JSON) FROM nested_data", + "VALUES (1, 7, '{\"f1\":2,\"f2\":3,\"row_t\":{\"f1\":4,\"f2\":5,\"row_t\":{\"f1\":6,\"f2\":7}}}'), " + + "(11, 17, '{\"f1\":12,\"f2\":13,\"row_t\":{\"f1\":14,\"f2\":15,\"row_t\":{\"f1\":16,\"f2\":17}}}'), " + + "(21, 27, '{\"f1\":22,\"f2\":23,\"row_t\":{\"f1\":24,\"f2\":25,\"row_t\":{\"f1\":26,\"f2\":27}}}')"); // Test predicates on immediate child column and deeper nested 
column - assertQuery("SELECT id, CAST(row_t.row_t.row_t AS JSON) FROM nested_data WHERE row_t.row_t.row_t.f2 = 27", "VALUES (21, '{\"f1\":26,\"f2\":27}')"); - assertQuery("SELECT id, CAST(row_t.row_t.row_t AS JSON) FROM nested_data WHERE row_t.row_t.row_t.f2 > 20", "VALUES (21, '{\"f1\":26,\"f2\":27}')"); - assertQuery("SELECT id, CAST(row_t AS JSON) FROM nested_data WHERE row_t.row_t.row_t.f2 = 27", + assertQuery( + "SELECT id, CAST(row_t.row_t.row_t AS JSON) FROM nested_data WHERE row_t.row_t.row_t.f2 = 27", + "VALUES (21, '{\"f1\":26,\"f2\":27}')"); + assertQuery( + "SELECT id, CAST(row_t.row_t.row_t AS JSON) FROM nested_data WHERE row_t.row_t.row_t.f2 > 20", + "VALUES (21, '{\"f1\":26,\"f2\":27}')"); + assertQuery( + "SELECT id, CAST(row_t AS JSON) FROM nested_data WHERE row_t.row_t.row_t.f2 = 27", "VALUES (21, '{\"f1\":22,\"f2\":23,\"row_t\":{\"f1\":24,\"f2\":25,\"row_t\":{\"f1\":26,\"f2\":27}}}')"); - assertQuery("SELECT id, CAST(row_t AS JSON) FROM nested_data WHERE row_t.row_t.row_t.f2 > 20", + assertQuery( + "SELECT id, CAST(row_t AS JSON) FROM nested_data WHERE row_t.row_t.row_t.f2 > 20", "VALUES (21, '{\"f1\":22,\"f2\":23,\"row_t\":{\"f1\":24,\"f2\":25,\"row_t\":{\"f1\":26,\"f2\":27}}}')"); // Test predicates on parent columns - assertQuery("SELECT id, row_t.row_t.row_t.f1 FROM nested_data WHERE row_t.row_t.row_t = ROW(16, 17)", "VALUES (11, 16)"); - assertQuery("SELECT id, row_t.row_t.row_t.f1 FROM nested_data WHERE row_t = ROW(22, 23, ROW(24, 25, ROW(26, 27)))", "VALUES (21, 26)"); + assertQuery( + "SELECT id, row_t.row_t.row_t.f1 FROM nested_data WHERE row_t.row_t.row_t = ROW(16, 17)", + "VALUES (11, 16)"); + assertQuery( + "SELECT id, row_t.row_t.row_t.f1 FROM nested_data WHERE row_t = ROW(22, 23, ROW(24, 25, ROW(26, 27)))", + "VALUES (21, 26)"); assertUpdate("DROP TABLE IF EXISTS nested_data"); } -// @Test -// public void testProjectionPushdownAfterRename() -// { -// assertUpdate("CREATE TABLE projection_pushdown_after_rename (id INT, a ROW(b INT, c ROW (d INT)))"); -// assertUpdate("INSERT INTO projection_pushdown_after_rename VALUES (1, ROW(2, ROW(3))), (11, ROW(12, ROW(13)))", 2); -// assertUpdate("INSERT INTO projection_pushdown_after_rename VALUES (21, ROW(22, ROW(23)))", 1); -// -// String expected = "VALUES (11, JSON '{\"b\":12,\"c\":{\"d\":13}}', 13)"; -// assertQuery("SELECT id, CAST(a AS JSON), a.c.d FROM projection_pushdown_after_rename WHERE a.b = 12", expected); -//// assertUpdate("ALTER TABLE projection_pushdown_after_rename RENAME COLUMN a TO row_t"); -// assertQuery("SELECT id, CAST(row_t AS JSON), row_t.c.d FROM projection_pushdown_after_rename WHERE row_t.b = 12", expected); -// -// assertUpdate("DROP TABLE IF EXISTS projection_pushdown_after_rename"); -// } + // @Test + // public void testProjectionPushdownAfterRename() + // { + // assertUpdate("CREATE TABLE projection_pushdown_after_rename (id INT, a ROW(b INT, c ROW + // (d INT)))"); + // assertUpdate("INSERT INTO projection_pushdown_after_rename VALUES (1, ROW(2, ROW(3))), + // (11, ROW(12, ROW(13)))", 2); + // assertUpdate("INSERT INTO projection_pushdown_after_rename VALUES (21, ROW(22, + // ROW(23)))", 1); + // + // String expected = "VALUES (11, JSON '{\"b\":12,\"c\":{\"d\":13}}', 13)"; + // assertQuery("SELECT id, CAST(a AS JSON), a.c.d FROM projection_pushdown_after_rename + // WHERE a.b = 12", expected); + //// assertUpdate("ALTER TABLE projection_pushdown_after_rename RENAME COLUMN a TO row_t"); + // assertQuery("SELECT id, CAST(row_t AS JSON), row_t.c.d FROM + // 
projection_pushdown_after_rename WHERE row_t.b = 12", expected); + // + // assertUpdate("DROP TABLE IF EXISTS projection_pushdown_after_rename"); + // } @Test public void testProjectionWithCaseSensitiveField() { - assertUpdate("CREATE TABLE projection_with_case_sensitive_field (id INT, a ROW(\"UPPER_CASE\" INT, \"lower_case\" INT, \"MiXeD_cAsE\" INT))"); - assertUpdate("INSERT INTO projection_with_case_sensitive_field VALUES (1, ROW(2, 3, 4)), (5, ROW(6, 7, 8))", 2); + assertUpdate( + "CREATE TABLE projection_with_case_sensitive_field (id INT, a ROW(\"UPPER_CASE\" INT, \"lower_case\" INT, \"MiXeD_cAsE\" INT))"); + assertUpdate( + "INSERT INTO projection_with_case_sensitive_field VALUES (1, ROW(2, 3, 4)), (5, ROW(6, 7, 8))", + 2); String expected = "VALUES (2, 3, 4), (6, 7, 8)"; - assertQuery("SELECT a.UPPER_CASE, a.lower_case, a.MiXeD_cAsE FROM projection_with_case_sensitive_field", expected); - assertQuery("SELECT a.upper_case, a.lower_case, a.mixed_case FROM projection_with_case_sensitive_field", expected); - assertQuery("SELECT a.UPPER_CASE, a.LOWER_CASE, a.MIXED_CASE FROM projection_with_case_sensitive_field", expected); + assertQuery( + "SELECT a.UPPER_CASE, a.lower_case, a.MiXeD_cAsE FROM projection_with_case_sensitive_field", + expected); + assertQuery( + "SELECT a.upper_case, a.lower_case, a.mixed_case FROM projection_with_case_sensitive_field", + expected); + assertQuery( + "SELECT a.UPPER_CASE, a.LOWER_CASE, a.MIXED_CASE FROM projection_with_case_sensitive_field", + expected); assertUpdate("DROP TABLE IF EXISTS projection_with_case_sensitive_field"); } @Test public void testProjectionPushdownOnPartitionedTables() { - assertUpdate("CREATE TABLE table_with_partition_at_beginning (id BIGINT, root ROW(f1 BIGINT, f2 BIGINT)) WITH (partitioning = ARRAY['id'])"); - assertUpdate("INSERT INTO table_with_partition_at_beginning VALUES (1, ROW(1, 2)), (1, ROW(2, 3)), (1, ROW(3, 4))", 3); - assertQuery("SELECT id, root.f2 FROM table_with_partition_at_beginning", "VALUES (1, 2), (1, 3), (1, 4)"); + assertUpdate( + "CREATE TABLE table_with_partition_at_beginning (id BIGINT, root ROW(f1 BIGINT, f2 BIGINT)) WITH (partitioning = ARRAY['id'])"); + assertUpdate( + "INSERT INTO table_with_partition_at_beginning VALUES (1, ROW(1, 2)), (1, ROW(2, 3)), (1, ROW(3, 4))", + 3); + assertQuery( + "SELECT id, root.f2 FROM table_with_partition_at_beginning", + "VALUES (1, 2), (1, 3), (1, 4)"); assertUpdate("DROP TABLE table_with_partition_at_beginning"); - assertUpdate("CREATE TABLE table_with_partition_at_end (root ROW(f1 BIGINT, f2 BIGINT), id BIGINT) WITH (partitioning = ARRAY['id'])"); - assertUpdate("INSERT INTO table_with_partition_at_end VALUES (ROW(1, 2), 1), (ROW(2, 3), 1), (ROW(3, 4), 1)", 3); - assertQuery("SELECT root.f2, id FROM table_with_partition_at_end", "VALUES (2, 1), (3, 1), (4, 1)"); + assertUpdate( + "CREATE TABLE table_with_partition_at_end (root ROW(f1 BIGINT, f2 BIGINT), id BIGINT) WITH (partitioning = ARRAY['id'])"); + assertUpdate( + "INSERT INTO table_with_partition_at_end VALUES (ROW(1, 2), 1), (ROW(2, 3), 1), (ROW(3, 4), 1)", + 3); + assertQuery( + "SELECT root.f2, id FROM table_with_partition_at_end", "VALUES (2, 1), (3, 1), (4, 1)"); assertUpdate("DROP TABLE table_with_partition_at_end"); } @Test public void testProjectionPushdownOnPartitionedTableWithComments() { - assertUpdate("CREATE TABLE test_projection_pushdown_comments (id BIGINT COMMENT 'id', qid BIGINT COMMENT 'QID', root ROW(f1 BIGINT, f2 BIGINT) COMMENT 'root') WITH (partitioning = ARRAY['id'])"); - 
assertUpdate("INSERT INTO test_projection_pushdown_comments VALUES (1, 1, ROW(1, 2)), (1, 2, ROW(2, 3)), (1, 3, ROW(3, 4))", 3); - assertQuery("SELECT id, root.f2 FROM test_projection_pushdown_comments", "VALUES (1, 2), (1, 3), (1, 4)"); + assertUpdate( + "CREATE TABLE test_projection_pushdown_comments (id BIGINT COMMENT 'id', qid BIGINT COMMENT 'QID', root ROW(f1 BIGINT, f2 BIGINT) COMMENT 'root') WITH (partitioning = ARRAY['id'])"); + assertUpdate( + "INSERT INTO test_projection_pushdown_comments VALUES (1, 1, ROW(1, 2)), (1, 2, ROW(2, 3)), (1, 3, ROW(3, 4))", + 3); + assertQuery( + "SELECT id, root.f2 FROM test_projection_pushdown_comments", + "VALUES (1, 2), (1, 3), (1, 4)"); // Query with predicates on both nested and top-level columns (with partition column) - assertQuery("SELECT id, root.f2 FROM test_projection_pushdown_comments WHERE id = 1 AND qid = 1 AND root.f1 = 1", "VALUES (1, 2)"); + assertQuery( + "SELECT id, root.f2 FROM test_projection_pushdown_comments WHERE id = 1 AND qid = 1 AND root.f1 = 1", + "VALUES (1, 2)"); // Query with predicates on both nested and top-level columns (no partition column) - assertQuery("SELECT id, root.f2 FROM test_projection_pushdown_comments WHERE qid = 2 AND root.f1 = 2", "VALUES (1, 3)"); + assertQuery( + "SELECT id, root.f2 FROM test_projection_pushdown_comments WHERE qid = 2 AND root.f1 = 2", + "VALUES (1, 3)"); // Query with predicates on top-level columns only - assertQuery("SELECT id, root.f2 FROM test_projection_pushdown_comments WHERE id = 1 AND qid = 1", "VALUES (1, 2)"); + assertQuery( + "SELECT id, root.f2 FROM test_projection_pushdown_comments WHERE id = 1 AND qid = 1", + "VALUES (1, 2)"); // Query with predicates on nested columns only - assertQuery("SELECT id, root.f2 FROM test_projection_pushdown_comments WHERE root.f1 = 2", "VALUES (1, 3)"); + assertQuery( + "SELECT id, root.f2 FROM test_projection_pushdown_comments WHERE root.f1 = 2", + "VALUES (1, 3)"); assertUpdate("DROP TABLE IF EXISTS test_projection_pushdown_comments"); } -// @Test -// public void testOptimize() -// throws Exception { -// String tableName = "test_optimize_" + randomNameSuffix(); -// assertUpdate("CREATE TABLE " + tableName + " (key integer, value varchar) WITH (format_version = 1)"); -// -// // DistributedQueryRunner sets node-scheduler.include-coordinator by default, so include coordinator -// int workerCount = getQueryRunner().getNodeCount(); -// -// // optimize an empty table -// assertQuerySucceeds("ALTER TABLE " + tableName + " EXECUTE OPTIMIZE"); -// assertThat(getActiveFiles(tableName)).isEmpty(); -// -// assertUpdate("INSERT INTO " + tableName + " VALUES (11, 'eleven')", 1); -// assertUpdate("INSERT INTO " + tableName + " VALUES (12, 'zwölf')", 1); -// assertUpdate("INSERT INTO " + tableName + " VALUES (13, 'trzynaście')", 1); -// assertUpdate("INSERT INTO " + tableName + " VALUES (14, 'quatorze')", 1); -// assertUpdate("INSERT INTO " + tableName + " VALUES (15, 'пʼятнадцять')", 1); -// -// List initialFiles = getActiveFiles(tableName); -// assertThat(initialFiles) -// .hasSize(5) -// // Verify we have sufficiently many test rows with respect to worker count. 
-// .hasSizeGreaterThan(workerCount); -// -// computeActual("ALTER TABLE " + tableName + " EXECUTE OPTIMIZE"); -// assertThat(query("SELECT sum(key), listagg(value, ' ') WITHIN GROUP (ORDER BY key) FROM " + tableName)) -// .matches("VALUES (BIGINT '65', VARCHAR 'eleven zwölf trzynaście quatorze пʼятнадцять')"); -// List updatedFiles = getActiveFiles(tableName); -// assertThat(updatedFiles) -// .hasSizeBetween(1, workerCount) -// .doesNotContainAnyElementsOf(initialFiles); -// // No files should be removed (this is expire_snapshots's job, when it exists) -// assertThat(getAllDataFilesFromTableDirectory(tableName)) -// .containsExactlyInAnyOrderElementsOf(concat(initialFiles, updatedFiles)); -// -// // optimize with low retention threshold, nothing should change -// computeActual("ALTER TABLE " + tableName + " EXECUTE OPTIMIZE (file_size_threshold => '33B')"); -// assertThat(query("SELECT sum(key), listagg(value, ' ') WITHIN GROUP (ORDER BY key) FROM " + tableName)) -// .matches("VALUES (BIGINT '65', VARCHAR 'eleven zwölf trzynaście quatorze пʼятнадцять')"); -// assertThat(getActiveFiles(tableName)).isEqualTo(updatedFiles); -// assertThat(getAllDataFilesFromTableDirectory(tableName)) -// .containsExactlyInAnyOrderElementsOf(concat(initialFiles, updatedFiles)); -// -// // optimize with delimited procedure name -// assertQueryFails("ALTER TABLE " + tableName + " EXECUTE \"optimize\"", "Table procedure not registered: optimize"); -// assertUpdate("ALTER TABLE " + tableName + " EXECUTE \"OPTIMIZE\""); -// // optimize with delimited parameter name (and procedure name) -// assertUpdate("ALTER TABLE " + tableName + " EXECUTE \"OPTIMIZE\" (\"file_size_threshold\" => '33B')"); // TODO (https://github.com/trinodb/trino/issues/11326) this should fail -// assertUpdate("ALTER TABLE " + tableName + " EXECUTE \"OPTIMIZE\" (\"FILE_SIZE_THRESHOLD\" => '33B')"); -// assertUpdate("DROP TABLE " + tableName); -// } + // @Test + // public void testOptimize() + // throws Exception { + // String tableName = "test_optimize_" + randomNameSuffix(); + // assertUpdate("CREATE TABLE " + tableName + " (key integer, value varchar) WITH + // (format_version = 1)"); + // + // // DistributedQueryRunner sets node-scheduler.include-coordinator by default, so include + // coordinator + // int workerCount = getQueryRunner().getNodeCount(); + // + // // optimize an empty table + // assertQuerySucceeds("ALTER TABLE " + tableName + " EXECUTE OPTIMIZE"); + // assertThat(getActiveFiles(tableName)).isEmpty(); + // + // assertUpdate("INSERT INTO " + tableName + " VALUES (11, 'eleven')", 1); + // assertUpdate("INSERT INTO " + tableName + " VALUES (12, 'zwölf')", 1); + // assertUpdate("INSERT INTO " + tableName + " VALUES (13, 'trzynaście')", 1); + // assertUpdate("INSERT INTO " + tableName + " VALUES (14, 'quatorze')", 1); + // assertUpdate("INSERT INTO " + tableName + " VALUES (15, 'пʼятнадцять')", 1); + // + // List initialFiles = getActiveFiles(tableName); + // assertThat(initialFiles) + // .hasSize(5) + // // Verify we have sufficiently many test rows with respect to worker count. 
+ // .hasSizeGreaterThan(workerCount); + // + // computeActual("ALTER TABLE " + tableName + " EXECUTE OPTIMIZE"); + // assertThat(query("SELECT sum(key), listagg(value, ' ') WITHIN GROUP (ORDER BY key) FROM " + + // tableName)) + // .matches("VALUES (BIGINT '65', VARCHAR 'eleven zwölf trzynaście quatorze + // пʼятнадцять')"); + // List updatedFiles = getActiveFiles(tableName); + // assertThat(updatedFiles) + // .hasSizeBetween(1, workerCount) + // .doesNotContainAnyElementsOf(initialFiles); + // // No files should be removed (this is expire_snapshots's job, when it exists) + // assertThat(getAllDataFilesFromTableDirectory(tableName)) + // .containsExactlyInAnyOrderElementsOf(concat(initialFiles, updatedFiles)); + // + // // optimize with low retention threshold, nothing should change + // computeActual("ALTER TABLE " + tableName + " EXECUTE OPTIMIZE (file_size_threshold => + // '33B')"); + // assertThat(query("SELECT sum(key), listagg(value, ' ') WITHIN GROUP (ORDER BY key) FROM " + + // tableName)) + // .matches("VALUES (BIGINT '65', VARCHAR 'eleven zwölf trzynaście quatorze + // пʼятнадцять')"); + // assertThat(getActiveFiles(tableName)).isEqualTo(updatedFiles); + // assertThat(getAllDataFilesFromTableDirectory(tableName)) + // .containsExactlyInAnyOrderElementsOf(concat(initialFiles, updatedFiles)); + // + // // optimize with delimited procedure name + // assertQueryFails("ALTER TABLE " + tableName + " EXECUTE \"optimize\"", "Table procedure not + // registered: optimize"); + // assertUpdate("ALTER TABLE " + tableName + " EXECUTE \"OPTIMIZE\""); + // // optimize with delimited parameter name (and procedure name) + // assertUpdate("ALTER TABLE " + tableName + " EXECUTE \"OPTIMIZE\" (\"file_size_threshold\" => + // '33B')"); // TODO (https://github.com/trinodb/trino/issues/11326) this should fail + // assertUpdate("ALTER TABLE " + tableName + " EXECUTE \"OPTIMIZE\" (\"FILE_SIZE_THRESHOLD\" => + // '33B')"); + // assertUpdate("DROP TABLE " + tableName); + // } @Test - public void testOptimizeForPartitionedTable() - throws IOException { - // This test will have its own session to make sure partitioning is indeed forced and is not a result + public void testOptimizeForPartitionedTable() throws IOException { + // This test will have its own session to make sure partitioning is indeed forced and is not a + // result // of session configuration - Session session = testSessionBuilder() - .setCatalog(getQueryRunner().getDefaultSession().getCatalog()) - .setSchema(getQueryRunner().getDefaultSession().getSchema()) - .setSystemProperty("use_preferred_write_partitioning", "true") - .setSystemProperty("preferred_write_partitioning_min_number_of_partitions", "100") - .build(); + Session session = + testSessionBuilder() + .setCatalog(getQueryRunner().getDefaultSession().getCatalog()) + .setSchema(getQueryRunner().getDefaultSession().getSchema()) + .setSystemProperty("use_preferred_write_partitioning", "true") + .setSystemProperty("preferred_write_partitioning_min_number_of_partitions", "100") + .build(); String tableName = "test_repartitiong_during_optimize_" + randomNameSuffix(); - assertUpdate(session, "CREATE TABLE " + tableName + " (key varchar, value integer) WITH (format_version = 1, partitioning = ARRAY['key'])"); + assertUpdate( + session, + "CREATE TABLE " + + tableName + + " (key varchar, value integer) WITH (format_version = 1, partitioning = ARRAY['key'])"); // optimize an empty table assertQuerySucceeds(session, "ALTER TABLE " + tableName + " EXECUTE OPTIMIZE"); @@ -2901,31 +3502,42 @@ 
public void testOptimizeForPartitionedTable() computeActual(session, "ALTER TABLE " + tableName + " EXECUTE OPTIMIZE"); - assertThat(query(session, "SELECT sum(value), listagg(key, ' ') WITHIN GROUP (ORDER BY key) FROM " + tableName)) + assertThat( + query( + session, + "SELECT sum(value), listagg(key, ' ') WITHIN GROUP (ORDER BY key) FROM " + + tableName)) .matches("VALUES (BIGINT '55', VARCHAR 'one one one one one one one three two two')"); List updatedFiles = getActiveFiles(tableName); // as we force repartitioning there should be only 3 partitions assertThat(updatedFiles).hasSize(3); - assertThat(getAllDataFilesFromTableDirectory(tableName)).containsExactlyInAnyOrderElementsOf(concat(initialFiles, updatedFiles)); + assertThat(getAllDataFilesFromTableDirectory(tableName)) + .containsExactlyInAnyOrderElementsOf(concat(initialFiles, updatedFiles)); assertUpdate("DROP TABLE " + tableName); } private List getActiveFiles(String tableName) { - return computeActual(format("SELECT file_path FROM \"%s$files\"", tableName)).getOnlyColumn() + return computeActual(format("SELECT file_path FROM \"%s$files\"", tableName)) + .getOnlyColumn() .map(String.class::cast) .collect(toImmutableList()); } - private List getAllDataFilesFromTableDirectory(String tableName) - throws IOException { + private List getAllDataFilesFromTableDirectory(String tableName) throws IOException { String schema = getSession().getSchema().orElseThrow(); - Path tableDataDir = getDistributedQueryRunner().getCoordinator().getBaseDataDir() - .resolve("arctic").resolve("hadoop").resolve(schema).resolve(tableName).resolve("data"); + Path tableDataDir = + getDistributedQueryRunner() + .getCoordinator() + .getBaseDataDir() + .resolve("arctic") + .resolve("hadoop") + .resolve(schema) + .resolve(tableName) + .resolve("data"); try (Stream walk = Files.walk(tableDataDir)) { - return walk - .filter(Files::isRegularFile) + return walk.filter(Files::isRegularFile) .filter(path -> !path.getFileName().toString().matches("\\..*\\.crc")) .map(Path::toString) .collect(toImmutableList()); @@ -2949,7 +3561,8 @@ public void testOptimizeParameterValidation() { // public void testTargetMaxFileSize() // { // String tableName = "test_default_max_file_size" + randomNameSuffix(); - // @Language("SQL") String createTableSql = format("CREATE TABLE %s AS SELECT * FROM tpch.sf1.lineitem LIMIT 100000", tableName); + // @Language("SQL") String createTableSql = format("CREATE TABLE %s AS SELECT * FROM + // tpch.sf1.lineitem LIMIT 100000", tableName); // // Session session = Session.builder(getSession()) // .setSystemProperty("task_writer_count", "1") @@ -2966,15 +3579,18 @@ public void testOptimizeParameterValidation() { // .build(); // // assertUpdate(session, createTableSql, 100000); - // assertThat(query(format("SELECT count(*) FROM %s", tableName))).matches("VALUES BIGINT '100000'"); + // assertThat(query(format("SELECT count(*) FROM %s", tableName))).matches("VALUES BIGINT + // '100000'"); // List updatedFiles = getActiveFiles(tableName); // assertThat(updatedFiles.size()).isGreaterThan(10); // // computeActual(format("SELECT file_size_in_bytes FROM \"%s$files\"", tableName)) // .getMaterializedRows() - // // as target_max_file_size is set to quite low value it can happen that created files are bigger, + // // as target_max_file_size is set to quite low value it can happen that created + // files are bigger, // // so just to be safe we check if it is not much bigger - // .forEach(row -> assertThat((Long) row.getField(0)).isBetween(1L, maxSize.toBytes() * 3)); 
+ // .forEach(row -> assertThat((Long) row.getField(0)).isBetween(1L, maxSize.toBytes() + // * 3)); // } @Test @@ -2984,26 +3600,32 @@ public void testDroppingIcebergAndCreatingANewTableWithTheSameNameShouldBePossib dropTable("test_arctic_recreate"); assertUpdate("CREATE TABLE test_arctic_recreate (a_varchar) AS VALUES ('Trino')", 1); - assertThat(query("SELECT min(a_varchar) FROM test_arctic_recreate")).matches("VALUES CAST('Trino' AS varchar)"); + assertThat(query("SELECT min(a_varchar) FROM test_arctic_recreate")) + .matches("VALUES CAST('Trino' AS varchar)"); dropTable("test_arctic_recreate"); } @Test public void testPathHiddenColumn() { String tableName = "test_path_" + randomNameSuffix(); - @Language("SQL") String createTable = "CREATE TABLE " + tableName + " " + - "WITH ( partitioning = ARRAY['zip'] ) AS " + - "SELECT * FROM (VALUES " + - "(0, 0), (3, 0), (6, 0), " + - "(1, 1), (4, 1), (7, 1), " + - "(2, 2), (5, 2) " + - " ) t(userid, zip)"; + @Language("SQL") + String createTable = + "CREATE TABLE " + + tableName + + " " + + "WITH ( partitioning = ARRAY['zip'] ) AS " + + "SELECT * FROM (VALUES " + + "(0, 0), (3, 0), (6, 0), " + + "(1, 1), (4, 1), (7, 1), " + + "(2, 2), (5, 2) " + + " ) t(userid, zip)"; assertUpdate(createTable, 8); - MaterializedResult expectedColumns = resultBuilder(getSession(), VARCHAR, VARCHAR, VARCHAR, VARCHAR) - .row("userid", "integer", "", "") - .row("zip", "integer", "", "") - .build(); + MaterializedResult expectedColumns = + resultBuilder(getSession(), VARCHAR, VARCHAR, VARCHAR, VARCHAR) + .row("userid", "integer", "", "") + .row("zip", "integer", "", "") + .build(); MaterializedResult actualColumns = computeActual(format("DESCRIBE %s", tableName)); // Describe output should not have the $path hidden column assertEquals(actualColumns, expectedColumns); @@ -3015,21 +3637,28 @@ public void testPathHiddenColumn() { } @Test - public void testExpireSnapshots() - throws Exception { + public void testExpireSnapshots() throws Exception { String tableName = "test_expiring_snapshots_" + randomNameSuffix(); Session sessionWithShortRetentionUnlocked = prepareCleanUpSession(); assertUpdate("CREATE TABLE " + tableName + " (key varchar, value integer)"); assertUpdate("INSERT INTO " + tableName + " VALUES ('one', 1)", 1); assertUpdate("INSERT INTO " + tableName + " VALUES ('two', 2)", 1); - assertThat(query("SELECT sum(value), listagg(key, ' ') WITHIN GROUP (ORDER BY key) FROM " + tableName)) + assertThat( + query( + "SELECT sum(value), listagg(key, ' ') WITHIN GROUP (ORDER BY key) FROM " + + tableName)) .matches("VALUES (BIGINT '3', VARCHAR 'one two')"); List initialSnapshots = getSnapshotIds(tableName); List initialFiles = getAllMetadataFilesFromTableDirectoryForTable(tableName); - assertQuerySucceeds(sessionWithShortRetentionUnlocked, "ALTER TABLE " + tableName + " EXECUTE EXPIRE_SNAPSHOTS (retention_threshold => '0s')"); - - assertThat(query("SELECT sum(value), listagg(key, ' ') WITHIN GROUP (ORDER BY key) FROM " + tableName)) + assertQuerySucceeds( + sessionWithShortRetentionUnlocked, + "ALTER TABLE " + tableName + " EXECUTE EXPIRE_SNAPSHOTS (retention_threshold => '0s')"); + + assertThat( + query( + "SELECT sum(value), listagg(key, ' ') WITHIN GROUP (ORDER BY key) FROM " + + tableName)) .matches("VALUES (BIGINT '3', VARCHAR 'one two')"); List updatedFiles = getAllMetadataFilesFromTableDirectoryForTable(tableName); List updatedSnapshots = getSnapshotIds(tableName); @@ -3040,19 +3669,27 @@ public void testExpireSnapshots() } @Test - public void 
testExpireSnapshotsPartitionedTable() - throws Exception { + public void testExpireSnapshotsPartitionedTable() throws Exception { String tableName = "test_expiring_snapshots_partitioned_table" + randomNameSuffix(); Session sessionWithShortRetentionUnlocked = prepareCleanUpSession(); - assertUpdate("CREATE TABLE " + tableName + " (col1 BIGINT, col2 BIGINT) WITH (partitioning = ARRAY['col1'])"); - assertUpdate("INSERT INTO " + tableName + " VALUES(1, 100), (1, 101), (1, 102), (2, 200), (2, 201), (3, 300)", 6); + assertUpdate( + "CREATE TABLE " + + tableName + + " (col1 BIGINT, col2 BIGINT) WITH (partitioning = ARRAY['col1'])"); + assertUpdate( + "INSERT INTO " + + tableName + + " VALUES(1, 100), (1, 101), (1, 102), (2, 200), (2, 201), (3, 300)", + 6); assertUpdate("DELETE FROM " + tableName + " WHERE col1 = 1", 3); assertUpdate("INSERT INTO " + tableName + " VALUES(4, 400)", 1); assertQuery("SELECT sum(col2) FROM " + tableName, "SELECT 1101"); List initialDataFiles = getAllDataFilesFromTableDirectory(tableName); List initialSnapshots = getSnapshotIds(tableName); - assertQuerySucceeds(sessionWithShortRetentionUnlocked, "ALTER TABLE " + tableName + " EXECUTE EXPIRE_SNAPSHOTS (retention_threshold => '0s')"); + assertQuerySucceeds( + sessionWithShortRetentionUnlocked, + "ALTER TABLE " + tableName + " EXECUTE EXPIRE_SNAPSHOTS (retention_threshold => '0s')"); List updatedDataFiles = getAllDataFilesFromTableDirectory(tableName); List updatedSnapshots = getSnapshotIds(tableName); @@ -3064,11 +3701,17 @@ public void testExpireSnapshotsPartitionedTable() @Test public void testExplainExpireSnapshotOutput() { String tableName = "test_expiring_snapshots_output" + randomNameSuffix(); - assertUpdate("CREATE TABLE " + tableName + " (key varchar, value integer) WITH (partitioning = ARRAY['key'])"); + assertUpdate( + "CREATE TABLE " + + tableName + + " (key varchar, value integer) WITH (partitioning = ARRAY['key'])"); assertUpdate("INSERT INTO " + tableName + " VALUES ('one', 1)", 1); assertUpdate("INSERT INTO " + tableName + " VALUES ('two', 2)", 1); - assertExplain("EXPLAIN ALTER TABLE " + tableName + " EXECUTE EXPIRE_SNAPSHOTS (retention_threshold => '0s')", + assertExplain( + "EXPLAIN ALTER TABLE " + + tableName + + " EXECUTE EXPIRE_SNAPSHOTS (retention_threshold => '0s')", "SimpleTableExecute\\[table = arctic:schemaTableName:tpch.test_expiring_snapshots.*\\{retentionThreshold=0\\.00s}.*"); } @@ -3088,30 +3731,34 @@ public void testExpireSnapshotsParameterValidation() { "\\QRetention specified (33.00s) is shorter than the minimum retention configured in the system (7.00d). Minimum retention can be changed with iceberg.expire_snapshots.min-retention configuration property or iceberg.expire_snapshots_min_retention session property"); } -// @Test -// public void testRemoveOrphanFiles() -// throws Exception { -// String tableName = "test_deleting_orphan_files_unnecessary_files" + randomNameSuffix(); -// Session sessionWithShortRetentionUnlocked = prepareCleanUpSession(); -// assertUpdate("CREATE TABLE " + tableName + " (key varchar, value integer)"); -// assertUpdate("INSERT INTO " + tableName + " VALUES ('one', 1)", 1); -// assertUpdate("INSERT INTO " + tableName + " VALUES ('two', 2), ('three', 3)", 2); -// assertUpdate("DELETE FROM " + tableName + " WHERE key = 'two'", 1); -// String location = getTableLocation(tableName); -// Path orphanFile = Files.createFile(Path.of(getIcebergTableDataPath(location).toString(), "invalidData." 
+ format)); -// List initialDataFiles = getAllDataFilesFromTableDirectory(tableName); -// -// assertQuerySucceeds(sessionWithShortRetentionUnlocked, "ALTER TABLE " + tableName + " EXECUTE REMOVE_ORPHAN_FILES (retention_threshold => '0s')"); -// assertQuery("SELECT * FROM " + tableName, "VALUES ('one', 1), ('three', 3)"); -// -// List updatedDataFiles = getAllDataFilesFromTableDirectory(tableName); -// assertThat(updatedDataFiles.size()).isLessThan(initialDataFiles.size()); -// assertThat(updatedDataFiles).doesNotContain(orphanFile.toString()); -// } + // @Test + // public void testRemoveOrphanFiles() + // throws Exception { + // String tableName = "test_deleting_orphan_files_unnecessary_files" + randomNameSuffix(); + // Session sessionWithShortRetentionUnlocked = prepareCleanUpSession(); + // assertUpdate("CREATE TABLE " + tableName + " (key varchar, value integer)"); + // assertUpdate("INSERT INTO " + tableName + " VALUES ('one', 1)", 1); + // assertUpdate("INSERT INTO " + tableName + " VALUES ('two', 2), ('three', 3)", 2); + // assertUpdate("DELETE FROM " + tableName + " WHERE key = 'two'", 1); + // String location = getTableLocation(tableName); + // Path orphanFile = Files.createFile(Path.of(getIcebergTableDataPath(location).toString(), + // "invalidData." + format)); + // List initialDataFiles = getAllDataFilesFromTableDirectory(tableName); + // + // assertQuerySucceeds(sessionWithShortRetentionUnlocked, "ALTER TABLE " + tableName + " + // EXECUTE REMOVE_ORPHAN_FILES (retention_threshold => '0s')"); + // assertQuery("SELECT * FROM " + tableName, "VALUES ('one', 1), ('three', 3)"); + // + // List updatedDataFiles = getAllDataFilesFromTableDirectory(tableName); + // assertThat(updatedDataFiles.size()).isLessThan(initialDataFiles.size()); + // assertThat(updatedDataFiles).doesNotContain(orphanFile.toString()); + // } private String getTableLocation(String tableName) { Pattern locationPattern = Pattern.compile(".*location = '(.*?)'.*", Pattern.DOTALL); - Matcher m = locationPattern.matcher((String) computeActual("SHOW CREATE TABLE " + tableName).getOnlyValue()); + Matcher m = + locationPattern.matcher( + (String) computeActual("SHOW CREATE TABLE " + tableName).getOnlyValue()); if (m.find()) { String location = m.group(1); verify(!m.find(), "Unexpected second match"); @@ -3120,66 +3767,81 @@ private String getTableLocation(String tableName) { throw new IllegalStateException("Location not found in SHOW CREATE TABLE result"); } -// TestTable in Trino does not support FileIO, so skip this test -// @Test -// public void testIfRemoveOrphanFilesCleansUnnecessaryDataFilesInPartitionedTable() -// throws Exception { -// String tableName = "test_deleting_orphan_files_unnecessary_files" + randomNameSuffix(); -// Session sessionWithShortRetentionUnlocked = prepareCleanUpSession(); -// assertUpdate("CREATE TABLE " + tableName + " (key varchar, value integer) WITH (partitioning = ARRAY['key'])"); -// assertUpdate("INSERT INTO " + tableName + " VALUES ('one', 1)", 1); -// assertUpdate("INSERT INTO " + tableName + " VALUES ('two', 2)", 1); -// Path orphanFile = Files.createFile(Path.of(getIcebergTableDataPath(tableName) + "/key=one/", "invalidData." 
+ format)); -// List initialDataFiles = getAllDataFilesFromTableDirectory(tableName); -// -// assertQuerySucceeds(sessionWithShortRetentionUnlocked, "ALTER TABLE " + tableName + " EXECUTE REMOVE_ORPHAN_FILES (retention_threshold => '0s')"); -// -// List updatedDataFiles = getAllDataFilesFromTableDirectory(tableName); -// assertThat(updatedDataFiles.size()).isLessThan(initialDataFiles.size()); -// assertThat(updatedDataFiles).doesNotContain(orphanFile.toString()); -// } - -// @Test -// public void testIfRemoveOrphanFilesCleansUnnecessaryMetadataFilesInPartitionedTable() -// throws Exception -// { -// String tableName = "test_deleting_orphan_files_unnecessary_files" + randomNameSuffix(); -// Session sessionWithShortRetentionUnlocked = prepareCleanUpSession(); -// assertUpdate("CREATE TABLE " + tableName + " (key varchar, value integer) WITH (partitioning = ARRAY['key'])"); -// assertUpdate("INSERT INTO " + tableName + " VALUES ('one', 1)", 1); -// assertUpdate("INSERT INTO " + tableName + " VALUES ('two', 2)", 1); -// Path orphanMetadataFile = Files.createFile(Path.of(getIcebergTableMetadataPath(tableName).toString(), "invalidData." + format)); -// List initialMetadataFiles = getAllMetadataFilesFromTableDirectoryForTable(tableName); -// -// assertQuerySucceeds(sessionWithShortRetentionUnlocked, "ALTER TABLE " + tableName + " EXECUTE REMOVE_ORPHAN_FILES (retention_threshold => '0s')"); -// -// List updatedMetadataFiles = getAllMetadataFilesFromTableDirectoryForTable(tableName); -// assertThat(updatedMetadataFiles.size()).isLessThan(initialMetadataFiles.size()); -// assertThat(updatedMetadataFiles).doesNotContain(orphanMetadataFile.toString()); -// } - -// @Test -// public void testCleaningUpWithTableWithSpecifiedLocationWithSlashAtTheEnd() -// throws IOException -// { -// testCleaningUpWithTableWithSpecifiedLocation("/"); -// } -// -// @Test -// public void testCleaningUpWithTableWithSpecifiedLocationWithoutSlashAtTheEnd() -// throws IOException -// { -// testCleaningUpWithTableWithSpecifiedLocation(""); -// } + // TestTable in Trino does not support FileIO, so skip this test + // @Test + // public void testIfRemoveOrphanFilesCleansUnnecessaryDataFilesInPartitionedTable() + // throws Exception { + // String tableName = "test_deleting_orphan_files_unnecessary_files" + randomNameSuffix(); + // Session sessionWithShortRetentionUnlocked = prepareCleanUpSession(); + // assertUpdate("CREATE TABLE " + tableName + " (key varchar, value integer) WITH (partitioning + // = ARRAY['key'])"); + // assertUpdate("INSERT INTO " + tableName + " VALUES ('one', 1)", 1); + // assertUpdate("INSERT INTO " + tableName + " VALUES ('two', 2)", 1); + // Path orphanFile = Files.createFile(Path.of(getIcebergTableDataPath(tableName) + "/key=one/", + // "invalidData." 
+ format)); + // List initialDataFiles = getAllDataFilesFromTableDirectory(tableName); + // + // assertQuerySucceeds(sessionWithShortRetentionUnlocked, "ALTER TABLE " + tableName + " + // EXECUTE REMOVE_ORPHAN_FILES (retention_threshold => '0s')"); + // + // List updatedDataFiles = getAllDataFilesFromTableDirectory(tableName); + // assertThat(updatedDataFiles.size()).isLessThan(initialDataFiles.size()); + // assertThat(updatedDataFiles).doesNotContain(orphanFile.toString()); + // } + + // @Test + // public void testIfRemoveOrphanFilesCleansUnnecessaryMetadataFilesInPartitionedTable() + // throws Exception + // { + // String tableName = "test_deleting_orphan_files_unnecessary_files" + randomNameSuffix(); + // Session sessionWithShortRetentionUnlocked = prepareCleanUpSession(); + // assertUpdate("CREATE TABLE " + tableName + " (key varchar, value integer) WITH + // (partitioning = ARRAY['key'])"); + // assertUpdate("INSERT INTO " + tableName + " VALUES ('one', 1)", 1); + // assertUpdate("INSERT INTO " + tableName + " VALUES ('two', 2)", 1); + // Path orphanMetadataFile = + // Files.createFile(Path.of(getIcebergTableMetadataPath(tableName).toString(), "invalidData." + + // format)); + // List initialMetadataFiles = + // getAllMetadataFilesFromTableDirectoryForTable(tableName); + // + // assertQuerySucceeds(sessionWithShortRetentionUnlocked, "ALTER TABLE " + tableName + " + // EXECUTE REMOVE_ORPHAN_FILES (retention_threshold => '0s')"); + // + // List updatedMetadataFiles = + // getAllMetadataFilesFromTableDirectoryForTable(tableName); + // assertThat(updatedMetadataFiles.size()).isLessThan(initialMetadataFiles.size()); + // assertThat(updatedMetadataFiles).doesNotContain(orphanMetadataFile.toString()); + // } + + // @Test + // public void testCleaningUpWithTableWithSpecifiedLocationWithSlashAtTheEnd() + // throws IOException + // { + // testCleaningUpWithTableWithSpecifiedLocation("/"); + // } + // + // @Test + // public void testCleaningUpWithTableWithSpecifiedLocationWithoutSlashAtTheEnd() + // throws IOException + // { + // testCleaningUpWithTableWithSpecifiedLocation(""); + // } @Test public void testExplainRemoveOrphanFilesOutput() { String tableName = "test_remove_orphan_files_output" + randomNameSuffix(); - assertUpdate("CREATE TABLE " + tableName + " (key varchar, value integer) WITH (partitioning = ARRAY['key'])"); + assertUpdate( + "CREATE TABLE " + + tableName + + " (key varchar, value integer) WITH (partitioning = ARRAY['key'])"); assertUpdate("INSERT INTO " + tableName + " VALUES ('one', 1)", 1); assertUpdate("INSERT INTO " + tableName + " VALUES ('two', 2)", 1); - assertExplain("EXPLAIN ALTER TABLE " + tableName + " EXECUTE REMOVE_ORPHAN_FILES (retention_threshold => '0s')", + assertExplain( + "EXPLAIN ALTER TABLE " + + tableName + + " EXECUTE REMOVE_ORPHAN_FILES (retention_threshold => '0s')", "SimpleTableExecute\\[table = arctic:schemaTableName:tpch.test_remove_orphan_files.*\\{retentionThreshold=0\\.00s}.*"); } @@ -3202,14 +3864,21 @@ public void testRemoveOrphanFilesParameterValidation() { @Test public void testIfDeletesReturnsNumberOfRemovedRows() { String tableName = "test_delete_returns_number_of_rows_" + randomNameSuffix(); - assertUpdate("CREATE TABLE " + tableName + " (key varchar, value integer) WITH (partitioning = ARRAY['key'])"); + assertUpdate( + "CREATE TABLE " + + tableName + + " (key varchar, value integer) WITH (partitioning = ARRAY['key'])"); assertUpdate("INSERT INTO " + tableName + " VALUES ('one', 1)", 1); assertUpdate("INSERT INTO " + tableName + " VALUES 
('one', 2)", 1); assertUpdate("INSERT INTO " + tableName + " VALUES ('one', 3)", 1); assertUpdate("INSERT INTO " + tableName + " VALUES ('two', 1)", 1); assertUpdate("INSERT INTO " + tableName + " VALUES ('two', 2)", 1); assertUpdate("DELETE FROM " + tableName + " WHERE key = 'one'", 3); - assertUpdate("DELETE FROM " + tableName + " WHERE key = 'one'"); // TODO change this when arctic will guarantee to always return this (https://github.com/apache/iceberg/issues/4647) + assertUpdate( + "DELETE FROM " + + tableName + + " WHERE key = 'one'"); // TODO change this when arctic will guarantee to always return + // this (https://github.com/apache/iceberg/issues/4647) assertUpdate("DELETE FROM " + tableName + " WHERE key = 'three'"); assertUpdate("DELETE FROM " + tableName + " WHERE key = 'two'", 2); } @@ -3218,16 +3887,30 @@ public void testIfDeletesReturnsNumberOfRemovedRows() { public void testUpdatingFileFormat() { String tableName = "test_updating_file_format_" + randomNameSuffix(); - assertUpdate("CREATE TABLE " + tableName + " WITH (format = 'orc') AS SELECT * FROM nation WHERE nationkey < 10", "SELECT count(*) FROM nation WHERE nationkey < 10"); - assertQuery("SELECT value FROM \"" + tableName + "$properties\" WHERE key = 'write.format.default'", "VALUES 'ORC'"); + assertUpdate( + "CREATE TABLE " + + tableName + + " WITH (format = 'orc') AS SELECT * FROM nation WHERE nationkey < 10", + "SELECT count(*) FROM nation WHERE nationkey < 10"); + assertQuery( + "SELECT value FROM \"" + tableName + "$properties\" WHERE key = 'write.format.default'", + "VALUES 'ORC'"); assertUpdate("ALTER TABLE " + tableName + " SET PROPERTIES format = 'parquet'"); - assertQuery("SELECT value FROM \"" + tableName + "$properties\" WHERE key = 'write.format.default'", "VALUES 'PARQUET'"); - assertUpdate("INSERT INTO " + tableName + " SELECT * FROM nation WHERE nationkey >= 10", "SELECT count(*) FROM nation WHERE nationkey >= 10"); + assertQuery( + "SELECT value FROM \"" + tableName + "$properties\" WHERE key = 'write.format.default'", + "VALUES 'PARQUET'"); + assertUpdate( + "INSERT INTO " + tableName + " SELECT * FROM nation WHERE nationkey >= 10", + "SELECT count(*) FROM nation WHERE nationkey >= 10"); assertQuery("SELECT * FROM " + tableName, "SELECT * FROM nation"); - assertQuery("SELECT count(*) FROM \"" + tableName + "$files\" WHERE file_path LIKE '%.orc'", "VALUES 1"); - assertQuery("SELECT count(*) FROM \"" + tableName + "$files\" WHERE file_path LIKE '%.parquet'", "VALUES 1"); + assertQuery( + "SELECT count(*) FROM \"" + tableName + "$files\" WHERE file_path LIKE '%.orc'", + "VALUES 1"); + assertQuery( + "SELECT count(*) FROM \"" + tableName + "$files\" WHERE file_path LIKE '%.parquet'", + "VALUES 1"); assertUpdate("DROP TABLE " + tableName); } @@ -3236,7 +3919,12 @@ public void testUpdatingFileFormat() { public void testUpdatingInvalidTableProperty() { String tableName = "test_updating_invalid_table_property_" + randomNameSuffix(); assertUpdate("CREATE TABLE " + tableName + " (a INT, b INT)"); - assertThatThrownBy(() -> query("ALTER TABLE " + tableName + " SET PROPERTIES not_a_valid_table_property = 'a value'")) + assertThatThrownBy( + () -> + query( + "ALTER TABLE " + + tableName + + " SET PROPERTIES not_a_valid_table_property = 'a value'")) .hasMessage("Catalog 'arctic' table property 'not_a_valid_table_property' does not exist"); assertUpdate("DROP TABLE " + tableName); } @@ -3251,7 +3939,15 @@ private Session prepareCleanUpSession() { private List getAllMetadataFilesFromTableDirectoryForTable(String 
tableName) throws IOException { String schema = getSession().getSchema().orElseThrow(); - Path tableDataDir = getDistributedQueryRunner().getCoordinator().getBaseDataDir().resolve("arctic").resolve("hadoop").resolve(schema).resolve(tableName).resolve("metadata"); + Path tableDataDir = + getDistributedQueryRunner() + .getCoordinator() + .getBaseDataDir() + .resolve("arctic") + .resolve("hadoop") + .resolve(schema) + .resolve(tableName) + .resolve("metadata"); return listAllTableFilesInDirectory(tableDataDir); } @@ -3260,11 +3956,9 @@ private List getAllMetadataFilesFromTableDirectory(String tableDataDir) return listAllTableFilesInDirectory(Path.of(URI.create(tableDataDir).getPath())); } - private List listAllTableFilesInDirectory(Path tableDataPath) - throws IOException { + private List listAllTableFilesInDirectory(Path tableDataPath) throws IOException { try (Stream walk = Files.walk(tableDataPath)) { - return walk - .filter(Files::isRegularFile) + return walk.filter(Files::isRegularFile) .filter(path -> !path.getFileName().toString().matches("\\..*\\.crc")) .map(Path::toString) .collect(toImmutableList()); @@ -3272,7 +3966,8 @@ } private List getSnapshotIds(String tableName) { - return getQueryRunner().execute(format("SELECT snapshot_id FROM \"%s$snapshots\"", tableName)) + return getQueryRunner() + .execute(format("SELECT snapshot_id FROM \"%s$snapshots\"", tableName)) .getOnlyColumn() .map(Long.class::cast) .collect(toUnmodifiableList()); @@ -3288,7 +3983,13 @@ private Path getIcebergTableMetadataPath(String tableName) { private Path getIcebergTablePath(String tableName, String suffix) { String schema = getSession().getSchema().orElseThrow(); - return getDistributedQueryRunner().getCoordinator().getBaseDataDir().resolve("arctic") - .resolve("hadoop").resolve(schema).resolve(tableName).resolve(suffix); + return getDistributedQueryRunner() + .getCoordinator() + .getBaseDataDir() + .resolve("arctic") + .resolve("hadoop") + .resolve(schema) + .resolve(tableName) + .resolve(suffix); } } diff --git a/trino/src/test/java/com/netease/arctic/trino/iceberg/TestBasicArcticCatalog.java b/trino/src/test/java/com/netease/arctic/trino/iceberg/TestBasicArcticCatalog.java index 9961aae02b..d000bca42a 100644 --- a/trino/src/test/java/com/netease/arctic/trino/iceberg/TestBasicArcticCatalog.java +++ b/trino/src/test/java/com/netease/arctic/trino/iceberg/TestBasicArcticCatalog.java @@ -48,7 +48,7 @@ public TestBasicArcticCatalog(String location) { this.location = location; Configuration conf = new Configuration(); this.catalog = new HadoopCatalog(conf, location); - //创建catalog.db,便于测试 + // Create catalog.db to make testing easier if (!catalog.namespaceExists(Namespace.of(catalog.name(), "tpch"))) { catalog.createNamespace(Namespace.of(catalog.name(), "tpch")); } @@ -62,36 +62,41 @@ public String name() { @Override public List listDatabases() { return catalog.listNamespaces(Namespace.of(catalog.name())).stream() - .map(s -> s.level(1)).collect(Collectors.toList()); + .map(s -> s.level(1)) + .collect(Collectors.toList()); } @Override public List listTables(String database) { - return catalog.listTables(Namespace.of(catalog.name(), database)) - .stream() - .map(s -> TableIdentifier.of(s.namespace().level(0), - s.namespace().level(1), s.name())) + return catalog.listTables(Namespace.of(catalog.name(), database)).stream() + .map(s -> TableIdentifier.of(s.namespace().level(0), s.namespace().level(1), s.name())) .collect(Collectors.toList()); } @Override public ArcticTable
loadTable(TableIdentifier identifier) { - Table table = catalog.loadTable( - org.apache.iceberg.catalog.TableIdentifier.of(identifier.getCatalog(), - identifier.getDatabase(), identifier.getTableName())); + Table table = + catalog.loadTable( + org.apache.iceberg.catalog.TableIdentifier.of( + identifier.getCatalog(), identifier.getDatabase(), identifier.getTableName())); return new TestArcticTable((BaseTable) table, identifier); } @Override public boolean dropTable(TableIdentifier identifier, boolean purge) { - return catalog.dropTable(org.apache.iceberg.catalog.TableIdentifier.of(identifier.getCatalog(), - identifier.getDatabase(), identifier.getTableName()), purge); + return catalog.dropTable( + org.apache.iceberg.catalog.TableIdentifier.of( + identifier.getCatalog(), identifier.getDatabase(), identifier.getTableName()), + purge); } @Override public boolean tableExists(TableIdentifier tableIdentifier) { - return catalog.tableExists(org.apache.iceberg.catalog.TableIdentifier.of(tableIdentifier.getCatalog(), - tableIdentifier.getDatabase(), tableIdentifier.getTableName())); + return catalog.tableExists( + org.apache.iceberg.catalog.TableIdentifier.of( + tableIdentifier.getCatalog(), + tableIdentifier.getDatabase(), + tableIdentifier.getTableName())); } @Override @@ -150,8 +155,13 @@ public ArcticTable create() { @Override public Transaction createTransaction() { - return catalog.newCreateTableTransaction(org.apache.iceberg.catalog.TableIdentifier.of(identifier.getCatalog(), - identifier.getDatabase(), identifier.getTableName()), schema, partitionSpec, location, properties); + return catalog.newCreateTableTransaction( + org.apache.iceberg.catalog.TableIdentifier.of( + identifier.getCatalog(), identifier.getDatabase(), identifier.getTableName()), + schema, + partitionSpec, + location, + properties); } } } diff --git a/trino/src/test/java/com/netease/arctic/trino/iceberg/TestOnServerArcticConnectorTest.java b/trino/src/test/java/com/netease/arctic/trino/iceberg/TestOnServerArcticConnectorTest.java index 88cc9074ef..b9a4bc1b42 100644 --- a/trino/src/test/java/com/netease/arctic/trino/iceberg/TestOnServerArcticConnectorTest.java +++ b/trino/src/test/java/com/netease/arctic/trino/iceberg/TestOnServerArcticConnectorTest.java @@ -24,7 +24,8 @@ public class TestOnServerArcticConnectorTest extends TestBaseArcticConnectorTest { public static void main(String[] args) throws Exception { - DistributedQueryRunner queryRunner = new TestOnServerArcticConnectorTest().createQueryRunnerForClient(); + DistributedQueryRunner queryRunner = + new TestOnServerArcticConnectorTest().createQueryRunnerForClient(); Thread.sleep(10); Logger log = Logger.get(TestOnServerArcticConnectorTest.class); log.info("======== SERVER STARTED ========"); diff --git a/trino/src/test/java/com/netease/arctic/trino/iceberg/TestUnionModule.java b/trino/src/test/java/com/netease/arctic/trino/iceberg/TestUnionModule.java index da8cd0e27d..e66e81bde4 100644 --- a/trino/src/test/java/com/netease/arctic/trino/iceberg/TestUnionModule.java +++ b/trino/src/test/java/com/netease/arctic/trino/iceberg/TestUnionModule.java @@ -18,6 +18,11 @@ package com.netease.arctic.trino.iceberg; +import static com.google.inject.multibindings.Multibinder.newSetBinder; +import static io.airlift.configuration.ConfigBinder.configBinder; +import static io.airlift.json.JsonCodecBinder.jsonCodecBinder; +import static org.weakref.jmx.guice.ExportBinder.newExporter; + import com.google.inject.Binder; import com.google.inject.Module; import com.google.inject.Scopes; @@ 
-29,7 +34,6 @@ import com.netease.arctic.trino.ArcticHdfsConfiguration; import com.netease.arctic.trino.ArcticMetadataFactory; import com.netease.arctic.trino.ArcticPageSourceProvider; -import com.netease.arctic.trino.ArcticSessionProperties; import com.netease.arctic.trino.ArcticTransactionManager; import com.netease.arctic.trino.keyed.KeyedConnectorSplitManager; import com.netease.arctic.trino.keyed.KeyedPageSourceProvider; @@ -70,73 +74,87 @@ import io.trino.spi.procedure.Procedure; import org.weakref.jmx.guice.ExportBinder; -import static com.google.inject.multibindings.Multibinder.newSetBinder; -import static io.airlift.configuration.ConfigBinder.configBinder; -import static io.airlift.json.JsonCodecBinder.jsonCodecBinder; -import static org.weakref.jmx.guice.ExportBinder.newExporter; - public class TestUnionModule implements Module { - @Override public void configure(Binder binder) { - //base - configBinder(binder).bindConfig(ArcticConfig.class); - binder.bind(IcebergSessionProperties.class).in(Scopes.SINGLETON); - binder.bind(KeyedConnectorSplitManager.class).in(Scopes.SINGLETON); - binder.bind(KeyedPageSourceProvider.class).in(Scopes.SINGLETON); - binder.bind(ArcticCatalogFactory.class).to(TestArcticCatalogFactory.class).in(Scopes.SINGLETON); - binder.bind(TrinoCatalogFactory.class).to(ArcticTrinoCatalogFactory.class).in(Scopes.SINGLETON); - binder.bind(ArcticTransactionManager.class).in(Scopes.SINGLETON); - binder.bind(ArcticMetadataFactory.class).in(Scopes.SINGLETON); - binder.bind(TableStatisticsWriter.class).in(Scopes.SINGLETON); - binder.bind(ConnectorSplitManager.class).to(ArcticConnectorSplitManager.class).in(Scopes.SINGLETON); - binder.bind(ConnectorPageSourceProvider.class).to(ArcticPageSourceProvider.class).in(Scopes.SINGLETON); - - //############# IcebergModule - configBinder(binder).bindConfig(HiveMetastoreConfig.class); - configBinder(binder).bindConfig(IcebergConfig.class); - - newSetBinder(binder, SessionPropertiesProvider.class).addBinding().to(IcebergSessionProperties.class).in(Scopes.SINGLETON); - binder.bind(IcebergTableProperties.class).in(Scopes.SINGLETON); - - binder.bind(IcebergSplitManager.class).in(Scopes.SINGLETON); - binder.bind(IcebergPageSourceProvider.class).in(Scopes.SINGLETON); - - binder.bind(ConnectorPageSinkProvider.class) - .to(IcebergPageSinkProvider.class).in(Scopes.SINGLETON); - - binder.bind(ConnectorNodePartitioningProvider.class) - .to(IcebergNodePartitioningProvider.class).in(Scopes.SINGLETON); - - configBinder(binder).bindConfig(OrcReaderConfig.class); - configBinder(binder).bindConfig(OrcWriterConfig.class); - - configBinder(binder).bindConfig(ParquetReaderConfig.class); - configBinder(binder).bindConfig(ParquetWriterConfig.class); - - jsonCodecBinder(binder).bindJsonCodec(CommitTaskData.class); - - binder.bind(FileFormatDataSourceStats.class).in(Scopes.SINGLETON); - newExporter(binder).export(FileFormatDataSourceStats.class).withGeneratedName(); - - binder.bind(IcebergFileWriterFactory.class).in(Scopes.SINGLETON); - newExporter(binder).export(IcebergFileWriterFactory.class).withGeneratedName(); - - Multibinder procedures = newSetBinder(binder, Procedure.class); - procedures.addBinding().toProvider(RollbackToSnapshotProcedure.class).in(Scopes.SINGLETON); - - Multibinder tableProcedures = newSetBinder(binder, TableProcedureMetadata.class); - tableProcedures.addBinding().toProvider(OptimizeTableProcedure.class).in(Scopes.SINGLETON); - tableProcedures.addBinding().toProvider(ExpireSnapshotsTableProcedure.class).in(Scopes.SINGLETON); - 
tableProcedures.addBinding().toProvider(RemoveOrphanFilesTableProcedure.class).in(Scopes.SINGLETON); - - //hdfs - ConfigBinder.configBinder(binder).bindConfig(HdfsConfig.class); - binder.bind(HdfsConfiguration.class).to(ArcticHdfsConfiguration.class).in(Scopes.SINGLETON); - binder.bind(HdfsAuthentication.class).to(ArcticHdfsAuthentication.class).in(Scopes.SINGLETON); - binder.bind(HdfsEnvironment.class).in(Scopes.SINGLETON); - binder.bind(NamenodeStats.class).in(Scopes.SINGLETON); - ExportBinder.newExporter(binder).export(NamenodeStats.class).withGeneratedName(); + // base + configBinder(binder).bindConfig(ArcticConfig.class); + binder.bind(IcebergSessionProperties.class).in(Scopes.SINGLETON); + binder.bind(KeyedConnectorSplitManager.class).in(Scopes.SINGLETON); + binder.bind(KeyedPageSourceProvider.class).in(Scopes.SINGLETON); + binder.bind(ArcticCatalogFactory.class).to(TestArcticCatalogFactory.class).in(Scopes.SINGLETON); + binder.bind(TrinoCatalogFactory.class).to(ArcticTrinoCatalogFactory.class).in(Scopes.SINGLETON); + binder.bind(ArcticTransactionManager.class).in(Scopes.SINGLETON); + binder.bind(ArcticMetadataFactory.class).in(Scopes.SINGLETON); + binder.bind(TableStatisticsWriter.class).in(Scopes.SINGLETON); + binder + .bind(ConnectorSplitManager.class) + .to(ArcticConnectorSplitManager.class) + .in(Scopes.SINGLETON); + binder + .bind(ConnectorPageSourceProvider.class) + .to(ArcticPageSourceProvider.class) + .in(Scopes.SINGLETON); + + // ############# IcebergModule + configBinder(binder).bindConfig(HiveMetastoreConfig.class); + configBinder(binder).bindConfig(IcebergConfig.class); + + newSetBinder(binder, SessionPropertiesProvider.class) + .addBinding() + .to(IcebergSessionProperties.class) + .in(Scopes.SINGLETON); + binder.bind(IcebergTableProperties.class).in(Scopes.SINGLETON); + + binder.bind(IcebergSplitManager.class).in(Scopes.SINGLETON); + binder.bind(IcebergPageSourceProvider.class).in(Scopes.SINGLETON); + + binder + .bind(ConnectorPageSinkProvider.class) + .to(IcebergPageSinkProvider.class) + .in(Scopes.SINGLETON); + + binder + .bind(ConnectorNodePartitioningProvider.class) + .to(IcebergNodePartitioningProvider.class) + .in(Scopes.SINGLETON); + + configBinder(binder).bindConfig(OrcReaderConfig.class); + configBinder(binder).bindConfig(OrcWriterConfig.class); + + configBinder(binder).bindConfig(ParquetReaderConfig.class); + configBinder(binder).bindConfig(ParquetWriterConfig.class); + + jsonCodecBinder(binder).bindJsonCodec(CommitTaskData.class); + + binder.bind(FileFormatDataSourceStats.class).in(Scopes.SINGLETON); + newExporter(binder).export(FileFormatDataSourceStats.class).withGeneratedName(); + + binder.bind(IcebergFileWriterFactory.class).in(Scopes.SINGLETON); + newExporter(binder).export(IcebergFileWriterFactory.class).withGeneratedName(); + + Multibinder procedures = newSetBinder(binder, Procedure.class); + procedures.addBinding().toProvider(RollbackToSnapshotProcedure.class).in(Scopes.SINGLETON); + + Multibinder tableProcedures = + newSetBinder(binder, TableProcedureMetadata.class); + tableProcedures.addBinding().toProvider(OptimizeTableProcedure.class).in(Scopes.SINGLETON); + tableProcedures + .addBinding() + .toProvider(ExpireSnapshotsTableProcedure.class) + .in(Scopes.SINGLETON); + tableProcedures + .addBinding() + .toProvider(RemoveOrphanFilesTableProcedure.class) + .in(Scopes.SINGLETON); + + // hdfs + ConfigBinder.configBinder(binder).bindConfig(HdfsConfig.class); + binder.bind(HdfsConfiguration.class).to(ArcticHdfsConfiguration.class).in(Scopes.SINGLETON); + 
binder.bind(HdfsAuthentication.class).to(ArcticHdfsAuthentication.class).in(Scopes.SINGLETON); + binder.bind(HdfsEnvironment.class).in(Scopes.SINGLETON); + binder.bind(NamenodeStats.class).in(Scopes.SINGLETON); + ExportBinder.newExporter(binder).export(NamenodeStats.class).withGeneratedName(); } }
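
Note: the style applied throughout the hunks above (two-space indentation, lines wrapped at 100 columns, chained calls broken onto one call per line, static imports grouped ahead of regular imports, and the removed, presumably unused, ArcticSessionProperties import) is consistent with what google-java-format produces when run through spotless-maven-plugin. The build-side wiring for the trino module is not shown in this section, so the snippet below is only a sketch of a typical configuration: the plugin coordinates and the check/apply goals are standard Spotless usage, while the version and the exact option set are assumptions.

    <plugin>
      <groupId>com.diffplug.spotless</groupId>
      <artifactId>spotless-maven-plugin</artifactId>
      <!-- version is an assumption; use whatever the parent pom pins -->
      <version>2.27.2</version>
      <configuration>
        <java>
          <!-- google-java-format: 2-space indent, 100-column wrap, sorted imports -->
          <googleJavaFormat/>
          <!-- drops imports that are no longer referenced -->
          <removeUnusedImports/>
        </java>
      </configuration>
      <executions>
        <execution>
          <goals>
            <!-- fails the build on unformatted sources; run "mvn spotless:apply" to fix -->
            <goal>check</goal>
          </goals>
        </execution>
      </executions>
    </plugin>

With a setup like this, a one-off "mvn -pl trino spotless:apply" would regenerate exactly this kind of mechanical reformatting, which is why the hunks in this patch change layout only and leave runtime behavior untouched.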