From 0a0670e9aa1d7ed99600f986df0e5be29ebb0b7b Mon Sep 17 00:00:00 2001 From: Gidon Gershinsky Date: Tue, 16 Aug 2022 09:20:56 +0300 Subject: [PATCH 01/20] rebase Co-Authored-By: Jian Tang rename to default encr mng factory test fix for: CTAS produces unencrypted files sync with design check file format in table props use new kms interface refactor common KMS client for unitests refactor update move factory to another PR update --- .../iceberg/hadoop/HadoopTableOperations.java | 18 +++ .../org/apache/iceberg/hive/HiveCatalog.java | 10 +- .../iceberg/hive/HiveTableOperations.java | 28 ++++ .../spark/sql/TestTableEncryption.java | 150 ++++++++++++++++++ 4 files changed, 205 insertions(+), 1 deletion(-) create mode 100644 spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestTableEncryption.java diff --git a/core/src/main/java/org/apache/iceberg/hadoop/HadoopTableOperations.java b/core/src/main/java/org/apache/iceberg/hadoop/HadoopTableOperations.java index 4e815ceff59a..ae7ea982128a 100644 --- a/core/src/main/java/org/apache/iceberg/hadoop/HadoopTableOperations.java +++ b/core/src/main/java/org/apache/iceberg/hadoop/HadoopTableOperations.java @@ -38,6 +38,7 @@ import org.apache.iceberg.TableOperations; import org.apache.iceberg.TableProperties; import org.apache.iceberg.encryption.EncryptionManager; +import org.apache.iceberg.encryption.EncryptionManagerFactory; import org.apache.iceberg.exceptions.CommitFailedException; import org.apache.iceberg.exceptions.RuntimeIOException; import org.apache.iceberg.exceptions.ValidationException; @@ -67,17 +68,29 @@ public class HadoopTableOperations implements TableOperations { private final Path location; private final FileIO fileIO; private final LockManager lockManager; + private final EncryptionManagerFactory encryptionManagerFactory; private volatile TableMetadata currentMetadata = null; private volatile Integer version = null; private volatile boolean shouldRefresh = true; + // Not used. 
TODO deprecate / handle in revapi protected HadoopTableOperations( Path location, FileIO fileIO, Configuration conf, LockManager lockManager) { + this(location, fileIO, EncryptionManagerFactory.NO_ENCRYPTION, conf, lockManager); + } + + protected HadoopTableOperations( + Path location, + FileIO fileIO, + EncryptionManagerFactory encryptionManagerFactory, + Configuration conf, + LockManager lockManager) { this.conf = conf; this.location = location; this.fileIO = fileIO; this.lockManager = lockManager; + this.encryptionManagerFactory = encryptionManagerFactory; } @Override @@ -178,6 +191,11 @@ public FileIO io() { return fileIO; } + @Override + public EncryptionManager encryption() { + return encryptionManagerFactory.create(current()); + } + @Override public LocationProvider locationProvider() { return LocationProviders.locationsFor(current().location(), current().properties()); diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java index b4f49e29fc49..c2ef747d235c 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java @@ -43,6 +43,9 @@ import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.SupportsNamespaces; import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.encryption.StandardEncryptionManagerFactory; +import org.apache.iceberg.encryption.EncryptionManager; +import org.apache.iceberg.encryption.EncryptionManagerFactory; import org.apache.iceberg.exceptions.NamespaceNotEmptyException; import org.apache.iceberg.exceptions.NoSuchNamespaceException; import org.apache.iceberg.exceptions.NoSuchTableException; @@ -76,6 +79,7 @@ public class HiveCatalog extends BaseMetastoreCatalog implements SupportsNamespa private String name; private Configuration conf; private FileIO fileIO; + private EncryptionManagerFactory encryptionManagerFactory; private ClientPool clients; private boolean listAllTables = false; private Map catalogProperties; @@ -110,6 +114,9 @@ public void initialize(String inputName, Map properties) { ? 
new HadoopFileIO(conf) : CatalogUtil.loadFileIO(fileIOImpl, properties, conf); + this.encryptionManagerFactory = new StandardEncryptionManagerFactory(); + this.encryptionManagerFactory.initialize(properties); + this.clients = new CachedClientPool(conf, properties); } @@ -512,7 +519,8 @@ private boolean isValidateNamespace(Namespace namespace) { public TableOperations newTableOps(TableIdentifier tableIdentifier) { String dbName = tableIdentifier.namespace().level(0); String tableName = tableIdentifier.name(); - return new HiveTableOperations(conf, clients, fileIO, name, dbName, tableName); + return new HiveTableOperations( + conf, clients, fileIO, encryptionManagerFactory, name, dbName, tableName); } @Override diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index 64f091385297..e42f3939f3db 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -45,6 +45,8 @@ import org.apache.iceberg.SortOrderParser; import org.apache.iceberg.TableMetadata; import org.apache.iceberg.TableProperties; +import org.apache.iceberg.encryption.EncryptionManager; +import org.apache.iceberg.encryption.EncryptionManagerFactory; import org.apache.iceberg.exceptions.AlreadyExistsException; import org.apache.iceberg.exceptions.CommitFailedException; import org.apache.iceberg.exceptions.CommitStateUnknownException; @@ -106,8 +108,10 @@ public static String translateToIcebergProp(String hmsProp) { private final long maxHiveTablePropertySize; private final int metadataRefreshMaxRetries; private final FileIO fileIO; + private final EncryptionManagerFactory encryptionManagerFactory; private final ClientPool metaClients; + /** Tests only */ protected HiveTableOperations( Configuration conf, ClientPool metaClients, @@ -115,9 +119,28 @@ protected HiveTableOperations( String catalogName, String database, String table) { + this( + conf, + metaClients, + fileIO, + EncryptionManagerFactory.NO_ENCRYPTION, + catalogName, + database, + table); + } + + protected HiveTableOperations( + Configuration conf, + ClientPool metaClients, + FileIO fileIO, + EncryptionManagerFactory encryptionManagerFactory, + String catalogName, + String database, + String table) { this.conf = conf; this.metaClients = metaClients; this.fileIO = fileIO; + this.encryptionManagerFactory = encryptionManagerFactory; this.fullName = catalogName + "." + database + "." + table; this.catalogName = catalogName; this.database = database; @@ -140,6 +163,11 @@ public FileIO io() { return fileIO; } + @Override + public EncryptionManager encryption() { + return encryptionManagerFactory.create(current()); + } + @Override protected void doRefresh() { String metadataLocation = null; diff --git a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestTableEncryption.java b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestTableEncryption.java new file mode 100644 index 000000000000..d984bcee2264 --- /dev/null +++ b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestTableEncryption.java @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.spark.sql; + +import static org.apache.iceberg.Files.localInput; +import static org.apache.iceberg.types.Types.NestedField.optional; + +import java.io.File; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import org.apache.iceberg.AssertHelpers; +import org.apache.iceberg.MetadataTableType; +import org.apache.iceberg.Schema; +import org.apache.iceberg.encryption.Ciphers; +import org.apache.iceberg.encryption.UnitestKMS; +import org.apache.iceberg.io.SeekableInputStream; +import org.apache.iceberg.parquet.Parquet; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.Streams; +import org.apache.iceberg.spark.SparkCatalogTestBase; +import org.apache.iceberg.types.Types; +import org.apache.parquet.crypto.ParquetCryptoRuntimeException; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class TestTableEncryption extends SparkCatalogTestBase { + public TestTableEncryption( + String catalogName, String implementation, Map config) { + super(catalogName, implementation, config); + } + + @Before + public void createTables() throws IOException { + sql( + "CREATE TABLE %s (id bigint, data string, float float) USING iceberg " + + "TBLPROPERTIES ( " + + "'encryption.table-key-id'='%s' , " + + "'format-version'='2' , " + + "'encryption.kms.client-impl'='org.apache.iceberg.encryption.UnitestKMS' )", + tableName, UnitestKMS.MASTER_KEY_NAME1); + sql("INSERT INTO %s VALUES (1, 'a', 1.0), (2, 'b', 2.0), (3, 'c', float('NaN'))", tableName); + } + + @After + public void removeTables() { + sql("DROP TABLE IF EXISTS %s", tableName); + } + + @Test + public void testSelect() { + List expected = + ImmutableList.of(row(1L, "a", 1.0F), row(2L, "b", 2.0F), row(3L, "c", Float.NaN)); + + assertEquals("Should return all expected rows", expected, sql("SELECT * FROM %s", tableName)); + } + + @Test + public void testDirectDataFileRead() { + List dataFileTable = + sql("SELECT file_path FROM %s.%s", tableName, MetadataTableType.ALL_DATA_FILES); + List dataFiles = + Streams.concat(dataFileTable.stream()) + .map(row -> (String) row[0]) + .collect(Collectors.toList()); + Schema schema = new Schema(optional(0, "id", Types.IntegerType.get())); + for (String filePath : dataFiles) { + AssertHelpers.assertThrows( + "Read without keys", + ParquetCryptoRuntimeException.class, + "Trying to read file with encrypted footer. 
No keys available", + () -> + Parquet.read(localInput(filePath)) + .project(schema) + .callInit() + .build() + .iterator() + .next()); + } + } + + @Test + public void testManifestEncryption() throws IOException { + List manifestFileTable = + sql("SELECT path FROM %s.%s", tableName, MetadataTableType.MANIFESTS); + + List manifestFiles = + Streams.concat(manifestFileTable.stream()) + .map(row -> (String) row[0]) + .collect(Collectors.toList()); + + if (!(manifestFiles.size() > 0)) { + throw new RuntimeException("No manifest files found for table " + tableName); + } + + String metadataFolderPath = null; + byte[] magic = new byte[4]; + + // Check encryption of manifest files + for (String manifestFilePath : manifestFiles) { + SeekableInputStream manifestFileReader = localInput(manifestFilePath).newStream(); + manifestFileReader.read(magic); + manifestFileReader.close(); + Assert.assertArrayEquals( + magic, Ciphers.GCM_STREAM_MAGIC_STRING.getBytes(StandardCharsets.UTF_8)); + + if (metadataFolderPath == null) { + metadataFolderPath = new File(manifestFilePath).getParent().replaceFirst("file:", ""); + } + } + + // Find metadata list files and check their encryption + File[] listOfMetadataFiles = new File(metadataFolderPath).listFiles(); + boolean foundManifestListFile = false; + for (File metadataFile : listOfMetadataFiles) { + if (metadataFile.getName().startsWith("snap-")) { + foundManifestListFile = true; + SeekableInputStream manifestFileReader = localInput(metadataFile).newStream(); + manifestFileReader.read(magic); + manifestFileReader.close(); + Assert.assertArrayEquals( + magic, Ciphers.GCM_STREAM_MAGIC_STRING.getBytes(StandardCharsets.UTF_8)); + } + } + + if (!foundManifestListFile) { + throw new RuntimeException("No manifest list files found for table " + tableName); + } + } +} From 2a7c80433c5025554aa5cc7a83b79dcf198af66a Mon Sep 17 00:00:00 2001 From: Gidon Gershinsky Date: Wed, 20 Sep 2023 10:33:54 +0300 Subject: [PATCH 02/20] address review comments --- .../apache/iceberg/hadoop/HadoopCatalog.java | 432 ------------------ .../iceberg/hadoop/HadoopTableOperations.java | 16 +- .../org/apache/iceberg/hive/HiveCatalog.java | 9 +- .../iceberg/hive/HiveTableOperations.java | 22 +- .../spark/sql/TestTableEncryption.java | 48 +- 5 files changed, 66 insertions(+), 461 deletions(-) delete mode 100644 core/src/main/java/org/apache/iceberg/hadoop/HadoopCatalog.java diff --git a/core/src/main/java/org/apache/iceberg/hadoop/HadoopCatalog.java b/core/src/main/java/org/apache/iceberg/hadoop/HadoopCatalog.java deleted file mode 100644 index 92ba25af0f1c..000000000000 --- a/core/src/main/java/org/apache/iceberg/hadoop/HadoopCatalog.java +++ /dev/null @@ -1,432 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.iceberg.hadoop; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.UncheckedIOException; -import java.nio.file.AccessDeniedException; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.Set; -import org.apache.hadoop.conf.Configurable; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.PathFilter; -import org.apache.hadoop.fs.RemoteIterator; -import org.apache.iceberg.BaseMetastoreCatalog; -import org.apache.iceberg.CatalogProperties; -import org.apache.iceberg.CatalogUtil; -import org.apache.iceberg.LockManager; -import org.apache.iceberg.Schema; -import org.apache.iceberg.TableMetadata; -import org.apache.iceberg.TableOperations; -import org.apache.iceberg.catalog.Namespace; -import org.apache.iceberg.catalog.SupportsNamespaces; -import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.exceptions.AlreadyExistsException; -import org.apache.iceberg.exceptions.NamespaceNotEmptyException; -import org.apache.iceberg.exceptions.NoSuchNamespaceException; -import org.apache.iceberg.exceptions.NoSuchTableException; -import org.apache.iceberg.exceptions.RuntimeIOException; -import org.apache.iceberg.io.CloseableGroup; -import org.apache.iceberg.io.FileIO; -import org.apache.iceberg.relocated.com.google.common.base.Joiner; -import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; -import org.apache.iceberg.relocated.com.google.common.base.Preconditions; -import org.apache.iceberg.relocated.com.google.common.base.Strings; -import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; -import org.apache.iceberg.relocated.com.google.common.collect.Lists; -import org.apache.iceberg.relocated.com.google.common.collect.Sets; -import org.apache.iceberg.util.LocationUtil; -import org.apache.iceberg.util.LockManagers; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * HadoopCatalog provides a way to use table names like db.table to work with path-based tables - * under a common location. It uses a specified directory under a specified filesystem as the - * warehouse directory, and organizes multiple levels directories that mapped to the database, - * namespace and the table respectively. The HadoopCatalog takes a location as the warehouse - * directory. When creating a table such as $db.$tbl, it creates $db/$tbl directory under the - * warehouse directory, and put the table metadata into that directory. - * - *
<p>The HadoopCatalog now supports {@link org.apache.iceberg.catalog.Catalog#createTable}, {@link - * org.apache.iceberg.catalog.Catalog#dropTable}, the {@link - * org.apache.iceberg.catalog.Catalog#renameTable} is not supported yet. - * - * <p>
Note: The HadoopCatalog requires that the underlying file system supports atomic rename. - */ -public class HadoopCatalog extends BaseMetastoreCatalog - implements SupportsNamespaces, Configurable { - - private static final Logger LOG = LoggerFactory.getLogger(HadoopCatalog.class); - - private static final String TABLE_METADATA_FILE_EXTENSION = ".metadata.json"; - private static final Joiner SLASH = Joiner.on("/"); - private static final PathFilter TABLE_FILTER = - path -> path.getName().endsWith(TABLE_METADATA_FILE_EXTENSION); - private static final String HADOOP_SUPPRESS_PERMISSION_ERROR = "suppress-permission-error"; - - private String catalogName; - private Configuration conf; - private CloseableGroup closeableGroup; - private String warehouseLocation; - private FileSystem fs; - private FileIO fileIO; - private LockManager lockManager; - private boolean suppressPermissionError = false; - private Map catalogProperties; - - public HadoopCatalog() {} - - @Override - public void initialize(String name, Map properties) { - this.catalogProperties = ImmutableMap.copyOf(properties); - String inputWarehouseLocation = properties.get(CatalogProperties.WAREHOUSE_LOCATION); - Preconditions.checkArgument( - !Strings.isNullOrEmpty(inputWarehouseLocation), - "Cannot initialize HadoopCatalog because warehousePath must not be null or empty"); - - this.catalogName = name; - this.warehouseLocation = LocationUtil.stripTrailingSlash(inputWarehouseLocation); - this.fs = Util.getFs(new Path(warehouseLocation), conf); - - String fileIOImpl = - properties.getOrDefault( - CatalogProperties.FILE_IO_IMPL, "org.apache.iceberg.hadoop.HadoopFileIO"); - - this.fileIO = CatalogUtil.loadFileIO(fileIOImpl, properties, conf); - - this.lockManager = LockManagers.from(properties); - - this.closeableGroup = new CloseableGroup(); - closeableGroup.addCloseable(lockManager); - closeableGroup.addCloseable(metricsReporter()); - closeableGroup.setSuppressCloseFailure(true); - - this.suppressPermissionError = - Boolean.parseBoolean(properties.get(HADOOP_SUPPRESS_PERMISSION_ERROR)); - } - - /** - * The constructor of the HadoopCatalog. It uses the passed location as its warehouse directory. - * - * @param conf The Hadoop configuration - * @param warehouseLocation The location used as warehouse directory - */ - public HadoopCatalog(Configuration conf, String warehouseLocation) { - setConf(conf); - initialize("hadoop", ImmutableMap.of(CatalogProperties.WAREHOUSE_LOCATION, warehouseLocation)); - } - - @Override - public String name() { - return catalogName; - } - - private boolean shouldSuppressPermissionError(IOException ioException) { - if (suppressPermissionError) { - return ioException instanceof AccessDeniedException - || (ioException.getMessage() != null - && ioException.getMessage().contains("AuthorizationPermissionMismatch")); - } - return false; - } - - private boolean isTableDir(Path path) { - Path metadataPath = new Path(path, "metadata"); - // Only the path which contains metadata is the path for table, otherwise it could be - // still a namespace. 
- try { - return fs.listStatus(metadataPath, TABLE_FILTER).length >= 1; - } catch (FileNotFoundException e) { - return false; - } catch (IOException e) { - if (shouldSuppressPermissionError(e)) { - LOG.warn("Unable to list metadata directory {}", metadataPath, e); - return false; - } else { - throw new UncheckedIOException(e); - } - } - } - - private boolean isDirectory(Path path) { - try { - return fs.getFileStatus(path).isDirectory(); - } catch (FileNotFoundException e) { - return false; - } catch (IOException e) { - if (shouldSuppressPermissionError(e)) { - LOG.warn("Unable to list directory {}", path, e); - return false; - } else { - throw new UncheckedIOException(e); - } - } - } - - @Override - public List listTables(Namespace namespace) { - Preconditions.checkArgument( - namespace.levels().length >= 1, "Missing database in table identifier: %s", namespace); - - Path nsPath = new Path(warehouseLocation, SLASH.join(namespace.levels())); - Set tblIdents = Sets.newHashSet(); - - try { - if (!isDirectory(nsPath)) { - throw new NoSuchNamespaceException("Namespace does not exist: %s", namespace); - } - RemoteIterator it = fs.listStatusIterator(nsPath); - while (it.hasNext()) { - FileStatus status = it.next(); - if (!status.isDirectory()) { - // Ignore the path which is not a directory. - continue; - } - - Path path = status.getPath(); - if (isTableDir(path)) { - TableIdentifier tblIdent = TableIdentifier.of(namespace, path.getName()); - tblIdents.add(tblIdent); - } - } - } catch (IOException ioe) { - throw new RuntimeIOException(ioe, "Failed to list tables under: %s", namespace); - } - - return Lists.newArrayList(tblIdents); - } - - @Override - protected boolean isValidIdentifier(TableIdentifier identifier) { - return true; - } - - @Override - protected TableOperations newTableOps(TableIdentifier identifier) { - return new HadoopTableOperations( - new Path(defaultWarehouseLocation(identifier)), fileIO, conf, lockManager); - } - - @Override - protected String defaultWarehouseLocation(TableIdentifier tableIdentifier) { - String tableName = tableIdentifier.name(); - StringBuilder sb = new StringBuilder(); - - sb.append(warehouseLocation).append('/'); - for (String level : tableIdentifier.namespace().levels()) { - sb.append(level).append('/'); - } - sb.append(tableName); - - return sb.toString(); - } - - @Override - public boolean dropTable(TableIdentifier identifier, boolean purge) { - if (!isValidIdentifier(identifier)) { - throw new NoSuchTableException("Invalid identifier: %s", identifier); - } - - Path tablePath = new Path(defaultWarehouseLocation(identifier)); - TableOperations ops = newTableOps(identifier); - TableMetadata lastMetadata = ops.current(); - try { - if (lastMetadata == null) { - LOG.debug("Not an iceberg table: {}", identifier); - return false; - } else { - if (purge) { - // Since the data files and the metadata files may store in different locations, - // so it has to call dropTableData to force delete the data file. 
- CatalogUtil.dropTableData(ops.io(), lastMetadata); - } - return fs.delete(tablePath, true /* recursive */); - } - } catch (IOException e) { - throw new RuntimeIOException(e, "Failed to delete file: %s", tablePath); - } - } - - @Override - public void renameTable(TableIdentifier from, TableIdentifier to) { - throw new UnsupportedOperationException("Cannot rename Hadoop tables"); - } - - @Override - public void createNamespace(Namespace namespace, Map meta) { - Preconditions.checkArgument( - !namespace.isEmpty(), "Cannot create namespace with invalid name: %s", namespace); - if (!meta.isEmpty()) { - throw new UnsupportedOperationException( - "Cannot create namespace " + namespace + ": metadata is not supported"); - } - - Path nsPath = new Path(warehouseLocation, SLASH.join(namespace.levels())); - - if (isNamespace(nsPath)) { - throw new AlreadyExistsException("Namespace already exists: %s", namespace); - } - - try { - fs.mkdirs(nsPath); - - } catch (IOException e) { - throw new RuntimeIOException(e, "Create namespace failed: %s", namespace); - } - } - - @Override - public List listNamespaces(Namespace namespace) { - Path nsPath = - namespace.isEmpty() - ? new Path(warehouseLocation) - : new Path(warehouseLocation, SLASH.join(namespace.levels())); - if (!isNamespace(nsPath)) { - throw new NoSuchNamespaceException("Namespace does not exist: %s", namespace); - } - - try { - // using the iterator listing allows for paged downloads - // from HDFS and prefetching from object storage. - List namespaces = Lists.newArrayList(); - RemoteIterator it = fs.listStatusIterator(nsPath); - while (it.hasNext()) { - Path path = it.next().getPath(); - if (isNamespace(path)) { - namespaces.add(append(namespace, path.getName())); - } - } - return namespaces; - } catch (IOException ioe) { - throw new RuntimeIOException(ioe, "Failed to list namespace under: %s", namespace); - } - } - - private Namespace append(Namespace ns, String name) { - String[] levels = Arrays.copyOfRange(ns.levels(), 0, ns.levels().length + 1); - levels[ns.levels().length] = name; - return Namespace.of(levels); - } - - @Override - public boolean dropNamespace(Namespace namespace) { - Path nsPath = new Path(warehouseLocation, SLASH.join(namespace.levels())); - - if (!isNamespace(nsPath) || namespace.isEmpty()) { - return false; - } - - try { - if (fs.listStatusIterator(nsPath).hasNext()) { - throw new NamespaceNotEmptyException("Namespace %s is not empty.", namespace); - } - - return fs.delete(nsPath, false /* recursive */); - } catch (IOException e) { - throw new RuntimeIOException(e, "Namespace delete failed: %s", namespace); - } - } - - @Override - public boolean setProperties(Namespace namespace, Map properties) { - throw new UnsupportedOperationException( - "Cannot set namespace properties " + namespace + " : setProperties is not supported"); - } - - @Override - public boolean removeProperties(Namespace namespace, Set properties) { - throw new UnsupportedOperationException( - "Cannot remove properties " + namespace + " : removeProperties is not supported"); - } - - @Override - public Map loadNamespaceMetadata(Namespace namespace) { - Path nsPath = new Path(warehouseLocation, SLASH.join(namespace.levels())); - - if (!isNamespace(nsPath) || namespace.isEmpty()) { - throw new NoSuchNamespaceException("Namespace does not exist: %s", namespace); - } - - return ImmutableMap.of("location", nsPath.toString()); - } - - private boolean isNamespace(Path path) { - return isDirectory(path) && !isTableDir(path); - } - - @Override - public void 
close() throws IOException { - closeableGroup.close(); - } - - @Override - public String toString() { - return MoreObjects.toStringHelper(this) - .add("name", catalogName) - .add("location", warehouseLocation) - .toString(); - } - - @Override - public TableBuilder buildTable(TableIdentifier identifier, Schema schema) { - return new HadoopCatalogTableBuilder(identifier, schema); - } - - @Override - public void setConf(Configuration conf) { - this.conf = conf; - } - - @Override - public Configuration getConf() { - return conf; - } - - @Override - protected Map properties() { - return catalogProperties == null ? ImmutableMap.of() : catalogProperties; - } - - private class HadoopCatalogTableBuilder extends BaseMetastoreCatalogTableBuilder { - private final String defaultLocation; - - private HadoopCatalogTableBuilder(TableIdentifier identifier, Schema schema) { - super(identifier, schema); - defaultLocation = defaultWarehouseLocation(identifier); - } - - @Override - public TableBuilder withLocation(String location) { - Preconditions.checkArgument( - location == null || location.equals(defaultLocation), - "Cannot set a custom location for a path-based table. Expected " - + defaultLocation - + " but got " - + location); - return this; - } - } -} diff --git a/core/src/main/java/org/apache/iceberg/hadoop/HadoopTableOperations.java b/core/src/main/java/org/apache/iceberg/hadoop/HadoopTableOperations.java index ae7ea982128a..99d5bf30bcf9 100644 --- a/core/src/main/java/org/apache/iceberg/hadoop/HadoopTableOperations.java +++ b/core/src/main/java/org/apache/iceberg/hadoop/HadoopTableOperations.java @@ -38,7 +38,8 @@ import org.apache.iceberg.TableOperations; import org.apache.iceberg.TableProperties; import org.apache.iceberg.encryption.EncryptionManager; -import org.apache.iceberg.encryption.EncryptionManagerFactory; +import org.apache.iceberg.encryption.PlaintextEncryptionManager; +import org.apache.iceberg.encryption.StandardEncryptionManagerFactory; import org.apache.iceberg.exceptions.CommitFailedException; import org.apache.iceberg.exceptions.RuntimeIOException; import org.apache.iceberg.exceptions.ValidationException; @@ -68,22 +69,21 @@ public class HadoopTableOperations implements TableOperations { private final Path location; private final FileIO fileIO; private final LockManager lockManager; - private final EncryptionManagerFactory encryptionManagerFactory; + private final StandardEncryptionManagerFactory encryptionManagerFactory; private volatile TableMetadata currentMetadata = null; private volatile Integer version = null; private volatile boolean shouldRefresh = true; - // Not used. 
TODO deprecate / handle in revapi protected HadoopTableOperations( Path location, FileIO fileIO, Configuration conf, LockManager lockManager) { - this(location, fileIO, EncryptionManagerFactory.NO_ENCRYPTION, conf, lockManager); + this(location, fileIO, null, conf, lockManager); } protected HadoopTableOperations( Path location, FileIO fileIO, - EncryptionManagerFactory encryptionManagerFactory, + StandardEncryptionManagerFactory encryptionManagerFactory, Configuration conf, LockManager lockManager) { this.conf = conf; @@ -193,7 +193,11 @@ public FileIO io() { @Override public EncryptionManager encryption() { - return encryptionManagerFactory.create(current()); + if (encryptionManagerFactory == null || current() == null || current().formatVersion() < 2) { + return PlaintextEncryptionManager.instance(); + } + + return encryptionManagerFactory.create(current().properties()); } @Override diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java index c2ef747d235c..d7eb2553d05b 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java @@ -44,8 +44,6 @@ import org.apache.iceberg.catalog.SupportsNamespaces; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.encryption.StandardEncryptionManagerFactory; -import org.apache.iceberg.encryption.EncryptionManager; -import org.apache.iceberg.encryption.EncryptionManagerFactory; import org.apache.iceberg.exceptions.NamespaceNotEmptyException; import org.apache.iceberg.exceptions.NoSuchNamespaceException; import org.apache.iceberg.exceptions.NoSuchTableException; @@ -79,7 +77,7 @@ public class HiveCatalog extends BaseMetastoreCatalog implements SupportsNamespa private String name; private Configuration conf; private FileIO fileIO; - private EncryptionManagerFactory encryptionManagerFactory; + private StandardEncryptionManagerFactory encryptionManagerFactory; private ClientPool clients; private boolean listAllTables = false; private Map catalogProperties; @@ -114,8 +112,7 @@ public void initialize(String inputName, Map properties) { ? 
new HadoopFileIO(conf) : CatalogUtil.loadFileIO(fileIOImpl, properties, conf); - this.encryptionManagerFactory = new StandardEncryptionManagerFactory(); - this.encryptionManagerFactory.initialize(properties); + this.encryptionManagerFactory = new StandardEncryptionManagerFactory(properties); this.clients = new CachedClientPool(conf, properties); } @@ -197,7 +194,7 @@ public boolean dropTable(TableIdentifier identifier, boolean purge) { }); if (purge && lastMetadata != null) { - CatalogUtil.dropTableData(ops.io(), lastMetadata); + CatalogUtil.dropTableData(ops.io(), ops.encryption(), lastMetadata); } LOG.info("Dropped table: {}", identifier); diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index e42f3939f3db..9232b59964dc 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -46,7 +46,8 @@ import org.apache.iceberg.TableMetadata; import org.apache.iceberg.TableProperties; import org.apache.iceberg.encryption.EncryptionManager; -import org.apache.iceberg.encryption.EncryptionManagerFactory; +import org.apache.iceberg.encryption.PlaintextEncryptionManager; +import org.apache.iceberg.encryption.StandardEncryptionManagerFactory; import org.apache.iceberg.exceptions.AlreadyExistsException; import org.apache.iceberg.exceptions.CommitFailedException; import org.apache.iceberg.exceptions.CommitStateUnknownException; @@ -108,7 +109,7 @@ public static String translateToIcebergProp(String hmsProp) { private final long maxHiveTablePropertySize; private final int metadataRefreshMaxRetries; private final FileIO fileIO; - private final EncryptionManagerFactory encryptionManagerFactory; + private final StandardEncryptionManagerFactory encryptionManagerFactory; private final ClientPool metaClients; /** Tests only */ @@ -119,21 +120,14 @@ protected HiveTableOperations( String catalogName, String database, String table) { - this( - conf, - metaClients, - fileIO, - EncryptionManagerFactory.NO_ENCRYPTION, - catalogName, - database, - table); + this(conf, metaClients, fileIO, null, catalogName, database, table); } protected HiveTableOperations( Configuration conf, ClientPool metaClients, FileIO fileIO, - EncryptionManagerFactory encryptionManagerFactory, + StandardEncryptionManagerFactory encryptionManagerFactory, String catalogName, String database, String table) { @@ -165,7 +159,11 @@ public FileIO io() { @Override public EncryptionManager encryption() { - return encryptionManagerFactory.create(current()); + if (encryptionManagerFactory == null || current() == null || current().formatVersion() < 2) { + return PlaintextEncryptionManager.instance(); + } + + return encryptionManagerFactory.create(current().properties()); } @Override diff --git a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestTableEncryption.java b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestTableEncryption.java index d984bcee2264..6562b2233de7 100644 --- a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestTableEncryption.java +++ b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestTableEncryption.java @@ -28,6 +28,7 @@ import java.util.Map; import java.util.stream.Collectors; import org.apache.iceberg.AssertHelpers; +import org.apache.iceberg.CatalogProperties; import org.apache.iceberg.MetadataTableType; import org.apache.iceberg.Schema; 
import org.apache.iceberg.encryption.Ciphers; @@ -35,16 +36,54 @@ import org.apache.iceberg.io.SeekableInputStream; import org.apache.iceberg.parquet.Parquet; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.relocated.com.google.common.collect.Streams; -import org.apache.iceberg.spark.SparkCatalogTestBase; +import org.apache.iceberg.spark.SparkCatalogConfig; +import org.apache.iceberg.spark.SparkTestBaseWithCatalog; import org.apache.iceberg.types.Types; import org.apache.parquet.crypto.ParquetCryptoRuntimeException; import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestTableEncryption extends SparkTestBaseWithCatalog { + + private static Map appendCatalogEncryptionConfigProperties( + Map props) { + Map newProps = Maps.newHashMap(); + newProps.putAll(props); + newProps.put( + CatalogProperties.ENCRYPTION_KMS_TYPE, CatalogProperties.ENCRYPTION_KMS_CUSTOM_TYPE); + newProps.put(CatalogProperties.ENCRYPTION_KMS_CLIENT_IMPL, UnitestKMS.class.getCanonicalName()); + return newProps; + } + + // these parameters are broken out to avoid changes that need to modify lots of test suites + @Parameterized.Parameters(name = "catalogName = {0}, implementation = {1}, config = {2}") + public static Object[][] parameters() { + return new Object[][] { + { + SparkCatalogConfig.HIVE.catalogName(), + SparkCatalogConfig.HIVE.implementation(), + appendCatalogEncryptionConfigProperties(SparkCatalogConfig.HIVE.properties()) + }, + { + SparkCatalogConfig.HADOOP.catalogName(), + SparkCatalogConfig.HADOOP.implementation(), + appendCatalogEncryptionConfigProperties(SparkCatalogConfig.HADOOP.properties()) + }, + { + SparkCatalogConfig.SPARK.catalogName(), + SparkCatalogConfig.SPARK.implementation(), + appendCatalogEncryptionConfigProperties(SparkCatalogConfig.SPARK.properties()) + } + }; + } -public class TestTableEncryption extends SparkCatalogTestBase { public TestTableEncryption( String catalogName, String implementation, Map config) { super(catalogName, implementation, config); @@ -55,9 +94,8 @@ public void createTables() throws IOException { sql( "CREATE TABLE %s (id bigint, data string, float float) USING iceberg " + "TBLPROPERTIES ( " - + "'encryption.table-key-id'='%s' , " - + "'format-version'='2' , " - + "'encryption.kms.client-impl'='org.apache.iceberg.encryption.UnitestKMS' )", + + "'encryption.key-id'='%s' , " + + "'format-version'='2')", tableName, UnitestKMS.MASTER_KEY_NAME1); sql("INSERT INTO %s VALUES (1, 'a', 1.0), (2, 'b', 2.0), (3, 'c', float('NaN'))", tableName); } From bef73dcc43c106abae44c9be67f248c26d534dbe Mon Sep 17 00:00:00 2001 From: Gidon Gershinsky Date: Wed, 20 Sep 2023 10:42:35 +0300 Subject: [PATCH 03/20] cleanup --- .../apache/iceberg/hadoop/HadoopCatalog.java | 438 ++++++++++++++++++ .../org/apache/iceberg/hive/HiveCatalog.java | 1 - 2 files changed, 438 insertions(+), 1 deletion(-) create mode 100644 core/src/main/java/org/apache/iceberg/hadoop/HadoopCatalog.java diff --git a/core/src/main/java/org/apache/iceberg/hadoop/HadoopCatalog.java b/core/src/main/java/org/apache/iceberg/hadoop/HadoopCatalog.java new file mode 100644 index 000000000000..7fe1b9b2e6c4 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/hadoop/HadoopCatalog.java @@ -0,0 +1,438 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.hadoop; + +import java.io.Closeable; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.file.AccessDeniedException; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.iceberg.BaseMetastoreCatalog; +import org.apache.iceberg.CatalogProperties; +import org.apache.iceberg.CatalogUtil; +import org.apache.iceberg.LockManager; +import org.apache.iceberg.Schema; +import org.apache.iceberg.TableMetadata; +import org.apache.iceberg.TableOperations; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.SupportsNamespaces; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.encryption.StandardEncryptionManagerFactory; +import org.apache.iceberg.exceptions.AlreadyExistsException; +import org.apache.iceberg.exceptions.NamespaceNotEmptyException; +import org.apache.iceberg.exceptions.NoSuchNamespaceException; +import org.apache.iceberg.exceptions.NoSuchTableException; +import org.apache.iceberg.exceptions.RuntimeIOException; +import org.apache.iceberg.io.CloseableGroup; +import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.relocated.com.google.common.base.Joiner; +import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Sets; +import org.apache.iceberg.util.LocationUtil; +import org.apache.iceberg.util.LockManagers; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * HadoopCatalog provides a way to use table names like db.table to work with path-based tables + * under a common location. It uses a specified directory under a specified filesystem as the + * warehouse directory, and organizes multiple levels directories that mapped to the database, + * namespace and the table respectively. The HadoopCatalog takes a location as the warehouse + * directory. When creating a table such as $db.$tbl, it creates $db/$tbl directory under the + * warehouse directory, and put the table metadata into that directory. + * + *
<p>The HadoopCatalog now supports {@link org.apache.iceberg.catalog.Catalog#createTable}, {@link + * org.apache.iceberg.catalog.Catalog#dropTable}, the {@link + * org.apache.iceberg.catalog.Catalog#renameTable} is not supported yet. + * + * <p>
Note: The HadoopCatalog requires that the underlying file system supports atomic rename. + */ +public class HadoopCatalog extends BaseMetastoreCatalog + implements Closeable, SupportsNamespaces, Configurable { + + private static final Logger LOG = LoggerFactory.getLogger(HadoopCatalog.class); + + private static final String TABLE_METADATA_FILE_EXTENSION = ".metadata.json"; + private static final Joiner SLASH = Joiner.on("/"); + private static final PathFilter TABLE_FILTER = + path -> path.getName().endsWith(TABLE_METADATA_FILE_EXTENSION); + private static final String HADOOP_SUPPRESS_PERMISSION_ERROR = "suppress-permission-error"; + + private String catalogName; + private Configuration conf; + private CloseableGroup closeableGroup; + private String warehouseLocation; + private FileSystem fs; + private FileIO fileIO; + private LockManager lockManager; + private boolean suppressPermissionError = false; + private Map catalogProperties; + private StandardEncryptionManagerFactory encryptionManagerFactory; + + public HadoopCatalog() {} + + @Override + public void initialize(String name, Map properties) { + this.catalogProperties = ImmutableMap.copyOf(properties); + String inputWarehouseLocation = properties.get(CatalogProperties.WAREHOUSE_LOCATION); + Preconditions.checkArgument( + inputWarehouseLocation != null && inputWarehouseLocation.length() > 0, + "Cannot initialize HadoopCatalog because warehousePath must not be null or empty"); + + this.catalogName = name; + this.warehouseLocation = LocationUtil.stripTrailingSlash(inputWarehouseLocation); + this.fs = Util.getFs(new Path(warehouseLocation), conf); + + String fileIOImpl = properties.get(CatalogProperties.FILE_IO_IMPL); + this.fileIO = + fileIOImpl == null + ? new HadoopFileIO(conf) + : CatalogUtil.loadFileIO(fileIOImpl, properties, conf); + this.encryptionManagerFactory = new StandardEncryptionManagerFactory(properties); + this.lockManager = LockManagers.from(properties); + + this.closeableGroup = new CloseableGroup(); + closeableGroup.addCloseable(lockManager); + closeableGroup.addCloseable(encryptionManagerFactory); + closeableGroup.setSuppressCloseFailure(true); + + this.suppressPermissionError = + Boolean.parseBoolean(properties.get(HADOOP_SUPPRESS_PERMISSION_ERROR)); + } + + /** + * The constructor of the HadoopCatalog. It uses the passed location as its warehouse directory. + * + * @param conf The Hadoop configuration + * @param warehouseLocation The location used as warehouse directory + */ + public HadoopCatalog(Configuration conf, String warehouseLocation) { + setConf(conf); + initialize("hadoop", ImmutableMap.of(CatalogProperties.WAREHOUSE_LOCATION, warehouseLocation)); + } + + @Override + public String name() { + return catalogName; + } + + private boolean shouldSuppressPermissionError(IOException ioException) { + if (suppressPermissionError) { + return ioException instanceof AccessDeniedException + || (ioException.getMessage() != null + && ioException.getMessage().contains("AuthorizationPermissionMismatch")); + } + return false; + } + + private boolean isTableDir(Path path) { + Path metadataPath = new Path(path, "metadata"); + // Only the path which contains metadata is the path for table, otherwise it could be + // still a namespace. 
+ try { + return fs.listStatus(metadataPath, TABLE_FILTER).length >= 1; + } catch (FileNotFoundException e) { + return false; + } catch (IOException e) { + if (shouldSuppressPermissionError(e)) { + LOG.warn("Unable to list metadata directory {}", metadataPath, e); + return false; + } else { + throw new UncheckedIOException(e); + } + } + } + + private boolean isDirectory(Path path) { + try { + return fs.getFileStatus(path).isDirectory(); + } catch (FileNotFoundException e) { + return false; + } catch (IOException e) { + if (shouldSuppressPermissionError(e)) { + LOG.warn("Unable to list directory {}", path, e); + return false; + } else { + throw new UncheckedIOException(e); + } + } + } + + @Override + public List listTables(Namespace namespace) { + Preconditions.checkArgument( + namespace.levels().length >= 1, "Missing database in table identifier: %s", namespace); + + Path nsPath = new Path(warehouseLocation, SLASH.join(namespace.levels())); + Set tblIdents = Sets.newHashSet(); + + try { + if (!isDirectory(nsPath)) { + throw new NoSuchNamespaceException("Namespace does not exist: %s", namespace); + } + RemoteIterator it = fs.listStatusIterator(nsPath); + while (it.hasNext()) { + FileStatus status = it.next(); + if (!status.isDirectory()) { + // Ignore the path which is not a directory. + continue; + } + + Path path = status.getPath(); + if (isTableDir(path)) { + TableIdentifier tblIdent = TableIdentifier.of(namespace, path.getName()); + tblIdents.add(tblIdent); + } + } + } catch (IOException ioe) { + throw new RuntimeIOException(ioe, "Failed to list tables under: %s", namespace); + } + + return Lists.newArrayList(tblIdents); + } + + @Override + protected boolean isValidIdentifier(TableIdentifier identifier) { + return true; + } + + @Override + protected TableOperations newTableOps(TableIdentifier identifier) { + return new HadoopTableOperations( + new Path(defaultWarehouseLocation(identifier)), + fileIO, + encryptionManagerFactory, + conf, + lockManager); + } + + @Override + protected String defaultWarehouseLocation(TableIdentifier tableIdentifier) { + String tableName = tableIdentifier.name(); + StringBuilder sb = new StringBuilder(); + + sb.append(warehouseLocation).append('/'); + for (String level : tableIdentifier.namespace().levels()) { + sb.append(level).append('/'); + } + sb.append(tableName); + + return sb.toString(); + } + + @Override + public boolean dropTable(TableIdentifier identifier, boolean purge) { + if (!isValidIdentifier(identifier)) { + throw new NoSuchTableException("Invalid identifier: %s", identifier); + } + + Path tablePath = new Path(defaultWarehouseLocation(identifier)); + TableOperations ops = newTableOps(identifier); + TableMetadata lastMetadata = ops.current(); + try { + if (lastMetadata == null) { + LOG.debug("Not an iceberg table: {}", identifier); + return false; + } else { + if (purge) { + // Since the data files and the metadata files may store in different locations, + // so it has to call dropTableData to force delete the data file. 
+ CatalogUtil.dropTableData(ops.io(), ops.encryption(), lastMetadata); + } + return fs.delete(tablePath, true /* recursive */); + } + } catch (IOException e) { + throw new RuntimeIOException(e, "Failed to delete file: %s", tablePath); + } + } + + @Override + public void renameTable(TableIdentifier from, TableIdentifier to) { + throw new UnsupportedOperationException("Cannot rename Hadoop tables"); + } + + @Override + public void createNamespace(Namespace namespace, Map meta) { + Preconditions.checkArgument( + !namespace.isEmpty(), "Cannot create namespace with invalid name: %s", namespace); + if (!meta.isEmpty()) { + throw new UnsupportedOperationException( + "Cannot create namespace " + namespace + ": metadata is not supported"); + } + + Path nsPath = new Path(warehouseLocation, SLASH.join(namespace.levels())); + + if (isNamespace(nsPath)) { + throw new AlreadyExistsException("Namespace already exists: %s", namespace); + } + + try { + fs.mkdirs(nsPath); + + } catch (IOException e) { + throw new RuntimeIOException(e, "Create namespace failed: %s", namespace); + } + } + + @Override + public List listNamespaces(Namespace namespace) { + Path nsPath = + namespace.isEmpty() + ? new Path(warehouseLocation) + : new Path(warehouseLocation, SLASH.join(namespace.levels())); + if (!isNamespace(nsPath)) { + throw new NoSuchNamespaceException("Namespace does not exist: %s", namespace); + } + + try { + // using the iterator listing allows for paged downloads + // from HDFS and prefetching from object storage. + List namespaces = Lists.newArrayList(); + RemoteIterator it = fs.listStatusIterator(nsPath); + while (it.hasNext()) { + Path path = it.next().getPath(); + if (isNamespace(path)) { + namespaces.add(append(namespace, path.getName())); + } + } + return namespaces; + } catch (IOException ioe) { + throw new RuntimeIOException(ioe, "Failed to list namespace under: %s", namespace); + } + } + + private Namespace append(Namespace ns, String name) { + String[] levels = Arrays.copyOfRange(ns.levels(), 0, ns.levels().length + 1); + levels[ns.levels().length] = name; + return Namespace.of(levels); + } + + @Override + public boolean dropNamespace(Namespace namespace) { + Path nsPath = new Path(warehouseLocation, SLASH.join(namespace.levels())); + + if (!isNamespace(nsPath) || namespace.isEmpty()) { + return false; + } + + try { + if (fs.listStatusIterator(nsPath).hasNext()) { + throw new NamespaceNotEmptyException("Namespace %s is not empty.", namespace); + } + + return fs.delete(nsPath, false /* recursive */); + } catch (IOException e) { + throw new RuntimeIOException(e, "Namespace delete failed: %s", namespace); + } + } + + @Override + public boolean setProperties(Namespace namespace, Map properties) { + throw new UnsupportedOperationException( + "Cannot set namespace properties " + namespace + " : setProperties is not supported"); + } + + @Override + public boolean removeProperties(Namespace namespace, Set properties) { + throw new UnsupportedOperationException( + "Cannot remove properties " + namespace + " : removeProperties is not supported"); + } + + @Override + public Map loadNamespaceMetadata(Namespace namespace) { + Path nsPath = new Path(warehouseLocation, SLASH.join(namespace.levels())); + + if (!isNamespace(nsPath) || namespace.isEmpty()) { + throw new NoSuchNamespaceException("Namespace does not exist: %s", namespace); + } + + return ImmutableMap.of("location", nsPath.toString()); + } + + private boolean isNamespace(Path path) { + return isDirectory(path) && !isTableDir(path); + } + + @Override + 
public void close() throws IOException { + closeableGroup.close(); + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("name", catalogName) + .add("location", warehouseLocation) + .toString(); + } + + @Override + public TableBuilder buildTable(TableIdentifier identifier, Schema schema) { + return new HadoopCatalogTableBuilder(identifier, schema); + } + + @Override + public void setConf(Configuration conf) { + this.conf = conf; + } + + @Override + public Configuration getConf() { + return conf; + } + + @Override + protected Map properties() { + return catalogProperties == null ? ImmutableMap.of() : catalogProperties; + } + + private class HadoopCatalogTableBuilder extends BaseMetastoreCatalogTableBuilder { + private final String defaultLocation; + + private HadoopCatalogTableBuilder(TableIdentifier identifier, Schema schema) { + super(identifier, schema); + defaultLocation = defaultWarehouseLocation(identifier); + } + + @Override + public TableBuilder withLocation(String location) { + Preconditions.checkArgument( + location == null || location.equals(defaultLocation), + "Cannot set a custom location for a path-based table. Expected " + + defaultLocation + + " but got " + + location); + return this; + } + } +} diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java index d7eb2553d05b..4c48b050e0d9 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java @@ -113,7 +113,6 @@ public void initialize(String inputName, Map properties) { : CatalogUtil.loadFileIO(fileIOImpl, properties, conf); this.encryptionManagerFactory = new StandardEncryptionManagerFactory(properties); - this.clients = new CachedClientPool(conf, properties); } From 4bb958f08f67664f6b1fe1e95b4a9d9b2cad1549 Mon Sep 17 00:00:00 2001 From: Gidon Gershinsky Date: Wed, 20 Sep 2023 11:01:10 +0300 Subject: [PATCH 04/20] enable null table metadata for old unitests --- .../java/org/apache/iceberg/hadoop/HadoopCatalog.java | 8 +++++++- .../apache/iceberg/hadoop/HadoopTableOperations.java | 10 +++++++++- .../main/java/org/apache/iceberg/hive/HiveCatalog.java | 5 ++++- .../org/apache/iceberg/hive/HiveTableOperations.java | 10 +++++++++- 4 files changed, 29 insertions(+), 4 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/hadoop/HadoopCatalog.java b/core/src/main/java/org/apache/iceberg/hadoop/HadoopCatalog.java index 7fe1b9b2e6c4..72923fda7cf6 100644 --- a/core/src/main/java/org/apache/iceberg/hadoop/HadoopCatalog.java +++ b/core/src/main/java/org/apache/iceberg/hadoop/HadoopCatalog.java @@ -118,7 +118,13 @@ public void initialize(String name, Map properties) { fileIOImpl == null ? 
new HadoopFileIO(conf) : CatalogUtil.loadFileIO(fileIOImpl, properties, conf); - this.encryptionManagerFactory = new StandardEncryptionManagerFactory(properties); + + if (catalogProperties.containsKey(CatalogProperties.ENCRYPTION_KMS_TYPE)) { + this.encryptionManagerFactory = new StandardEncryptionManagerFactory(properties); + } else { + this.encryptionManagerFactory = null; + } + this.lockManager = LockManagers.from(properties); this.closeableGroup = new CloseableGroup(); diff --git a/core/src/main/java/org/apache/iceberg/hadoop/HadoopTableOperations.java b/core/src/main/java/org/apache/iceberg/hadoop/HadoopTableOperations.java index 99d5bf30bcf9..bd30bcc9ffe6 100644 --- a/core/src/main/java/org/apache/iceberg/hadoop/HadoopTableOperations.java +++ b/core/src/main/java/org/apache/iceberg/hadoop/HadoopTableOperations.java @@ -193,7 +193,15 @@ public FileIO io() { @Override public EncryptionManager encryption() { - if (encryptionManagerFactory == null || current() == null || current().formatVersion() < 2) { + if (encryptionManagerFactory == null) { + return PlaintextEncryptionManager.instance(); + } + + if (current() == null) { + throw new IllegalStateException("No table metadata"); + } + + if (current().formatVersion() < 2) { return PlaintextEncryptionManager.instance(); } diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java index 4c48b050e0d9..609a6a7eed86 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java @@ -112,7 +112,10 @@ public void initialize(String inputName, Map properties) { ? new HadoopFileIO(conf) : CatalogUtil.loadFileIO(fileIOImpl, properties, conf); - this.encryptionManagerFactory = new StandardEncryptionManagerFactory(properties); + if (catalogProperties.containsKey(CatalogProperties.ENCRYPTION_KMS_TYPE)) { + this.encryptionManagerFactory = new StandardEncryptionManagerFactory(properties); + } + this.clients = new CachedClientPool(conf, properties); } diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index 9232b59964dc..986bd6a6be40 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -159,7 +159,15 @@ public FileIO io() { @Override public EncryptionManager encryption() { - if (encryptionManagerFactory == null || current() == null || current().formatVersion() < 2) { + if (encryptionManagerFactory == null) { + return PlaintextEncryptionManager.instance(); + } + + if (current() == null) { + throw new IllegalStateException("No table metadata"); + } + + if (current().formatVersion() < 2) { return PlaintextEncryptionManager.instance(); } From 636c23311169a2113c047a682a4bc1761c6f2fa2 Mon Sep 17 00:00:00 2001 From: Gidon Gershinsky Date: Wed, 18 Oct 2023 14:12:16 +0300 Subject: [PATCH 05/20] remove encryption factory --- .../apache/iceberg/hadoop/HadoopCatalog.java | 15 ++++++------ .../iceberg/hadoop/HadoopTableOperations.java | 19 ++++++++++----- .../org/apache/iceberg/hive/HiveCatalog.java | 9 +++---- .../iceberg/hive/HiveTableOperations.java | 24 ++++++++++++++----- 4 files changed, 43 insertions(+), 24 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/hadoop/HadoopCatalog.java 
b/core/src/main/java/org/apache/iceberg/hadoop/HadoopCatalog.java index 72923fda7cf6..0b3a7c4b0507 100644 --- a/core/src/main/java/org/apache/iceberg/hadoop/HadoopCatalog.java +++ b/core/src/main/java/org/apache/iceberg/hadoop/HadoopCatalog.java @@ -44,7 +44,8 @@ import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.SupportsNamespaces; import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.encryption.StandardEncryptionManagerFactory; +import org.apache.iceberg.encryption.EncryptionUtil; +import org.apache.iceberg.encryption.KeyManagementClient; import org.apache.iceberg.exceptions.AlreadyExistsException; import org.apache.iceberg.exceptions.NamespaceNotEmptyException; import org.apache.iceberg.exceptions.NoSuchNamespaceException; @@ -97,7 +98,7 @@ public class HadoopCatalog extends BaseMetastoreCatalog private LockManager lockManager; private boolean suppressPermissionError = false; private Map catalogProperties; - private StandardEncryptionManagerFactory encryptionManagerFactory; + private KeyManagementClient keyManagementClient; public HadoopCatalog() {} @@ -119,17 +120,15 @@ public void initialize(String name, Map properties) { ? new HadoopFileIO(conf) : CatalogUtil.loadFileIO(fileIOImpl, properties, conf); - if (catalogProperties.containsKey(CatalogProperties.ENCRYPTION_KMS_TYPE)) { - this.encryptionManagerFactory = new StandardEncryptionManagerFactory(properties); - } else { - this.encryptionManagerFactory = null; + if (catalogProperties.containsKey(CatalogProperties.ENCRYPTION_KMS_TYPE)) { + this.keyManagementClient = EncryptionUtil.createKmsClient(properties); } this.lockManager = LockManagers.from(properties); this.closeableGroup = new CloseableGroup(); closeableGroup.addCloseable(lockManager); - closeableGroup.addCloseable(encryptionManagerFactory); + closeableGroup.addCloseable(keyManagementClient); closeableGroup.setSuppressCloseFailure(true); this.suppressPermissionError = @@ -237,7 +236,7 @@ protected TableOperations newTableOps(TableIdentifier identifier) { return new HadoopTableOperations( new Path(defaultWarehouseLocation(identifier)), fileIO, - encryptionManagerFactory, + keyManagementClient, conf, lockManager); } diff --git a/core/src/main/java/org/apache/iceberg/hadoop/HadoopTableOperations.java b/core/src/main/java/org/apache/iceberg/hadoop/HadoopTableOperations.java index bd30bcc9ffe6..f8ed0520ec41 100644 --- a/core/src/main/java/org/apache/iceberg/hadoop/HadoopTableOperations.java +++ b/core/src/main/java/org/apache/iceberg/hadoop/HadoopTableOperations.java @@ -18,6 +18,8 @@ */ package org.apache.iceberg.hadoop; +import static org.apache.iceberg.TableProperties.ENCRYPTION_TABLE_KEY; + import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; @@ -38,8 +40,9 @@ import org.apache.iceberg.TableOperations; import org.apache.iceberg.TableProperties; import org.apache.iceberg.encryption.EncryptionManager; +import org.apache.iceberg.encryption.EncryptionUtil; +import org.apache.iceberg.encryption.KeyManagementClient; import org.apache.iceberg.encryption.PlaintextEncryptionManager; -import org.apache.iceberg.encryption.StandardEncryptionManagerFactory; import org.apache.iceberg.exceptions.CommitFailedException; import org.apache.iceberg.exceptions.RuntimeIOException; import org.apache.iceberg.exceptions.ValidationException; @@ -69,7 +72,7 @@ public class HadoopTableOperations implements TableOperations { private final Path location; private final FileIO fileIO; private final LockManager 
lockManager; - private final StandardEncryptionManagerFactory encryptionManagerFactory; + private final KeyManagementClient keyManagementClient; private volatile TableMetadata currentMetadata = null; private volatile Integer version = null; @@ -83,14 +86,14 @@ protected HadoopTableOperations( protected HadoopTableOperations( Path location, FileIO fileIO, - StandardEncryptionManagerFactory encryptionManagerFactory, + KeyManagementClient keyManagementClient, Configuration conf, LockManager lockManager) { this.conf = conf; this.location = location; this.fileIO = fileIO; this.lockManager = lockManager; - this.encryptionManagerFactory = encryptionManagerFactory; + this.keyManagementClient = keyManagementClient; } @Override @@ -193,7 +196,7 @@ public FileIO io() { @Override public EncryptionManager encryption() { - if (encryptionManagerFactory == null) { + if (keyManagementClient == null) { return PlaintextEncryptionManager.instance(); } @@ -202,10 +205,14 @@ public EncryptionManager encryption() { } if (current().formatVersion() < 2) { + if (current().properties().containsKey(ENCRYPTION_TABLE_KEY)) { + throw new IllegalStateException("Encryption is not supported in v1 tables"); + } + return PlaintextEncryptionManager.instance(); } - return encryptionManagerFactory.create(current().properties()); + return EncryptionUtil.createEncryptionManager(current().properties(), keyManagementClient); } @Override diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java index 609a6a7eed86..dff03f2f4707 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java @@ -43,7 +43,8 @@ import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.SupportsNamespaces; import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.encryption.StandardEncryptionManagerFactory; +import org.apache.iceberg.encryption.EncryptionUtil; +import org.apache.iceberg.encryption.KeyManagementClient; import org.apache.iceberg.exceptions.NamespaceNotEmptyException; import org.apache.iceberg.exceptions.NoSuchNamespaceException; import org.apache.iceberg.exceptions.NoSuchTableException; @@ -77,7 +78,7 @@ public class HiveCatalog extends BaseMetastoreCatalog implements SupportsNamespa private String name; private Configuration conf; private FileIO fileIO; - private StandardEncryptionManagerFactory encryptionManagerFactory; + private KeyManagementClient keyManagementClient; private ClientPool clients; private boolean listAllTables = false; private Map catalogProperties; @@ -113,7 +114,7 @@ public void initialize(String inputName, Map properties) { : CatalogUtil.loadFileIO(fileIOImpl, properties, conf); if (catalogProperties.containsKey(CatalogProperties.ENCRYPTION_KMS_TYPE)) { - this.encryptionManagerFactory = new StandardEncryptionManagerFactory(properties); + this.keyManagementClient = EncryptionUtil.createKmsClient(properties); } this.clients = new CachedClientPool(conf, properties); @@ -519,7 +520,7 @@ public TableOperations newTableOps(TableIdentifier tableIdentifier) { String dbName = tableIdentifier.namespace().level(0); String tableName = tableIdentifier.name(); return new HiveTableOperations( - conf, clients, fileIO, encryptionManagerFactory, name, dbName, tableName); + conf, clients, fileIO, keyManagementClient, name, dbName, tableName); } @Override diff --git 
a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index 986bd6a6be40..239293f799c4 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -18,6 +18,7 @@ */ package org.apache.iceberg.hive; +import static org.apache.iceberg.TableProperties.ENCRYPTION_TABLE_KEY; import static org.apache.iceberg.TableProperties.GC_ENABLED; import com.fasterxml.jackson.core.JsonProcessingException; @@ -46,8 +47,9 @@ import org.apache.iceberg.TableMetadata; import org.apache.iceberg.TableProperties; import org.apache.iceberg.encryption.EncryptionManager; +import org.apache.iceberg.encryption.EncryptionUtil; +import org.apache.iceberg.encryption.KeyManagementClient; import org.apache.iceberg.encryption.PlaintextEncryptionManager; -import org.apache.iceberg.encryption.StandardEncryptionManagerFactory; import org.apache.iceberg.exceptions.AlreadyExistsException; import org.apache.iceberg.exceptions.CommitFailedException; import org.apache.iceberg.exceptions.CommitStateUnknownException; @@ -109,8 +111,10 @@ public static String translateToIcebergProp(String hmsProp) { private final long maxHiveTablePropertySize; private final int metadataRefreshMaxRetries; private final FileIO fileIO; - private final StandardEncryptionManagerFactory encryptionManagerFactory; + private final KeyManagementClient keyManagementClient; private final ClientPool metaClients; + private boolean fetchedTableKeyID = false; + private String tableEncryptionKeyID; /** Tests only */ protected HiveTableOperations( @@ -127,14 +131,14 @@ protected HiveTableOperations( Configuration conf, ClientPool metaClients, FileIO fileIO, - StandardEncryptionManagerFactory encryptionManagerFactory, + KeyManagementClient keyManagementClient, String catalogName, String database, String table) { this.conf = conf; this.metaClients = metaClients; this.fileIO = fileIO; - this.encryptionManagerFactory = encryptionManagerFactory; + this.keyManagementClient = keyManagementClient; this.fullName = catalogName + "." + database + "." 
+ table; this.catalogName = catalogName; this.database = database; @@ -159,7 +163,7 @@ public FileIO io() { @Override public EncryptionManager encryption() { - if (encryptionManagerFactory == null) { + if (keyManagementClient == null) { return PlaintextEncryptionManager.instance(); } @@ -168,10 +172,14 @@ public EncryptionManager encryption() { } if (current().formatVersion() < 2) { + if (current().properties().containsKey(ENCRYPTION_TABLE_KEY)) { + throw new IllegalStateException("Encryption is not supported in v1 tables"); + } + return PlaintextEncryptionManager.instance(); } - return encryptionManagerFactory.create(current().properties()); + return EncryptionUtil.createEncryptionManager(current().properties(), keyManagementClient); } @Override @@ -260,6 +268,10 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) { .collect(Collectors.toSet()); } + if (removedProps.contains(TableProperties.ENCRYPTION_TABLE_KEY)) { + throw new RuntimeException("Cannot remove key in encrypted table"); + } + Map summary = Optional.ofNullable(metadata.currentSnapshot()) .map(Snapshot::summary) From 454741722574d6d1804c2dd639281bb92de3578e Mon Sep 17 00:00:00 2001 From: Gidon Gershinsky Date: Tue, 16 Jan 2024 09:28:42 +0200 Subject: [PATCH 06/20] update --- .../spark/sql/TestAvroTableEncryption.java | 189 ++++++++++++++++++ .../spark/sql/TestTableEncryption.java | 4 +- 2 files changed, 190 insertions(+), 3 deletions(-) create mode 100644 spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestAvroTableEncryption.java rename spark/{v3.4 => v3.5}/spark/src/test/java/org/apache/iceberg/spark/sql/TestTableEncryption.java (97%) diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestAvroTableEncryption.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestAvroTableEncryption.java new file mode 100644 index 000000000000..9b61c6f6f7be --- /dev/null +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestAvroTableEncryption.java @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.spark.sql; + +import static org.apache.iceberg.Files.localInput; +import static org.apache.iceberg.types.Types.NestedField.optional; + +import java.io.File; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import org.apache.iceberg.AssertHelpers; +import org.apache.iceberg.CatalogProperties; +import org.apache.iceberg.MetadataTableType; +import org.apache.iceberg.Schema; +import org.apache.iceberg.avro.Avro; +import org.apache.iceberg.encryption.Ciphers; +import org.apache.iceberg.encryption.UnitestKMS; +import org.apache.iceberg.exceptions.RuntimeIOException; +import org.apache.iceberg.io.SeekableInputStream; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.relocated.com.google.common.collect.Streams; +import org.apache.iceberg.spark.SparkCatalogConfig; +import org.apache.iceberg.spark.SparkTestBaseWithCatalog; +import org.apache.iceberg.types.Types; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestAvroTableEncryption extends SparkTestBaseWithCatalog { + + private static Map appendCatalogEncryptionConfigProperties( + Map props) { + Map newProps = Maps.newHashMap(); + newProps.putAll(props); + newProps.put(CatalogProperties.ENCRYPTION_KMS_IMPL, UnitestKMS.class.getCanonicalName()); + return newProps; + } + + // these parameters are broken out to avoid changes that need to modify lots of test suites + @Parameterized.Parameters(name = "catalogName = {0}, implementation = {1}, config = {2}") + public static Object[][] parameters() { + return new Object[][] { + { + SparkCatalogConfig.HIVE.catalogName(), + SparkCatalogConfig.HIVE.implementation(), + appendCatalogEncryptionConfigProperties(SparkCatalogConfig.HIVE.properties()) + }, + { + SparkCatalogConfig.HADOOP.catalogName(), + SparkCatalogConfig.HADOOP.implementation(), + appendCatalogEncryptionConfigProperties(SparkCatalogConfig.HADOOP.properties()) + }, + { + SparkCatalogConfig.SPARK.catalogName(), + SparkCatalogConfig.SPARK.implementation(), + appendCatalogEncryptionConfigProperties(SparkCatalogConfig.SPARK.properties()) + } + }; + } + + public TestAvroTableEncryption( + String catalogName, String implementation, Map config) { + super(catalogName, implementation, config); + } + + @Before + public void createTables() throws IOException { + sql( + "CREATE TABLE %s (id bigint, data string, float float) USING iceberg " + + "TBLPROPERTIES ( " + + "'encryption.key-id'='%s' , " + + "'write.format.default'='AVRO' , " + + "'format-version'='2')", + tableName, UnitestKMS.MASTER_KEY_NAME1); + sql("INSERT INTO %s VALUES (1, 'a', 1.0), (2, 'b', 2.0), (3, 'c', float('NaN'))", tableName); + } + + @After + public void removeTables() { + sql("DROP TABLE IF EXISTS %s", tableName); + } + + @Test + public void testSelect() { + List expected = + ImmutableList.of(row(1L, "a", 1.0F), row(2L, "b", 2.0F), row(3L, "c", Float.NaN)); + + assertEquals("Should return all expected rows", expected, sql("SELECT * FROM %s", tableName)); + } + + @Test + public void testDirectDataFileRead() throws IOException { + List dataFileTable = + sql("SELECT file_path FROM %s.%s", tableName, MetadataTableType.ALL_DATA_FILES); + List dataFiles = + 
Streams.concat(dataFileTable.stream())
+            .map(row -> (String) row[0])
+            .collect(Collectors.toList());
+    Schema schema = new Schema(optional(0, "id", Types.IntegerType.get()));
+    byte[] magic = new byte[4];
+    for (String filePath : dataFiles) {
+      AssertHelpers.assertThrows(
+          "Read without keys",
+          RuntimeIOException.class,
+          "Failed to open file",
+          () -> Avro.read(localInput(filePath)).project(schema).build().iterator().next());
+
+      // Verify encryption of data files
+      SeekableInputStream dataFileReader = localInput(filePath).newStream();
+      dataFileReader.read(magic);
+      dataFileReader.close();
+      Assert.assertArrayEquals(
+          Ciphers.GCM_STREAM_MAGIC_STRING.getBytes(StandardCharsets.UTF_8), magic);
+    }
+  }
+
+  @Test
+  public void testManifestEncryption() throws IOException {
+    List<Object[]> manifestFileTable =
+        sql("SELECT path FROM %s.%s", tableName, MetadataTableType.MANIFESTS);
+
+    List<String> manifestFiles =
+        Streams.concat(manifestFileTable.stream())
+            .map(row -> (String) row[0])
+            .collect(Collectors.toList());
+
+    if (manifestFiles.isEmpty()) {
+      throw new RuntimeException("No manifest files found for table " + tableName);
+    }
+
+    String metadataFolderPath = null;
+    byte[] magic = new byte[4];
+
+    // Check encryption of manifest files
+    for (String manifestFilePath : manifestFiles) {
+      SeekableInputStream manifestFileReader = localInput(manifestFilePath).newStream();
+      manifestFileReader.read(magic);
+      manifestFileReader.close();
+      Assert.assertArrayEquals(
+          Ciphers.GCM_STREAM_MAGIC_STRING.getBytes(StandardCharsets.UTF_8), magic);
+
+      if (metadataFolderPath == null) {
+        metadataFolderPath = new File(manifestFilePath).getParent().replaceFirst("file:", "");
+      }
+    }
+
+    // Find manifest list files and check their encryption
+    File[] listOfMetadataFiles = new File(metadataFolderPath).listFiles();
+    boolean foundManifestListFile = false;
+    for (File metadataFile : listOfMetadataFiles) {
+      if (metadataFile.getName().startsWith("snap-")) {
+        foundManifestListFile = true;
+        SeekableInputStream manifestFileReader = localInput(metadataFile).newStream();
+        manifestFileReader.read(magic);
+        manifestFileReader.close();
+        Assert.assertArrayEquals(
+            Ciphers.GCM_STREAM_MAGIC_STRING.getBytes(StandardCharsets.UTF_8), magic);
+      }
+    }
+
+    if (!foundManifestListFile) {
+      throw new RuntimeException("No manifest list files found for table " + tableName);
+    }
+  }
+}
diff --git a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestTableEncryption.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestTableEncryption.java
similarity index 97%
rename from spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestTableEncryption.java
rename to spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestTableEncryption.java
index 6562b2233de7..df8741eebc75 100644
--- a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestTableEncryption.java
+++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestTableEncryption.java
@@ -56,9 +56,7 @@ private static Map<String, String> appendCatalogEncryptionConfigProperties(
       Map<String, String> props) {
     Map<String, String> newProps = Maps.newHashMap();
     newProps.putAll(props);
-    newProps.put(
-        CatalogProperties.ENCRYPTION_KMS_TYPE, CatalogProperties.ENCRYPTION_KMS_CUSTOM_TYPE);
-    newProps.put(CatalogProperties.ENCRYPTION_KMS_CLIENT_IMPL, UnitestKMS.class.getCanonicalName());
+    newProps.put(CatalogProperties.ENCRYPTION_KMS_IMPL, UnitestKMS.class.getCanonicalName());
     return newProps;
   }
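With the simplified property above, wiring a Spark catalog for encryption comes down to one KMS setting plus a per-table key. A minimal sketch, assuming the string behind CatalogProperties.ENCRYPTION_KMS_IMPL is "encryption.kms-impl" (verify against the constant; 'encryption.key-id' appears literally in the tests), using the in-memory UnitestKMS from these suites and a hypothetical catalog named "demo":

    // Illustrative only; catalog settings are typically supplied when building the session.
    spark.conf().set("spark.sql.catalog.demo", "org.apache.iceberg.spark.SparkCatalog");
    spark.conf().set("spark.sql.catalog.demo.type", "hive");
    spark.conf().set("spark.sql.catalog.demo.encryption.kms-impl", UnitestKMS.class.getCanonicalName());
    // Per-table opt-in, mirroring createTables() above; the key id names a master key
    // known to the KMS (UnitestKMS.MASTER_KEY_NAME1 in these tests).
    spark.sql(
        "CREATE TABLE demo.db.t (id bigint, data string) USING iceberg "
            + "TBLPROPERTIES ('encryption.key-id'='keyA', 'format-version'='2')");

From c2b355c06575b7eb689fee0c5d992da43bea58a3 Mon Sep 17 00:00:00 2001
From: Gidon Gershinsky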
Date: Sun, 21 Apr 2024 11:25:26 +0300
Subject: [PATCH 07/20] Metadata file encryption

---
 .palantir/revapi.yml                          |  29 ++++
 .../iceberg/encryption/EncryptingFileIO.java  |  45 +++---
 .../iceberg/BaseMetastoreTableOperations.java | 145 ++++++++++++++++--
 .../java/org/apache/iceberg/MetadataFile.java |  56 +++++++
 .../org/apache/iceberg/SerializableTable.java |  10 +-
 .../apache/iceberg/TableMetadataParser.java   |  69 ++++++++-
 .../encryption/AesGcmOutputStream.java        |   4 +
 .../iceberg/encryption/EncryptionUtil.java    |  30 ++--
 .../encryption/KeyManagementClient.java       |   2 +-
 .../apache/iceberg/hadoop/HadoopCatalog.java  |  31 ++--
 .../iceberg/hadoop/HadoopTableOperations.java |  37 -----
 .../encryption/EncryptionTestHelpers.java     |   7 +-
 .../iceberg/hive/HiveTableOperations.java     |  97 +++++++++---
 .../TestSparkCatalogHadoopOverrides.java      |  35 ++++-
 .../spark/sql/TestAvroTableEncryption.java    | 103 ++-----------
 .../spark/sql/TestTableEncryption.java        |  91 +++++------
 16 files changed, 513 insertions(+), 278 deletions(-)
 create mode 100644 core/src/main/java/org/apache/iceberg/MetadataFile.java

diff --git a/.palantir/revapi.yml b/.palantir/revapi.yml
index a41d3ddfb8df..3eeda0168978 100644
--- a/.palantir/revapi.yml
+++ b/.palantir/revapi.yml
@@ -1018,6 +1018,35 @@ acceptedBreaks:
         old: "method void org.apache.iceberg.PositionDeletesTable.PositionDeletesBatchScan::<init>(org.apache.iceberg.Table,\
           \ org.apache.iceberg.Schema, org.apache.iceberg.TableScanContext)"
         justification: "Removing deprecated code"
+  "1.5.0":
+    org.apache.iceberg:iceberg-api:
+    - code: "java.class.defaultSerializationChanged"
+      old: "class org.apache.iceberg.encryption.EncryptingFileIO"
+      new: "class org.apache.iceberg.encryption.EncryptingFileIO"
+      justification: "Decrypting input must receive length"
+    - code: "java.method.removed"
+      old: "method org.apache.iceberg.io.InputFile org.apache.iceberg.encryption.EncryptingFileIO::newDecryptingInputFile(java.lang.String,\
+        \ java.nio.ByteBuffer)"
+      justification: "Decrypting input must receive length"
+    org.apache.iceberg:iceberg-core:
+    - code: "java.method.returnTypeChanged"
+      old: "method void org.apache.iceberg.TableMetadataParser::internalWrite(org.apache.iceberg.TableMetadata,\
+        \ org.apache.iceberg.io.OutputFile, boolean)"
+      new: "method long org.apache.iceberg.TableMetadataParser::internalWrite(org.apache.iceberg.TableMetadata,\
+        \ org.apache.iceberg.io.OutputFile, boolean)"
+      justification: "Return length of the written metadata file"
+    - code: "java.method.returnTypeChanged"
+      old: "method void org.apache.iceberg.TableMetadataParser::overwrite(org.apache.iceberg.TableMetadata,\
+        \ org.apache.iceberg.io.OutputFile)"
+      new: "method long org.apache.iceberg.TableMetadataParser::overwrite(org.apache.iceberg.TableMetadata,\
+        \ org.apache.iceberg.io.OutputFile)"
+      justification: "Return length of the written metadata file"
+    - code: "java.method.returnTypeChanged"
+      old: "method void org.apache.iceberg.TableMetadataParser::write(org.apache.iceberg.TableMetadata,\
+        \ org.apache.iceberg.io.OutputFile)"
+      new: "method long org.apache.iceberg.TableMetadataParser::write(org.apache.iceberg.TableMetadata,\
+        \ org.apache.iceberg.io.OutputFile)"
+      justification: "Return length of the written metadata file"
 apache-iceberg-0.14.0:
   org.apache.iceberg:iceberg-api:
   - code: "java.class.defaultSerializationChanged"
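These accepted breaks capture the core contract change in this patch: an AES-GCM stream cannot be decrypted safely without a trusted ciphertext length, so the metadata writer now returns the stored length and the decrypting reader requires it back. A minimal sketch of the round trip, using only APIs introduced or changed below (variable names are illustrative):

    // Assumed in scope: TableMetadata metadata; EncryptingFileIO encryptingFileIO;
    // OutputFile encryptedMetadataOutputFile (the encryptingOutputFile() of an
    // EncryptedOutputFile); String location; String wrappedKeyMetadata (base64).
    long size = TableMetadataParser.overwrite(metadata, encryptedMetadataOutputFile);
    // The catalog persists location, wrapped key and size together, and hands them back
    // on read, so a truncated or replaced metadata file fails fast instead of being
    // decrypted blindly.
    MetadataFile file = new MetadataFile(location, wrappedKeyMetadata, size);
    TableMetadata current = TableMetadataParser.read(encryptingFileIO, file);

diff --git a/api/src/main/java/org/apache/iceberg/encryption/EncryptingFileIO.java b/api/src/main/java/org/apache/iceberg/encryption/EncryptingFileIO.java
index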
0203361844a5..a0c7798bc20e 100644 --- a/api/src/main/java/org/apache/iceberg/encryption/EncryptingFileIO.java +++ b/api/src/main/java/org/apache/iceberg/encryption/EncryptingFileIO.java @@ -28,9 +28,11 @@ import org.apache.iceberg.DataFile; import org.apache.iceberg.DeleteFile; import org.apache.iceberg.ManifestFile; +import org.apache.iceberg.exceptions.RuntimeIOException; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.io.InputFile; import org.apache.iceberg.io.OutputFile; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Iterables; @@ -71,6 +73,10 @@ public EncryptionManager encryptionManager() { return em; } + public FileIO sourceFileIO() { + return io; + } + @Override public InputFile newInputFile(String path) { return io.newInputFile(path); @@ -109,14 +115,19 @@ public InputFile newInputFile(ManifestFile manifest) { } } - public InputFile newDecryptingInputFile(String path, ByteBuffer buffer) { - return em.decrypt(wrap(io.newInputFile(path), buffer)); - } - public InputFile newDecryptingInputFile(String path, long length, ByteBuffer buffer) { - // TODO: is the length correct for the encrypted file? It may be the length of the plaintext - // stream - return em.decrypt(wrap(io.newInputFile(path, length), buffer)); + Preconditions.checkArgument( + length > 0, "Cannot safely decrypt table metadata file because its size is not specified"); + + InputFile inputFile = io.newInputFile(path, length); + + if (inputFile.getLength() != length) { + throw new RuntimeIOException( + "Cannot safely decrypt a file because its size was changed by FileIO %s from %s to %s", + io.getClass(), length, inputFile.getLength()); + } + + return em.decrypt(wrap(inputFile, buffer)); } @Override @@ -157,7 +168,7 @@ private static SimpleEncryptedInputFile wrap(InputFile encryptedInputFile, ByteB } private static EncryptionKeyMetadata toKeyMetadata(ByteBuffer buffer) { - return buffer != null ? new SimpleKeyMetadata(buffer) : EmptyKeyMetadata.get(); + return buffer != null ? 
new SimpleKeyMetadata(buffer) : EncryptionKeyMetadata.empty(); } private static class SimpleEncryptedInputFile implements EncryptedInputFile { @@ -198,22 +209,4 @@ public EncryptionKeyMetadata copy() { return new SimpleKeyMetadata(metadataBuffer.duplicate()); } } - - private static class EmptyKeyMetadata implements EncryptionKeyMetadata { - private static final EmptyKeyMetadata INSTANCE = new EmptyKeyMetadata(); - - private static EmptyKeyMetadata get() { - return INSTANCE; - } - - @Override - public ByteBuffer buffer() { - return null; - } - - @Override - public EncryptionKeyMetadata copy() { - return this; - } - } } diff --git a/core/src/main/java/org/apache/iceberg/BaseMetastoreTableOperations.java b/core/src/main/java/org/apache/iceberg/BaseMetastoreTableOperations.java index 6443cf6e60ea..3c07dcfed1dd 100644 --- a/core/src/main/java/org/apache/iceberg/BaseMetastoreTableOperations.java +++ b/core/src/main/java/org/apache/iceberg/BaseMetastoreTableOperations.java @@ -18,12 +18,19 @@ */ package org.apache.iceberg; +import java.nio.ByteBuffer; +import java.util.Base64; import java.util.Set; import java.util.UUID; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; import java.util.function.Predicate; +import org.apache.iceberg.encryption.EncryptedOutputFile; +import org.apache.iceberg.encryption.EncryptingFileIO; import org.apache.iceberg.encryption.EncryptionManager; +import org.apache.iceberg.encryption.EncryptionUtil; +import org.apache.iceberg.encryption.NativeEncryptionKeyMetadata; +import org.apache.iceberg.encryption.StandardEncryptionManager; import org.apache.iceberg.exceptions.AlreadyExistsException; import org.apache.iceberg.exceptions.CommitFailedException; import org.apache.iceberg.exceptions.NoSuchTableException; @@ -46,14 +53,21 @@ public abstract class BaseMetastoreTableOperations extends BaseMetastoreOperatio public static final String TABLE_TYPE_PROP = "table_type"; public static final String ICEBERG_TABLE_TYPE_VALUE = "iceberg"; public static final String METADATA_LOCATION_PROP = "metadata_location"; + public static final String METADATA_WRAPPED_KEY_PROP = "metadata_wrapped_key"; + public static final String METADATA_SIZE_PROP = "metadata_size"; public static final String PREVIOUS_METADATA_LOCATION_PROP = "previous_metadata_location"; + public static final String PREVIOUS_METADATA_WRAPPED_KEY_PROP = "previous_metadata_wrapped_key"; + public static final String PREVIOUS_METADATA_SIZE_PROP = "previous_metadata_size"; private static final String METADATA_FOLDER_NAME = "metadata"; private TableMetadata currentMetadata = null; private String currentMetadataLocation = null; + private MetadataFile currentMetadataFile = null; private boolean shouldRefresh = true; private int version = -1; + private String encryptionKeyId; + private int encryptionDekLength = -1; protected BaseMetastoreTableOperations() {} @@ -65,6 +79,14 @@ protected BaseMetastoreTableOperations() {} */ protected abstract String tableName(); + protected String encryptionKeyIdFromProps() { + return encryptionKeyId; + } + + protected int dekLength() { + return encryptionDekLength; + } + @Override public TableMetadata current() { if (shouldRefresh) { @@ -77,6 +99,10 @@ public String currentMetadataLocation() { return currentMetadataLocation; } + public MetadataFile currentMetadataFile() { + return currentMetadataFile; + } + public int currentVersion() { return version; } @@ -146,21 +172,92 @@ protected void disableRefresh() { } protected String writeNewMetadataIfRequired(boolean 
newTable, TableMetadata metadata) { + return writeNewMetadataFileIfRequired(newTable, metadata).location(); + } + + protected MetadataFile writeNewMetadataFileIfRequired(boolean newTable, TableMetadata metadata) { return newTable && metadata.metadataFileLocation() != null - ? metadata.metadataFileLocation() - : writeNewMetadata(metadata, currentVersion() + 1); + ? new MetadataFile(metadata.metadataFileLocation(), null, -1L) + : writeNewMetadataFile(metadata, currentVersion() + 1); } protected String writeNewMetadata(TableMetadata metadata, int newVersion) { + return writeNewMetadataFile(metadata, newVersion).location(); + } + + protected MetadataFile writeNewMetadataFile(TableMetadata metadata, int newVersion) { String newTableMetadataFilePath = newTableMetadataFilePath(metadata, newVersion); - OutputFile newMetadataLocation = io().newOutputFile(newTableMetadataFilePath); + + if (encryptionKeyId == null) { + encryptionKeyId = metadata.property(TableProperties.ENCRYPTION_TABLE_KEY, null); + } + + OutputFile newMetadataFile; + String wrappedMetadataKey; + + if (encryptionKeyId != null) { + + if (encryptionDekLength < 0) { + String encryptionDekLenProp = + metadata.property(TableProperties.ENCRYPTION_DEK_LENGTH, null); + encryptionDekLength = + (encryptionDekLenProp == null) + ? TableProperties.ENCRYPTION_DEK_LENGTH_DEFAULT + : Integer.valueOf(encryptionDekLenProp); + } + + FileIO io = io(); + Preconditions.checkArgument( + io instanceof EncryptingFileIO, + "Cannot encrypt table metadata because the fileIO (%s) does not " + + "implement EncryptingFileIO", + io.getClass()); + EncryptingFileIO encryptingIO = (EncryptingFileIO) io(); + EncryptedOutputFile newEncryptedMetadataFile = + encryptingIO.newEncryptingOutputFile(newTableMetadataFilePath); + + if (newEncryptedMetadataFile.keyMetadata() == null + || newEncryptedMetadataFile.keyMetadata().buffer() == null) { + throw new IllegalStateException("Null key metadata in encrypted table"); + } + + newMetadataFile = newEncryptedMetadataFile.encryptingOutputFile(); + EncryptionManager encryptionManager = encryptingIO.encryptionManager(); + + Preconditions.checkArgument( + encryptionManager instanceof StandardEncryptionManager, + "Cannot encrypt table metadata because the encryption manager (%s) does not " + + "implement StandardEncryptionManager", + encryptionManager.getClass()); + NativeEncryptionKeyMetadata keyMetadata = + (NativeEncryptionKeyMetadata) newEncryptedMetadataFile.keyMetadata(); + ByteBuffer metadataEncryptionKey = keyMetadata.encryptionKey(); + // Wrap (encrypt) metadata file key + ByteBuffer wrappedEncryptionKey = + ((StandardEncryptionManager) encryptionManager).wrapKey(metadataEncryptionKey); + + ByteBuffer metadataAADPrefix = keyMetadata.aadPrefix(); + wrappedMetadataKey = + Base64.getEncoder() + .encodeToString( + EncryptionUtil.createKeyMetadata(wrappedEncryptionKey, metadataAADPrefix) + .buffer() + .array()); + } else { + newMetadataFile = io().newOutputFile(newTableMetadataFilePath); + wrappedMetadataKey = null; + } // write the new metadata // use overwrite to avoid negative caching in S3. this is safe because the metadata location is // always unique because it includes a UUID. 
- TableMetadataParser.overwrite(metadata, newMetadataLocation); + long size = TableMetadataParser.overwrite(metadata, newMetadataFile); - return newMetadataLocation.location(); + if (encryptionKeyId != null && size <= 0) { + throw new RuntimeException("Metadata file size is not recorded in an encrypted table"); + } + + return new MetadataFile(newMetadataFile.location(), wrappedMetadataKey, size); } protected void refreshFromMetadataLocation(String newLocation) { @@ -171,6 +268,10 @@ protected void refreshFromMetadataLocation(String newLocation, int numRetries) { refreshFromMetadataLocation(newLocation, null, numRetries); } + protected void refreshFromMetadataLocation(MetadataFile newLocation, int numRetries) { + refreshFromMetadataLocation(newLocation, null, numRetries); + } + protected void refreshFromMetadataLocation( String newLocation, Predicate shouldRetry, int numRetries) { refreshFromMetadataLocation( @@ -180,17 +281,40 @@ protected void refreshFromMetadataLocation( metadataLocation -> TableMetadataParser.read(io(), metadataLocation)); } + protected void refreshFromMetadataLocation( + MetadataFile newLocation, Predicate shouldRetry, int numRetries) { + refreshFromMetadataLocation( + newLocation, + shouldRetry, + numRetries, + metadataFile -> TableMetadataParser.read(io(), metadataFile)); + } + protected void refreshFromMetadataLocation( String newLocation, Predicate shouldRetry, int numRetries, Function metadataLoader) { + MetadataFile metadataFile = new MetadataFile(newLocation, null, 0); + + refreshFromMetadataLocation( + metadataFile, + shouldRetry, + numRetries, + metadataLocation -> metadataLoader.apply(metadataLocation.location())); + } + + protected void refreshFromMetadataLocation( + MetadataFile newMetadataFile, + Predicate shouldRetry, + int numRetries, + Function metadataLoader) { // use null-safe equality check because new tables have a null metadata location - if (!Objects.equal(currentMetadataLocation, newLocation)) { - LOG.info("Refreshing table metadata from new version: {}", newLocation); + if (!Objects.equal(currentMetadataLocation, newMetadataFile.location())) { + LOG.info("Refreshing table metadata from new version: {}", newMetadataFile.location()); AtomicReference newMetadata = new AtomicReference<>(); - Tasks.foreach(newLocation) + Tasks.foreach(newMetadataFile) .retry(numRetries) .exponentialBackoff(100, 5000, 600000, 4.0 /* 100, 400, 1600, ... */) .throwFailureWhenFinished() @@ -208,8 +332,9 @@ protected void refreshFromMetadataLocation( } this.currentMetadata = newMetadata.get(); - this.currentMetadataLocation = newLocation; - this.version = parseVersion(newLocation); + this.currentMetadataLocation = newMetadataFile.location(); + this.currentMetadataFile = newMetadataFile; + this.version = parseVersion(newMetadataFile.location()); } this.shouldRefresh = false; } diff --git a/core/src/main/java/org/apache/iceberg/MetadataFile.java b/core/src/main/java/org/apache/iceberg/MetadataFile.java new file mode 100644 index 000000000000..8120c4b63d16 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/MetadataFile.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg;
+
+import java.io.Serializable;
+
+public class MetadataFile implements Serializable {
+  private final String location;
+  private final String wrappedKeyMetadata;
+  private final long size;
+
+  public MetadataFile(String location, String wrappedKeyMetadata, long size) {
+    this.location = location;
+    this.wrappedKeyMetadata = wrappedKeyMetadata;
+    this.size = size;
+  }
+
+  /** Location of the metadata file. */
+  public String location() {
+    return location;
+  }
+
+  /**
+   * Key metadata for the metadata file of an encrypted table. It contains the file encryption
+   * key, which must never be stored in the open; it is kept wrapped (encrypted) with the table
+   * master key.
+   *
+   * @return base64-encoded wrapped key metadata for the metadata file
+   */
+  public String wrappedKeyMetadata() {
+    return wrappedKeyMetadata;
+  }
+
+  /**
+   * In encrypted tables, returns the size of the metadata file. Must be a verified value, taken
+   * from a trusted source, so that decryption can detect truncation or replacement of the file.
+   * In unencrypted tables, can return 0.
+   */
+  public long size() {
+    return size;
+  }
+}
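For orientation, a condensed sketch of how this class flows through a metastore commit and refresh, paraphrasing the HiveTableOperations changes later in this patch (`parameters` stands for the HMS table parameter map; the constants are the *_PROP strings added to BaseMetastoreTableOperations above):

    // Commit side: persist the three fields together.
    MetadataFile committed = writeNewMetadataFileIfRequired(newTable, metadata);
    parameters.put(METADATA_LOCATION_PROP, committed.location());
    if (committed.wrappedKeyMetadata() != null) {
      parameters.put(METADATA_WRAPPED_KEY_PROP, committed.wrappedKeyMetadata());
    }
    if (committed.size() > 0) {
      parameters.put(METADATA_SIZE_PROP, Long.toString(committed.size()));
    }

    // Refresh side: reassemble the triple before loading the metadata JSON.
    MetadataFile file = new MetadataFile(metadataLocation, metadataKey, metadataSize);
    refreshFromMetadataLocation(file, metadataRefreshMaxRetries);

diff --git a/core/src/main/java/org/apache/iceberg/SerializableTable.java b/core/src/main/java/org/apache/iceberg/SerializableTable.java
index 9e0055a10376..24cfe9e7c287 100644
--- a/core/src/main/java/org/apache/iceberg/SerializableTable.java
+++ b/core/src/main/java/org/apache/iceberg/SerializableTable.java
@@ -23,6 +23,7 @@ import java.util.Map;
 import java.util.UUID;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.iceberg.encryption.EncryptingFileIO;
 import org.apache.iceberg.encryption.EncryptionManager;
 import org.apache.iceberg.hadoop.HadoopConfigurable;
 import org.apache.iceberg.io.FileIO;
@@ -113,8 +114,13 @@ private String metadataFileLocation(Table table) {
   }
 
   private FileIO fileIO(Table table) {
-    if (table.io() instanceof HadoopConfigurable) {
-      ((HadoopConfigurable) table.io()).serializeConfWith(SerializableConfSupplier::new);
+    FileIO fileIO = table.io();
+    if (fileIO instanceof EncryptingFileIO) {
+      fileIO = ((EncryptingFileIO) fileIO).sourceFileIO();
+    }
+
+    if (fileIO instanceof HadoopConfigurable) {
+      ((HadoopConfigurable) fileIO).serializeConfWith(SerializableConfSupplier::new);
     }
 
     return table.io();
diff --git a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java
index 8bda184142cd..01d1da9c645b 100644
--- a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java
+++ b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java
@@ -25,7 +25,9 @@ import java.io.OutputStream;
 import java.io.OutputStreamWriter;
 import java.io.StringWriter;
+import java.nio.ByteBuffer;
 import java.nio.charset.StandardCharsets;
+import java.util.Base64;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Locale;
@@ -34,10 +36,16 @@ import java.util.zip.GZIPOutputStream;
 import org.apache.iceberg.TableMetadata.MetadataLogEntry;
 import org.apache.iceberg.TableMetadata.SnapshotLogEntry;
+import org.apache.iceberg.encryption.EncryptingFileIO;
+import org.apache.iceberg.encryption.EncryptionKeyMetadata;
+import org.apache.iceberg.encryption.EncryptionUtil;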
+import org.apache.iceberg.encryption.NativeEncryptionKeyMetadata;
+import org.apache.iceberg.encryption.StandardEncryptionManager;
 import org.apache.iceberg.exceptions.RuntimeIOException;
 import org.apache.iceberg.io.FileIO;
 import org.apache.iceberg.io.InputFile;
 import org.apache.iceberg.io.OutputFile;
+import org.apache.iceberg.io.PositionOutputStream;
 import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
@@ -111,18 +119,19 @@ private TableMetadataParser() {}
   static final String STATISTICS = "statistics";
   static final String PARTITION_STATISTICS = "partition-statistics";
 
-  public static void overwrite(TableMetadata metadata, OutputFile outputFile) {
-    internalWrite(metadata, outputFile, true);
+  public static long overwrite(TableMetadata metadata, OutputFile outputFile) {
+    return internalWrite(metadata, outputFile, true);
   }
 
-  public static void write(TableMetadata metadata, OutputFile outputFile) {
-    internalWrite(metadata, outputFile, false);
+  public static long write(TableMetadata metadata, OutputFile outputFile) {
+    return internalWrite(metadata, outputFile, false);
   }
 
-  public static void internalWrite(
+  public static long internalWrite(
       TableMetadata metadata, OutputFile outputFile, boolean overwrite) {
     boolean isGzip = Codec.fromFileName(outputFile.location()) == Codec.GZIP;
     OutputStream stream = overwrite ? outputFile.createOrOverwrite() : outputFile.create();
+
     try (OutputStream ou = isGzip ? new GZIPOutputStream(stream) : stream;
         OutputStreamWriter writer = new OutputStreamWriter(ou, StandardCharsets.UTF_8)) {
       JsonGenerator generator = JsonUtil.factory().createGenerator(writer);
@@ -132,6 +141,17 @@ public static void internalWrite(
     } catch (IOException e) {
       throw new RuntimeIOException(e, "Failed to write json to file: %s", outputFile);
     }
+
+    // Get the stored file length (for an encrypted file, the ciphertext length)
+    if (stream instanceof PositionOutputStream) {
+      try {
+        return ((PositionOutputStream) stream).storedLength();
+      } catch (IOException e) {
+        throw new RuntimeIOException(e, "Failed to get json file length: %s", outputFile);
+      }
+    }
+
+    return -1L;
   }
 
   public static String getFileExtension(String codecName) {
@@ -273,6 +293,45 @@ public static TableMetadata read(FileIO io, String path) {
     return read(io, io.newInputFile(path));
   }
 
+  public static TableMetadata read(FileIO io, MetadataFile metadataFile) {
+    if (metadataFile.wrappedKeyMetadata() == null) {
+      return read(io, io.newInputFile(metadataFile.location()));
+    } else {
+      Preconditions.checkArgument(
+          io instanceof EncryptingFileIO,
+          "Cannot read table metadata file %s because it is encrypted but the configured "
+              + "FileIO (%s) does not implement EncryptingFileIO",
+          metadataFile.location(),
+          io.getClass());
+      EncryptingFileIO encryptingFileIO = (EncryptingFileIO) io;
+
+      Preconditions.checkArgument(
+          encryptingFileIO.encryptionManager() instanceof StandardEncryptionManager,
+          "Cannot decrypt table metadata because the encryption manager (%s) does not "
+              + "implement StandardEncryptionManager",
+          encryptingFileIO.encryptionManager().getClass());
+
+      StandardEncryptionManager standardEncryptionManager =
+          (StandardEncryptionManager) encryptingFileIO.encryptionManager();
+
+      ByteBuffer keyMetadataBytes =
+          ByteBuffer.wrap(Base64.getDecoder().decode(metadataFile.wrappedKeyMetadata()));
+
+      // Unwrap (decrypt) metadata file key
+      NativeEncryptionKeyMetadata keyMetadata =
+          EncryptionUtil.parseKeyMetadata(keyMetadataBytes);
+      ByteBuffer unwrappedMetadataKey =
+          standardEncryptionManager.unwrapKey(keyMetadata.encryptionKey());
+
+      EncryptionKeyMetadata unwrappedKeyMetadata =
+          EncryptionUtil.createKeyMetadata(unwrappedMetadataKey, keyMetadata.aadPrefix());
+
+      InputFile input =
+          encryptingFileIO.newDecryptingInputFile(
+              metadataFile.location(), metadataFile.size(), unwrappedKeyMetadata.buffer());
+
+      return read(io, input);
+    }
+  }
+
   public static TableMetadata read(FileIO io, InputFile file) {
     Codec codec = Codec.fromFileName(file.location());
     try (InputStream is =
diff --git a/core/src/main/java/org/apache/iceberg/encryption/AesGcmOutputStream.java b/core/src/main/java/org/apache/iceberg/encryption/AesGcmOutputStream.java
index da437b7540db..0922a67622c9 100644
--- a/core/src/main/java/org/apache/iceberg/encryption/AesGcmOutputStream.java
+++ b/core/src/main/java/org/apache/iceberg/encryption/AesGcmOutputStream.java
@@ -117,6 +117,10 @@ public void flush() throws IOException {
 
   @Override
   public void close() throws IOException {
+    if (isClosed) {
+      return;
+    }
+
     if (!isHeaderWritten) {
       writeHeader();
     }
diff --git a/core/src/main/java/org/apache/iceberg/encryption/EncryptionUtil.java b/core/src/main/java/org/apache/iceberg/encryption/EncryptionUtil.java
index e2cf98bf767f..07512332af76 100644
--- a/core/src/main/java/org/apache/iceberg/encryption/EncryptionUtil.java
+++ b/core/src/main/java/org/apache/iceberg/encryption/EncryptionUtil.java
@@ -18,13 +18,12 @@
  */
 package org.apache.iceberg.encryption;
 
+import java.nio.ByteBuffer;
 import java.util.Map;
 import org.apache.iceberg.CatalogProperties;
-import org.apache.iceberg.TableProperties;
 import org.apache.iceberg.common.DynConstructors;
 import org.apache.iceberg.io.OutputFile;
 import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
-import org.apache.iceberg.util.PropertyUtil;
 
 public class EncryptionUtil {
 
@@ -71,30 +70,35 @@ public static KeyManagementClient createKmsClient(Map catalogPro
   }
 
   public static EncryptionManager createEncryptionManager(
-      Map tableProperties, KeyManagementClient kmsClient) {
+      String tableKeyId, int dekLength, KeyManagementClient kmsClient) {
     Preconditions.checkArgument(kmsClient != null, "Invalid KMS client: null");
 
-    String tableKeyId = tableProperties.get(TableProperties.ENCRYPTION_TABLE_KEY);
-
     if (null == tableKeyId) {
       // Unencrypted table
       return PlaintextEncryptionManager.instance();
     }
 
-    int dataKeyLength =
-        PropertyUtil.propertyAsInt(
-            tableProperties,
-            TableProperties.ENCRYPTION_DEK_LENGTH,
-            TableProperties.ENCRYPTION_DEK_LENGTH_DEFAULT);
-
     Preconditions.checkState(
-        dataKeyLength == 16 || dataKeyLength == 24 || dataKeyLength == 32,
+        dekLength == 16 || dekLength == 24 || dekLength == 32,
         "Invalid data key length: %s (must be 16, 24, or 32)",
-        dataKeyLength);
+        dekLength);
 
-    return new StandardEncryptionManager(tableKeyId, dataKeyLength, kmsClient);
+    return new StandardEncryptionManager(tableKeyId, dekLength, kmsClient);
   }
 
   public static EncryptedOutputFile plainAsEncryptedOutput(OutputFile encryptingOutputFile) {
     return new BaseEncryptedOutputFile(encryptingOutputFile, EncryptionKeyMetadata.empty());
   }
+
+  public static EncryptionKeyMetadata createKeyMetadata(ByteBuffer key, ByteBuffer aadPrefix) {
+    Preconditions.checkState(
+        key.arrayOffset() == 0, "Invalid key array offset %s", key.arrayOffset());
+    Preconditions.checkState(
+        aadPrefix.arrayOffset() == 0, "Invalid aad array offset %s", aadPrefix.arrayOffset());
+    return new
StandardKeyMetadata(key.array(), aadPrefix.array()); + } + + public static NativeEncryptionKeyMetadata parseKeyMetadata(ByteBuffer keyMetadataBytes) { + return StandardKeyMetadata.parse(keyMetadataBytes); + } } diff --git a/core/src/main/java/org/apache/iceberg/encryption/KeyManagementClient.java b/core/src/main/java/org/apache/iceberg/encryption/KeyManagementClient.java index a7fb494cc8e1..6f834c69ed86 100644 --- a/core/src/main/java/org/apache/iceberg/encryption/KeyManagementClient.java +++ b/core/src/main/java/org/apache/iceberg/encryption/KeyManagementClient.java @@ -24,7 +24,7 @@ import java.util.Map; /** A minimum client interface to connect to a key management service (KMS). */ -interface KeyManagementClient extends Serializable, Closeable { +public interface KeyManagementClient extends Serializable, Closeable { /** * Wrap a secret key, using a wrapping/master key which is stored in KMS and referenced by an ID. diff --git a/core/src/main/java/org/apache/iceberg/hadoop/HadoopCatalog.java b/core/src/main/java/org/apache/iceberg/hadoop/HadoopCatalog.java index 0b3a7c4b0507..92ba25af0f1c 100644 --- a/core/src/main/java/org/apache/iceberg/hadoop/HadoopCatalog.java +++ b/core/src/main/java/org/apache/iceberg/hadoop/HadoopCatalog.java @@ -18,7 +18,6 @@ */ package org.apache.iceberg.hadoop; -import java.io.Closeable; import java.io.FileNotFoundException; import java.io.IOException; import java.io.UncheckedIOException; @@ -44,8 +43,6 @@ import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.SupportsNamespaces; import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.encryption.EncryptionUtil; -import org.apache.iceberg.encryption.KeyManagementClient; import org.apache.iceberg.exceptions.AlreadyExistsException; import org.apache.iceberg.exceptions.NamespaceNotEmptyException; import org.apache.iceberg.exceptions.NoSuchNamespaceException; @@ -56,6 +53,7 @@ import org.apache.iceberg.relocated.com.google.common.base.Joiner; import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.base.Strings; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.collect.Sets; @@ -79,7 +77,7 @@ *

Note: The HadoopCatalog requires that the underlying file system supports atomic rename. */ public class HadoopCatalog extends BaseMetastoreCatalog - implements Closeable, SupportsNamespaces, Configurable { + implements SupportsNamespaces, Configurable { private static final Logger LOG = LoggerFactory.getLogger(HadoopCatalog.class); @@ -98,7 +96,6 @@ public class HadoopCatalog extends BaseMetastoreCatalog private LockManager lockManager; private boolean suppressPermissionError = false; private Map catalogProperties; - private KeyManagementClient keyManagementClient; public HadoopCatalog() {} @@ -107,28 +104,24 @@ public void initialize(String name, Map properties) { this.catalogProperties = ImmutableMap.copyOf(properties); String inputWarehouseLocation = properties.get(CatalogProperties.WAREHOUSE_LOCATION); Preconditions.checkArgument( - inputWarehouseLocation != null && inputWarehouseLocation.length() > 0, + !Strings.isNullOrEmpty(inputWarehouseLocation), "Cannot initialize HadoopCatalog because warehousePath must not be null or empty"); this.catalogName = name; this.warehouseLocation = LocationUtil.stripTrailingSlash(inputWarehouseLocation); this.fs = Util.getFs(new Path(warehouseLocation), conf); - String fileIOImpl = properties.get(CatalogProperties.FILE_IO_IMPL); - this.fileIO = - fileIOImpl == null - ? new HadoopFileIO(conf) - : CatalogUtil.loadFileIO(fileIOImpl, properties, conf); + String fileIOImpl = + properties.getOrDefault( + CatalogProperties.FILE_IO_IMPL, "org.apache.iceberg.hadoop.HadoopFileIO"); - if (catalogProperties.containsKey(CatalogProperties.ENCRYPTION_KMS_TYPE)) { - this.keyManagementClient = EncryptionUtil.createKmsClient(properties); - } + this.fileIO = CatalogUtil.loadFileIO(fileIOImpl, properties, conf); this.lockManager = LockManagers.from(properties); this.closeableGroup = new CloseableGroup(); closeableGroup.addCloseable(lockManager); - closeableGroup.addCloseable(keyManagementClient); + closeableGroup.addCloseable(metricsReporter()); closeableGroup.setSuppressCloseFailure(true); this.suppressPermissionError = @@ -234,11 +227,7 @@ protected boolean isValidIdentifier(TableIdentifier identifier) { @Override protected TableOperations newTableOps(TableIdentifier identifier) { return new HadoopTableOperations( - new Path(defaultWarehouseLocation(identifier)), - fileIO, - keyManagementClient, - conf, - lockManager); + new Path(defaultWarehouseLocation(identifier)), fileIO, conf, lockManager); } @Override @@ -272,7 +261,7 @@ public boolean dropTable(TableIdentifier identifier, boolean purge) { if (purge) { // Since the data files and the metadata files may store in different locations, // so it has to call dropTableData to force delete the data file. 
- CatalogUtil.dropTableData(ops.io(), ops.encryption(), lastMetadata); + CatalogUtil.dropTableData(ops.io(), lastMetadata); } return fs.delete(tablePath, true /* recursive */); } diff --git a/core/src/main/java/org/apache/iceberg/hadoop/HadoopTableOperations.java b/core/src/main/java/org/apache/iceberg/hadoop/HadoopTableOperations.java index f8ed0520ec41..4e815ceff59a 100644 --- a/core/src/main/java/org/apache/iceberg/hadoop/HadoopTableOperations.java +++ b/core/src/main/java/org/apache/iceberg/hadoop/HadoopTableOperations.java @@ -18,8 +18,6 @@ */ package org.apache.iceberg.hadoop; -import static org.apache.iceberg.TableProperties.ENCRYPTION_TABLE_KEY; - import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; @@ -40,9 +38,6 @@ import org.apache.iceberg.TableOperations; import org.apache.iceberg.TableProperties; import org.apache.iceberg.encryption.EncryptionManager; -import org.apache.iceberg.encryption.EncryptionUtil; -import org.apache.iceberg.encryption.KeyManagementClient; -import org.apache.iceberg.encryption.PlaintextEncryptionManager; import org.apache.iceberg.exceptions.CommitFailedException; import org.apache.iceberg.exceptions.RuntimeIOException; import org.apache.iceberg.exceptions.ValidationException; @@ -72,7 +67,6 @@ public class HadoopTableOperations implements TableOperations { private final Path location; private final FileIO fileIO; private final LockManager lockManager; - private final KeyManagementClient keyManagementClient; private volatile TableMetadata currentMetadata = null; private volatile Integer version = null; @@ -80,20 +74,10 @@ public class HadoopTableOperations implements TableOperations { protected HadoopTableOperations( Path location, FileIO fileIO, Configuration conf, LockManager lockManager) { - this(location, fileIO, null, conf, lockManager); - } - - protected HadoopTableOperations( - Path location, - FileIO fileIO, - KeyManagementClient keyManagementClient, - Configuration conf, - LockManager lockManager) { this.conf = conf; this.location = location; this.fileIO = fileIO; this.lockManager = lockManager; - this.keyManagementClient = keyManagementClient; } @Override @@ -194,27 +178,6 @@ public FileIO io() { return fileIO; } - @Override - public EncryptionManager encryption() { - if (keyManagementClient == null) { - return PlaintextEncryptionManager.instance(); - } - - if (current() == null) { - throw new IllegalStateException("No table metadata"); - } - - if (current().formatVersion() < 2) { - if (current().properties().containsKey(ENCRYPTION_TABLE_KEY)) { - throw new IllegalStateException("Encryption is not supported in v1 tables"); - } - - return PlaintextEncryptionManager.instance(); - } - - return EncryptionUtil.createEncryptionManager(current().properties(), keyManagementClient); - } - @Override public LocationProvider locationProvider() { return LocationProviders.locationsFor(current().location(), current().properties()); diff --git a/core/src/test/java/org/apache/iceberg/encryption/EncryptionTestHelpers.java b/core/src/test/java/org/apache/iceberg/encryption/EncryptionTestHelpers.java index aa49e1c40fe2..cdc8c2e1515b 100644 --- a/core/src/test/java/org/apache/iceberg/encryption/EncryptionTestHelpers.java +++ b/core/src/test/java/org/apache/iceberg/encryption/EncryptionTestHelpers.java @@ -31,11 +31,10 @@ public static EncryptionManager createEncryptionManager() { Map catalogProperties = Maps.newHashMap(); catalogProperties.put( CatalogProperties.ENCRYPTION_KMS_IMPL, UnitestKMS.class.getCanonicalName()); - Map 
tableProperties = Maps.newHashMap(); - tableProperties.put(TableProperties.ENCRYPTION_TABLE_KEY, UnitestKMS.MASTER_KEY_NAME1); - tableProperties.put(TableProperties.FORMAT_VERSION, "2"); return EncryptionUtil.createEncryptionManager( - tableProperties, EncryptionUtil.createKmsClient(catalogProperties)); + UnitestKMS.MASTER_KEY_NAME1, + TableProperties.ENCRYPTION_DEK_LENGTH_DEFAULT, + EncryptionUtil.createKmsClient(catalogProperties)); } } diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index 239293f799c4..d2377d2ab861 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -40,12 +40,14 @@ import org.apache.iceberg.BaseMetastoreOperations; import org.apache.iceberg.BaseMetastoreTableOperations; import org.apache.iceberg.ClientPool; +import org.apache.iceberg.MetadataFile; import org.apache.iceberg.PartitionSpecParser; import org.apache.iceberg.Snapshot; import org.apache.iceberg.SnapshotSummary; import org.apache.iceberg.SortOrderParser; import org.apache.iceberg.TableMetadata; import org.apache.iceberg.TableProperties; +import org.apache.iceberg.encryption.EncryptingFileIO; import org.apache.iceberg.encryption.EncryptionManager; import org.apache.iceberg.encryption.EncryptionUtil; import org.apache.iceberg.encryption.KeyManagementClient; @@ -113,8 +115,9 @@ public static String translateToIcebergProp(String hmsProp) { private final FileIO fileIO; private final KeyManagementClient keyManagementClient; private final ClientPool metaClients; - private boolean fetchedTableKeyID = false; - private String tableEncryptionKeyID; + + private EncryptionManager encryptionManager; + private EncryptingFileIO encryptingFileIO; /** Tests only */ protected HiveTableOperations( @@ -158,38 +161,69 @@ protected String tableName() { @Override public FileIO io() { - return fileIO; + if (encryptingFileIO != null) { + return encryptingFileIO; + } + + encryptingFileIO = EncryptingFileIO.combine(fileIO, encryption()); + return encryptingFileIO; } @Override public EncryptionManager encryption() { + if (encryptionManager != null) { + return encryptionManager; + } + if (keyManagementClient == null) { - return PlaintextEncryptionManager.instance(); + encryptionManager = PlaintextEncryptionManager.instance(); + return encryptionManager; } - if (current() == null) { - throw new IllegalStateException("No table metadata"); + String key = encryptionKeyIdFromProps(); // writing + + if (key == null) { + key = encryptionKeyIdFromHms(); // reading } - if (current().formatVersion() < 2) { - if (current().properties().containsKey(ENCRYPTION_TABLE_KEY)) { - throw new IllegalStateException("Encryption is not supported in v1 tables"); - } + encryptionManager = + EncryptionUtil.createEncryptionManager(key, dekLength(), keyManagementClient); - return PlaintextEncryptionManager.instance(); + return encryptionManager; + } + + private String encryptionKeyIdFromHms() { + String keyID; + try { + Table table = loadHmsTable(); + keyID = table.getParameters().get(TableProperties.ENCRYPTION_TABLE_KEY); + } catch (TException e) { + String errMsg = + String.format("Failed to get table info from metastore %s.%s", database, tableName); + throw new RuntimeException(errMsg, e); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException("Interrupted during 
encryption key id retrieval", e); } - return EncryptionUtil.createEncryptionManager(current().properties(), keyManagementClient); + return keyID; } @Override protected void doRefresh() { String metadataLocation = null; + String metadataKey = null; + long metadataSize = -1L; try { Table table = metaClients.run(client -> client.getTable(database, tableName)); HiveOperationsBase.validateTableIsIceberg(table, fullName); metadataLocation = table.getParameters().get(METADATA_LOCATION_PROP); + metadataKey = table.getParameters().get(METADATA_WRAPPED_KEY_PROP); + String metadataSizeString = table.getParameters().get(METADATA_SIZE_PROP); + if (metadataSizeString != null) { + metadataSize = Long.valueOf(metadataSizeString); + } } catch (NoSuchObjectException e) { if (currentMetadataLocation() != null) { @@ -206,14 +240,16 @@ protected void doRefresh() { throw new RuntimeException("Interrupted during refresh", e); } - refreshFromMetadataLocation(metadataLocation, metadataRefreshMaxRetries); + MetadataFile metadataFile = new MetadataFile(metadataLocation, metadataKey, metadataSize); + + refreshFromMetadataLocation(metadataFile, metadataRefreshMaxRetries); } @SuppressWarnings("checkstyle:CyclomaticComplexity") @Override protected void doCommit(TableMetadata base, TableMetadata metadata) { boolean newTable = base == null; - String newMetadataLocation = writeNewMetadataIfRequired(newTable, metadata); + MetadataFile newMetadataFile = writeNewMetadataFileIfRequired(newTable, metadata); boolean hiveEngineEnabled = hiveEngineEnabled(metadata, conf); boolean keepHiveStats = conf.getBoolean(ConfigProperties.KEEP_HIVE_STATS, false); @@ -277,7 +313,7 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) { .map(Snapshot::summary) .orElseGet(ImmutableMap::of); setHmsTableParameters( - newMetadataLocation, tbl, metadata, removedProps, hiveEngineEnabled, summary); + newMetadataFile, tbl, metadata, removedProps, hiveEngineEnabled, summary); if (!keepHiveStats) { tbl.getParameters().remove(StatsSetupConst.COLUMN_STATS_ACCURATE); @@ -336,7 +372,7 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) { e); commitStatus = BaseMetastoreOperations.CommitStatus.valueOf( - checkCommitStatus(newMetadataLocation, metadata).name()); + checkCommitStatus(newMetadataFile.location(), metadata).name()); switch (commitStatus) { case SUCCESS: break; @@ -358,15 +394,16 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) { throw new CommitFailedException(e); } finally { - HiveOperationsBase.cleanupMetadataAndUnlock(io(), commitStatus, newMetadataLocation, lock); + HiveOperationsBase.cleanupMetadataAndUnlock(io(), commitStatus, newMetadataFile.location(), lock); } LOG.info( - "Committed to table {} with the new metadata location {}", fullName, newMetadataLocation); + "Committed to table {} with the new metadata location {}", fullName, newMetadataFile.location()); } + @SuppressWarnings("checkstyle:CyclomaticComplexity") private void setHmsTableParameters( - String newMetadataLocation, + MetadataFile newMetadataFile, Table tbl, TableMetadata metadata, Set obsoleteProps, @@ -393,12 +430,32 @@ private void setHmsTableParameters( obsoleteProps.forEach(parameters::remove); parameters.put(TABLE_TYPE_PROP, ICEBERG_TABLE_TYPE_VALUE.toUpperCase(Locale.ENGLISH)); - parameters.put(METADATA_LOCATION_PROP, newMetadataLocation); + parameters.put(METADATA_LOCATION_PROP, newMetadataFile.location()); + + if (newMetadataFile.wrappedKeyMetadata() != null) { + parameters.put(METADATA_WRAPPED_KEY_PROP, 
newMetadataFile.wrappedKeyMetadata()); + } + + if (newMetadataFile.size() > 0) { + parameters.put(METADATA_SIZE_PROP, Long.toString(newMetadataFile.size())); + } if (currentMetadataLocation() != null && !currentMetadataLocation().isEmpty()) { parameters.put(PREVIOUS_METADATA_LOCATION_PROP, currentMetadataLocation()); } + if (currentMetadataFile() != null) { + if (currentMetadataFile().wrappedKeyMetadata() != null + && !currentMetadataFile().wrappedKeyMetadata().isEmpty()) { + parameters.put( + PREVIOUS_METADATA_WRAPPED_KEY_PROP, currentMetadataFile().wrappedKeyMetadata()); + } + + if (currentMetadataFile().size() > 0) { + parameters.put(PREVIOUS_METADATA_SIZE_PROP, Long.toString(currentMetadataFile().size())); + } + } + // If needed set the 'storage_handler' property to enable query from Hive if (hiveEngineEnabled) { parameters.put( diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkCatalogHadoopOverrides.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkCatalogHadoopOverrides.java index 7ff507ed0694..42b151b275b1 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkCatalogHadoopOverrides.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkCatalogHadoopOverrides.java @@ -26,6 +26,8 @@ import org.apache.iceberg.Parameters; import org.apache.iceberg.Table; import org.apache.iceberg.TestHelpers; +import org.apache.iceberg.encryption.EncryptingFileIO; +import org.apache.iceberg.io.FileIO; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.spark.CatalogTestBase; import org.apache.iceberg.spark.SparkCatalog; @@ -90,7 +92,13 @@ public void dropTable() { @TestTemplate public void testTableFromCatalogHasOverrides() throws Exception { Table table = getIcebergTableFromSparkCatalog(); - Configuration conf = ((Configurable) table.io()).getConf(); + FileIO io = table.io(); + + if (io instanceof EncryptingFileIO) { + io = ((EncryptingFileIO) io).sourceFileIO(); + } + + Configuration conf = ((Configurable) io).getConf(); String actualCatalogOverride = conf.get(configToOverride, "/whammies"); assertThat(actualCatalogOverride) .as( @@ -101,7 +109,13 @@ public void testTableFromCatalogHasOverrides() throws Exception { @TestTemplate public void ensureRoundTripSerializedTableRetainsHadoopConfig() throws Exception { Table table = getIcebergTableFromSparkCatalog(); - Configuration originalConf = ((Configurable) table.io()).getConf(); + FileIO io = table.io(); + + if (io instanceof EncryptingFileIO) { + io = ((EncryptingFileIO) io).sourceFileIO(); + } + + Configuration originalConf = ((Configurable) io).getConf(); String actualCatalogOverride = originalConf.get(configToOverride, "/whammies"); assertThat(actualCatalogOverride) .as( @@ -112,7 +126,13 @@ public void ensureRoundTripSerializedTableRetainsHadoopConfig() throws Exception Table serializableTable = SerializableTableWithSize.copyOf(table); Table kryoSerializedTable = KryoHelpers.roundTripSerialize(SerializableTableWithSize.copyOf(table)); - Configuration configFromKryoSerde = ((Configurable) kryoSerializedTable.io()).getConf(); + io = kryoSerializedTable.io(); + + if (io instanceof EncryptingFileIO) { + io = ((EncryptingFileIO) io).sourceFileIO(); + } + + Configuration configFromKryoSerde = ((Configurable) io).getConf(); String kryoSerializedCatalogOverride = configFromKryoSerde.get(configToOverride, "/whammies"); assertThat(kryoSerializedCatalogOverride) .as( @@ -121,7 +141,14 @@ 
public void ensureRoundTripSerializedTableRetainsHadoopConfig() throws Exception // Do the same for Java based serde Table javaSerializedTable = TestHelpers.roundTripSerialize(serializableTable); - Configuration configFromJavaSerde = ((Configurable) javaSerializedTable.io()).getConf(); + // Configuration configFromJavaSerde = ((Configurable) javaSerializedTable.io()).getConf(); + io = javaSerializedTable.io(); + + if (io instanceof EncryptingFileIO) { + io = ((EncryptingFileIO) io).sourceFileIO(); + } + + Configuration configFromJavaSerde = ((Configurable) io).getConf(); String javaSerializedCatalogOverride = configFromJavaSerde.get(configToOverride, "/whammies"); assertThat(javaSerializedCatalogOverride) .as( diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestAvroTableEncryption.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestAvroTableEncryption.java index 9b61c6f6f7be..1ba2616cdd61 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestAvroTableEncryption.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestAvroTableEncryption.java @@ -21,7 +21,6 @@ import static org.apache.iceberg.Files.localInput; import static org.apache.iceberg.types.Types.NestedField.optional; -import java.io.File; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.List; @@ -30,6 +29,7 @@ import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.CatalogProperties; import org.apache.iceberg.MetadataTableType; +import org.apache.iceberg.Parameters; import org.apache.iceberg.Schema; import org.apache.iceberg.avro.Avro; import org.apache.iceberg.encryption.Ciphers; @@ -39,72 +39,52 @@ import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.relocated.com.google.common.collect.Streams; +import org.apache.iceberg.spark.CatalogTestBase; import org.apache.iceberg.spark.SparkCatalogConfig; -import org.apache.iceberg.spark.SparkTestBaseWithCatalog; import org.apache.iceberg.types.Types; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.TestTemplate; -@RunWith(Parameterized.class) -public class TestAvroTableEncryption extends SparkTestBaseWithCatalog { +public class TestAvroTableEncryption extends CatalogTestBase { - private static Map appendCatalogEncryptionConfigProperties( - Map props) { + private static Map appendCatalogEncryptionProperties(Map props) { Map newProps = Maps.newHashMap(); newProps.putAll(props); newProps.put(CatalogProperties.ENCRYPTION_KMS_IMPL, UnitestKMS.class.getCanonicalName()); return newProps; } - // these parameters are broken out to avoid changes that need to modify lots of test suites - @Parameterized.Parameters(name = "catalogName = {0}, implementation = {1}, config = {2}") - public static Object[][] parameters() { + @Parameters(name = "catalogName = {0}, implementation = {1}, config = {2}") + protected static Object[][] parameters() { return new Object[][] { { SparkCatalogConfig.HIVE.catalogName(), SparkCatalogConfig.HIVE.implementation(), - appendCatalogEncryptionConfigProperties(SparkCatalogConfig.HIVE.properties()) - }, - { - SparkCatalogConfig.HADOOP.catalogName(), - SparkCatalogConfig.HADOOP.implementation(), - 
appendCatalogEncryptionConfigProperties(SparkCatalogConfig.HADOOP.properties()) - }, - { - SparkCatalogConfig.SPARK.catalogName(), - SparkCatalogConfig.SPARK.implementation(), - appendCatalogEncryptionConfigProperties(SparkCatalogConfig.SPARK.properties()) + appendCatalogEncryptionProperties(SparkCatalogConfig.HIVE.properties()) } }; } - public TestAvroTableEncryption( - String catalogName, String implementation, Map config) { - super(catalogName, implementation, config); - } - - @Before - public void createTables() throws IOException { + @BeforeEach + public void createTables() { sql( "CREATE TABLE %s (id bigint, data string, float float) USING iceberg " + "TBLPROPERTIES ( " + "'encryption.key-id'='%s' , " - + "'write.format.default'='AVRO' , " - + "'format-version'='2')", + + "'write.format.default'='AVRO')", tableName, UnitestKMS.MASTER_KEY_NAME1); + sql("INSERT INTO %s VALUES (1, 'a', 1.0), (2, 'b', 2.0), (3, 'c', float('NaN'))", tableName); } - @After + @AfterEach public void removeTables() { sql("DROP TABLE IF EXISTS %s", tableName); } - @Test + @TestTemplate public void testSelect() { List expected = ImmutableList.of(row(1L, "a", 1.0F), row(2L, "b", 2.0F), row(3L, "c", Float.NaN)); @@ -112,7 +92,7 @@ public void testSelect() { assertEquals("Should return all expected rows", expected, sql("SELECT * FROM %s", tableName)); } - @Test + @TestTemplate public void testDirectDataFileRead() throws IOException { List dataFileTable = sql("SELECT file_path FROM %s.%s", tableName, MetadataTableType.ALL_DATA_FILES); @@ -137,53 +117,4 @@ public void testDirectDataFileRead() throws IOException { magic, Ciphers.GCM_STREAM_MAGIC_STRING.getBytes(StandardCharsets.UTF_8)); } } - - @Test - public void testManifestEncryption() throws IOException { - List manifestFileTable = - sql("SELECT path FROM %s.%s", tableName, MetadataTableType.MANIFESTS); - - List manifestFiles = - Streams.concat(manifestFileTable.stream()) - .map(row -> (String) row[0]) - .collect(Collectors.toList()); - - if (!(manifestFiles.size() > 0)) { - throw new RuntimeException("No manifest files found for table " + tableName); - } - - String metadataFolderPath = null; - byte[] magic = new byte[4]; - - // Check encryption of manifest files - for (String manifestFilePath : manifestFiles) { - SeekableInputStream manifestFileReader = localInput(manifestFilePath).newStream(); - manifestFileReader.read(magic); - manifestFileReader.close(); - Assert.assertArrayEquals( - magic, Ciphers.GCM_STREAM_MAGIC_STRING.getBytes(StandardCharsets.UTF_8)); - - if (metadataFolderPath == null) { - metadataFolderPath = new File(manifestFilePath).getParent().replaceFirst("file:", ""); - } - } - - // Find metadata list files and check their encryption - File[] listOfMetadataFiles = new File(metadataFolderPath).listFiles(); - boolean foundManifestListFile = false; - for (File metadataFile : listOfMetadataFiles) { - if (metadataFile.getName().startsWith("snap-")) { - foundManifestListFile = true; - SeekableInputStream manifestFileReader = localInput(metadataFile).newStream(); - manifestFileReader.read(magic); - manifestFileReader.close(); - Assert.assertArrayEquals( - magic, Ciphers.GCM_STREAM_MAGIC_STRING.getBytes(StandardCharsets.UTF_8)); - } - } - - if (!foundManifestListFile) { - throw new RuntimeException("No manifest list files found for table " + tableName); - } - } } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestTableEncryption.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestTableEncryption.java index 
df8741eebc75..54bd4089e090 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestTableEncryption.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestTableEncryption.java @@ -30,80 +30,62 @@ import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.CatalogProperties; import org.apache.iceberg.MetadataTableType; +import org.apache.iceberg.Parameters; import org.apache.iceberg.Schema; import org.apache.iceberg.encryption.Ciphers; import org.apache.iceberg.encryption.UnitestKMS; +import org.apache.iceberg.io.InputFile; import org.apache.iceberg.io.SeekableInputStream; import org.apache.iceberg.parquet.Parquet; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.relocated.com.google.common.collect.Streams; +import org.apache.iceberg.spark.CatalogTestBase; import org.apache.iceberg.spark.SparkCatalogConfig; -import org.apache.iceberg.spark.SparkTestBaseWithCatalog; import org.apache.iceberg.types.Types; import org.apache.parquet.crypto.ParquetCryptoRuntimeException; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.TestTemplate; -@RunWith(Parameterized.class) -public class TestTableEncryption extends SparkTestBaseWithCatalog { +public class TestTableEncryption extends CatalogTestBase { - private static Map appendCatalogEncryptionConfigProperties( - Map props) { + private static Map appendCatalogEncryptionProperties(Map props) { Map newProps = Maps.newHashMap(); newProps.putAll(props); newProps.put(CatalogProperties.ENCRYPTION_KMS_IMPL, UnitestKMS.class.getCanonicalName()); return newProps; } - // these parameters are broken out to avoid changes that need to modify lots of test suites - @Parameterized.Parameters(name = "catalogName = {0}, implementation = {1}, config = {2}") - public static Object[][] parameters() { + @Parameters(name = "catalogName = {0}, implementation = {1}, config = {2}") + protected static Object[][] parameters() { return new Object[][] { { SparkCatalogConfig.HIVE.catalogName(), SparkCatalogConfig.HIVE.implementation(), - appendCatalogEncryptionConfigProperties(SparkCatalogConfig.HIVE.properties()) - }, - { - SparkCatalogConfig.HADOOP.catalogName(), - SparkCatalogConfig.HADOOP.implementation(), - appendCatalogEncryptionConfigProperties(SparkCatalogConfig.HADOOP.properties()) - }, - { - SparkCatalogConfig.SPARK.catalogName(), - SparkCatalogConfig.SPARK.implementation(), - appendCatalogEncryptionConfigProperties(SparkCatalogConfig.SPARK.properties()) + appendCatalogEncryptionProperties(SparkCatalogConfig.HIVE.properties()) } }; } - public TestTableEncryption( - String catalogName, String implementation, Map config) { - super(catalogName, implementation, config); - } - - @Before - public void createTables() throws IOException { + @BeforeEach + public void createTables() { sql( "CREATE TABLE %s (id bigint, data string, float float) USING iceberg " + "TBLPROPERTIES ( " - + "'encryption.key-id'='%s' , " - + "'format-version'='2')", + + "'encryption.key-id'='%s')", tableName, UnitestKMS.MASTER_KEY_NAME1); + sql("INSERT INTO %s VALUES (1, 'a', 1.0), (2, 'b', 2.0), (3, 'c', float('NaN'))", tableName); } - @After + @AfterEach public void removeTables() { sql("DROP TABLE IF EXISTS 
%s", tableName); } - @Test + @TestTemplate public void testSelect() { List expected = ImmutableList.of(row(1L, "a", 1.0F), row(2L, "b", 2.0F), row(3L, "c", Float.NaN)); @@ -111,7 +93,7 @@ public void testSelect() { assertEquals("Should return all expected rows", expected, sql("SELECT * FROM %s", tableName)); } - @Test + @TestTemplate public void testDirectDataFileRead() { List dataFileTable = sql("SELECT file_path FROM %s.%s", tableName, MetadataTableType.ALL_DATA_FILES); @@ -135,7 +117,7 @@ public void testDirectDataFileRead() { } } - @Test + @TestTemplate public void testManifestEncryption() throws IOException { List manifestFileTable = sql("SELECT path FROM %s.%s", tableName, MetadataTableType.MANIFESTS); @@ -150,37 +132,48 @@ public void testManifestEncryption() throws IOException { } String metadataFolderPath = null; - byte[] magic = new byte[4]; // Check encryption of manifest files for (String manifestFilePath : manifestFiles) { - SeekableInputStream manifestFileReader = localInput(manifestFilePath).newStream(); - manifestFileReader.read(magic); - manifestFileReader.close(); - Assert.assertArrayEquals( - magic, Ciphers.GCM_STREAM_MAGIC_STRING.getBytes(StandardCharsets.UTF_8)); + checkMetadataFileEncryption(localInput(manifestFilePath)); if (metadataFolderPath == null) { metadataFolderPath = new File(manifestFilePath).getParent().replaceFirst("file:", ""); } } - // Find metadata list files and check their encryption + // Find manifest list and metadata files; check their encryption File[] listOfMetadataFiles = new File(metadataFolderPath).listFiles(); boolean foundManifestListFile = false; + boolean foundMetadataJson = false; + for (File metadataFile : listOfMetadataFiles) { if (metadataFile.getName().startsWith("snap-")) { foundManifestListFile = true; - SeekableInputStream manifestFileReader = localInput(metadataFile).newStream(); - manifestFileReader.read(magic); - manifestFileReader.close(); - Assert.assertArrayEquals( - magic, Ciphers.GCM_STREAM_MAGIC_STRING.getBytes(StandardCharsets.UTF_8)); + checkMetadataFileEncryption(localInput(metadataFile)); + } + + if (metadataFile.getName().endsWith("metadata.json")) { + foundMetadataJson = true; + checkMetadataFileEncryption(localInput(metadataFile)); } } if (!foundManifestListFile) { throw new RuntimeException("No manifest list files found for table " + tableName); } + + if (!foundMetadataJson) { + throw new RuntimeException("Metadata json file is not found for table " + tableName); + } + } + + private void checkMetadataFileEncryption(InputFile file) throws IOException { + SeekableInputStream stream = file.newStream(); + byte[] magic = new byte[4]; + stream.read(magic); + stream.close(); + Assert.assertArrayEquals( + magic, Ciphers.GCM_STREAM_MAGIC_STRING.getBytes(StandardCharsets.UTF_8)); } } From 7a61a2e309823ecd3033c2a059cae88487da77f6 Mon Sep 17 00:00:00 2001 From: Gidon Gershinsky Date: Sun, 21 Apr 2024 11:31:41 +0300 Subject: [PATCH 08/20] spotlessApply --- core/src/main/java/org/apache/iceberg/MetadataFile.java | 4 ++-- .../java/org/apache/iceberg/hive/HiveTableOperations.java | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/MetadataFile.java b/core/src/main/java/org/apache/iceberg/MetadataFile.java index 8120c4b63d16..a6ec74b14392 100644 --- a/core/src/main/java/org/apache/iceberg/MetadataFile.java +++ b/core/src/main/java/org/apache/iceberg/MetadataFile.java @@ -37,8 +37,8 @@ public String location() { } /** - * Key metadata for metadata file in encrypted table. 
If contains the file encryption key - this key cannot be open,
-   * must be wrapped (encrypted) with the table master key.
+   * Key metadata for the metadata file in an encrypted table. If it contains the file encryption
+   * key, it cannot be kept in the open; it must be wrapped (encrypted) with the table master key.
    *
    * @return base64-encoded key metadata for the metadata file
    */
diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java
index d2377d2ab861..8dd270ee6164 100644
--- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java
+++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java
@@ -18,7 +18,6 @@
  */
 package org.apache.iceberg.hive;
 
-import static org.apache.iceberg.TableProperties.ENCRYPTION_TABLE_KEY;
 import static org.apache.iceberg.TableProperties.GC_ENABLED;
 
 import com.fasterxml.jackson.core.JsonProcessingException;
@@ -394,11 +393,14 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) {
 
       throw new CommitFailedException(e);
     } finally {
-      HiveOperationsBase.cleanupMetadataAndUnlock(io(), commitStatus, newMetadataFile.location(), lock);
+      HiveOperationsBase.cleanupMetadataAndUnlock(
+          io(), commitStatus, newMetadataFile.location(), lock);
     }
 
     LOG.info(
-        "Committed to table {} with the new metadata location {}", fullName, newMetadataFile.location());
+        "Committed to table {} with the new metadata location {}",
+        fullName,
+        newMetadataFile.location());
   }
 
   @SuppressWarnings("checkstyle:CyclomaticComplexity")

From a59e3f50b6e83786e1204dfb41643fd897e63b05 Mon Sep 17 00:00:00 2001
From: Gidon Gershinsky
Date: Sun, 21 Apr 2024 11:42:43 +0300
Subject: [PATCH 09/20] clean up

---
 .palantir/revapi.yml                                       | 6 ++++++
 .../java/org/apache/iceberg/hive/HiveTableOperations.java  | 8 ++++----
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/.palantir/revapi.yml b/.palantir/revapi.yml
index 3eeda0168978..401c72b286aa 100644
--- a/.palantir/revapi.yml
+++ b/.palantir/revapi.yml
@@ -1029,6 +1029,12 @@ acceptedBreaks:
         \ java.nio.ByteBuffer)"
       justification: "Decrypting input must receive length"
   org.apache.iceberg:iceberg-core:
+  - code: "java.method.numberOfParametersChanged"
+    old: "method org.apache.iceberg.encryption.EncryptionManager org.apache.iceberg.encryption.EncryptionUtil::createEncryptionManager(java.util.Map<java.lang.String, java.lang.String>, org.apache.iceberg.encryption.KeyManagementClient)"
+    new: "method org.apache.iceberg.encryption.EncryptionManager org.apache.iceberg.encryption.EncryptionUtil::createEncryptionManager(java.lang.String,\
+      \ int, org.apache.iceberg.encryption.KeyManagementClient)"
+    justification: "Create from table key id and dek length"
   - code: "java.method.returnTypeChanged"
     old: "method void org.apache.iceberg.TableMetadataParser::internalWrite(org.apache.iceberg.TableMetadata,\
       \ org.apache.iceberg.io.OutputFile, boolean)"
diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java
index 8dd270ee6164..d75680639e81 100644
--- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java
+++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java
@@ -179,14 +179,14 @@ public EncryptionManager encryption() {
       return encryptionManager;
     }
 
-    String key = encryptionKeyIdFromProps(); // writing
+    String tableKeyID = encryptionKeyIdFromProps(); // writing
 
-    if (key == null) {
-      key = encryptionKeyIdFromHms(); // reading
+    if (tableKeyID == null) {
+      tableKeyID = encryptionKeyIdFromHms(); // reading
     }
 
     encryptionManager =
-        EncryptionUtil.createEncryptionManager(key, dekLength(), keyManagementClient);
+        EncryptionUtil.createEncryptionManager(tableKeyID, dekLength(), keyManagementClient);
 
     return encryptionManager;
   }
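Note: the clean-up above settles the construction pattern that the rest of this series relies on: an EncryptionManager is built from a table key id and a DEK length rather than from the full table property map (this is the signature change recorded in revapi.yml). A minimal sketch of the pattern, mirroring the EncryptionTestHelpers change earlier in this series; UnitestKMS and its master key name come from the test code, everything else is the API as shown in these patches:

    // Sketch only: build a KMS client from catalog properties, then an encryption
    // manager from the table key id and the default DEK length.
    Map<String, String> catalogProperties = Maps.newHashMap();
    catalogProperties.put(
        CatalogProperties.ENCRYPTION_KMS_IMPL, UnitestKMS.class.getCanonicalName());
    KeyManagementClient kmsClient = EncryptionUtil.createKmsClient(catalogProperties);
    EncryptionManager em =
        EncryptionUtil.createEncryptionManager(
            UnitestKMS.MASTER_KEY_NAME1, // table key id, the 'encryption.key-id' property
            TableProperties.ENCRYPTION_DEK_LENGTH_DEFAULT, // DEK length in bytes
            kmsClient);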
From 7c92d0613c6c70361c897418e2cac3c9a7c7e34e Mon Sep 17 00:00:00 2001
From: Gidon Gershinsky
Date: Sun, 21 Apr 2024 11:58:49 +0300
Subject: [PATCH 10/20] fix drop table call

---
 .../src/main/java/org/apache/iceberg/hive/HiveCatalog.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java
index dff03f2f4707..d958568aa1ea 100644
--- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java
+++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java
@@ -197,7 +197,7 @@ public boolean dropTable(TableIdentifier identifier, boolean purge) {
             });
 
     if (purge && lastMetadata != null) {
-      CatalogUtil.dropTableData(ops.io(), ops.encryption(), lastMetadata);
+      CatalogUtil.dropTableData(ops.io(), lastMetadata);
     }
 
     LOG.info("Dropped table: {}", identifier);

From 14e1f76f4d93acd2d883f8f743361d0271ce9889 Mon Sep 17 00:00:00 2001
From: Gidon Gershinsky
Date: Wed, 1 May 2024 11:26:52 +0300
Subject: [PATCH 11/20] use EncryptingFileIO only for encrypted tables

---
 .../iceberg/encryption/EncryptingFileIO.java  |  4 ---
 .../org/apache/iceberg/SerializableTable.java | 10 ++----
 .../iceberg/hive/HiveTableOperations.java     | 33 +++++++++++++----
 .../TestSparkCatalogHadoopOverrides.java      | 35 +++----------------
 4 files changed, 32 insertions(+), 50 deletions(-)

diff --git a/api/src/main/java/org/apache/iceberg/encryption/EncryptingFileIO.java b/api/src/main/java/org/apache/iceberg/encryption/EncryptingFileIO.java
index a0c7798bc20e..18e83ce8fe48 100644
--- a/api/src/main/java/org/apache/iceberg/encryption/EncryptingFileIO.java
+++ b/api/src/main/java/org/apache/iceberg/encryption/EncryptingFileIO.java
@@ -73,10 +73,6 @@ public EncryptionManager encryptionManager() {
     return em;
   }
 
-  public FileIO sourceFileIO() {
-    return io;
-  }
-
   @Override
   public InputFile newInputFile(String path) {
     return io.newInputFile(path);
diff --git a/core/src/main/java/org/apache/iceberg/SerializableTable.java b/core/src/main/java/org/apache/iceberg/SerializableTable.java
index 24cfe9e7c287..9e0055a10376 100644
--- a/core/src/main/java/org/apache/iceberg/SerializableTable.java
+++ b/core/src/main/java/org/apache/iceberg/SerializableTable.java
@@ -23,7 +23,6 @@ import java.util.Map;
 import java.util.UUID;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.iceberg.encryption.EncryptingFileIO;
 import org.apache.iceberg.encryption.EncryptionManager;
 import org.apache.iceberg.hadoop.HadoopConfigurable;
 import org.apache.iceberg.io.FileIO;
@@ -114,13 +113,8 @@ private String metadataFileLocation(Table table) {
   }
 
   private FileIO fileIO(Table table) {
-    FileIO fileIO = table.io();
-    if (fileIO instanceof EncryptingFileIO) {
-      fileIO = ((EncryptingFileIO) fileIO).sourceFileIO();
-    }
-
-    if (fileIO instanceof HadoopConfigurable) {
-      ((HadoopConfigurable) fileIO).serializeConfWith(SerializableConfSupplier::new);
+    if (table.io() instanceof HadoopConfigurable) {
+      ((HadoopConfigurable) table.io()).serializeConfWith(SerializableConfSupplier::new);
     }
 
     return table.io();
diff --git
a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index d75680639e81..4d500af2fc42 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -117,6 +117,7 @@ public static String translateToIcebergProp(String hmsProp) { private EncryptionManager encryptionManager; private EncryptingFileIO encryptingFileIO; + private boolean encryptedTable; /** Tests only */ protected HiveTableOperations( @@ -151,6 +152,7 @@ protected HiveTableOperations( HIVE_ICEBERG_METADATA_REFRESH_MAX_RETRIES_DEFAULT); this.maxHiveTablePropertySize = conf.getLong(HIVE_TABLE_PROPERTY_MAX_SIZE, HIVE_TABLE_PROPERTY_MAX_SIZE_DEFAULT); + this.encryptedTable = false; } @Override @@ -160,6 +162,14 @@ protected String tableName() { @Override public FileIO io() { + if (encryptionManager == null) { + encryptionManager = encryption(); + } + + if (!encryptedTable) { + return fileIO; + } + if (encryptingFileIO != null) { return encryptingFileIO; } @@ -174,19 +184,24 @@ public EncryptionManager encryption() { return encryptionManager; } - if (keyManagementClient == null) { - encryptionManager = PlaintextEncryptionManager.instance(); - return encryptionManager; - } - String tableKeyID = encryptionKeyIdFromProps(); // writing if (tableKeyID == null) { tableKeyID = encryptionKeyIdFromHms(); // reading } - encryptionManager = - EncryptionUtil.createEncryptionManager(tableKeyID, dekLength(), keyManagementClient); + if (tableKeyID != null) { + if (keyManagementClient == null) { + throw new RuntimeException( + "Cant create encryption manager, because key management client is not set"); + } + + encryptedTable = true; + encryptionManager = + EncryptionUtil.createEncryptionManager(tableKeyID, dekLength(), keyManagementClient); + } else { + encryptionManager = PlaintextEncryptionManager.instance(); + } return encryptionManager; } @@ -195,6 +210,10 @@ private String encryptionKeyIdFromHms() { String keyID; try { Table table = loadHmsTable(); + if (table == null) { + return null; + } + keyID = table.getParameters().get(TableProperties.ENCRYPTION_TABLE_KEY); } catch (TException e) { String errMsg = diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkCatalogHadoopOverrides.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkCatalogHadoopOverrides.java index 42b151b275b1..7ff507ed0694 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkCatalogHadoopOverrides.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkCatalogHadoopOverrides.java @@ -26,8 +26,6 @@ import org.apache.iceberg.Parameters; import org.apache.iceberg.Table; import org.apache.iceberg.TestHelpers; -import org.apache.iceberg.encryption.EncryptingFileIO; -import org.apache.iceberg.io.FileIO; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.spark.CatalogTestBase; import org.apache.iceberg.spark.SparkCatalog; @@ -92,13 +90,7 @@ public void dropTable() { @TestTemplate public void testTableFromCatalogHasOverrides() throws Exception { Table table = getIcebergTableFromSparkCatalog(); - FileIO io = table.io(); - - if (io instanceof EncryptingFileIO) { - io = ((EncryptingFileIO) io).sourceFileIO(); - } - - Configuration conf = ((Configurable) io).getConf(); + Configuration conf = ((Configurable) 
table.io()).getConf(); String actualCatalogOverride = conf.get(configToOverride, "/whammies"); assertThat(actualCatalogOverride) .as( @@ -109,13 +101,7 @@ public void testTableFromCatalogHasOverrides() throws Exception { @TestTemplate public void ensureRoundTripSerializedTableRetainsHadoopConfig() throws Exception { Table table = getIcebergTableFromSparkCatalog(); - FileIO io = table.io(); - - if (io instanceof EncryptingFileIO) { - io = ((EncryptingFileIO) io).sourceFileIO(); - } - - Configuration originalConf = ((Configurable) io).getConf(); + Configuration originalConf = ((Configurable) table.io()).getConf(); String actualCatalogOverride = originalConf.get(configToOverride, "/whammies"); assertThat(actualCatalogOverride) .as( @@ -126,13 +112,7 @@ public void ensureRoundTripSerializedTableRetainsHadoopConfig() throws Exception Table serializableTable = SerializableTableWithSize.copyOf(table); Table kryoSerializedTable = KryoHelpers.roundTripSerialize(SerializableTableWithSize.copyOf(table)); - io = kryoSerializedTable.io(); - - if (io instanceof EncryptingFileIO) { - io = ((EncryptingFileIO) io).sourceFileIO(); - } - - Configuration configFromKryoSerde = ((Configurable) io).getConf(); + Configuration configFromKryoSerde = ((Configurable) kryoSerializedTable.io()).getConf(); String kryoSerializedCatalogOverride = configFromKryoSerde.get(configToOverride, "/whammies"); assertThat(kryoSerializedCatalogOverride) .as( @@ -141,14 +121,7 @@ public void ensureRoundTripSerializedTableRetainsHadoopConfig() throws Exception // Do the same for Java based serde Table javaSerializedTable = TestHelpers.roundTripSerialize(serializableTable); - // Configuration configFromJavaSerde = ((Configurable) javaSerializedTable.io()).getConf(); - io = javaSerializedTable.io(); - - if (io instanceof EncryptingFileIO) { - io = ((EncryptingFileIO) io).sourceFileIO(); - } - - Configuration configFromJavaSerde = ((Configurable) io).getConf(); + Configuration configFromJavaSerde = ((Configurable) javaSerializedTable.io()).getConf(); String javaSerializedCatalogOverride = configFromJavaSerde.get(configToOverride, "/whammies"); assertThat(javaSerializedCatalogOverride) .as( From 5ff8656deda4c6e5179312bf16da91ed0968d55f Mon Sep 17 00:00:00 2001 From: Gidon Gershinsky Date: Wed, 1 May 2024 15:38:29 +0300 Subject: [PATCH 12/20] write length only for encrypted metadata --- .../iceberg/hive/HiveTableOperations.java | 21 +++++++------------ 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index 4d500af2fc42..fa7ff5450d6d 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -174,7 +174,7 @@ public FileIO io() { return encryptingFileIO; } - encryptingFileIO = EncryptingFileIO.combine(fileIO, encryption()); + encryptingFileIO = EncryptingFileIO.combine(fileIO, encryptionManager); return encryptingFileIO; } @@ -455,9 +455,6 @@ private void setHmsTableParameters( if (newMetadataFile.wrappedKeyMetadata() != null) { parameters.put(METADATA_WRAPPED_KEY_PROP, newMetadataFile.wrappedKeyMetadata()); - } - - if (newMetadataFile.size() > 0) { parameters.put(METADATA_SIZE_PROP, Long.toString(newMetadataFile.size())); } @@ -465,16 +462,12 @@ private void setHmsTableParameters( parameters.put(PREVIOUS_METADATA_LOCATION_PROP, currentMetadataLocation()); 
} - if (currentMetadataFile() != null) { - if (currentMetadataFile().wrappedKeyMetadata() != null - && !currentMetadataFile().wrappedKeyMetadata().isEmpty()) { - parameters.put( - PREVIOUS_METADATA_WRAPPED_KEY_PROP, currentMetadataFile().wrappedKeyMetadata()); - } - - if (currentMetadataFile().size() > 0) { - parameters.put(PREVIOUS_METADATA_SIZE_PROP, Long.toString(currentMetadataFile().size())); - } + if (currentMetadataFile() != null + && currentMetadataFile().wrappedKeyMetadata() != null + && !currentMetadataFile().wrappedKeyMetadata().isEmpty()) { + parameters.put( + PREVIOUS_METADATA_WRAPPED_KEY_PROP, currentMetadataFile().wrappedKeyMetadata()); + parameters.put(PREVIOUS_METADATA_SIZE_PROP, Long.toString(currentMetadataFile().size())); } // If needed set the 'storage_handler' property to enable query from Hive From 3cbc0c2b9da75ec4ec94b721993840b6e73fae99 Mon Sep 17 00:00:00 2001 From: Gidon Gershinsky Date: Mon, 20 May 2024 14:10:16 +0300 Subject: [PATCH 13/20] encrypt manifest list keys with metadata key --- .../iceberg/BaseMetastoreTableOperations.java | 40 ++++++++++++++----- .../apache/iceberg/TableMetadataParser.java | 40 ++++++++++++++----- .../org/apache/iceberg/hive/HiveCatalog.java | 2 +- .../iceberg/hive/HiveTableOperations.java | 16 +++++--- .../spark/sql/TestTableEncryption.java | 32 +++++++++++++++ 5 files changed, 103 insertions(+), 27 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/BaseMetastoreTableOperations.java b/core/src/main/java/org/apache/iceberg/BaseMetastoreTableOperations.java index 3c07dcfed1dd..724f6d0edfc8 100644 --- a/core/src/main/java/org/apache/iceberg/BaseMetastoreTableOperations.java +++ b/core/src/main/java/org/apache/iceberg/BaseMetastoreTableOperations.java @@ -87,6 +87,11 @@ protected int dekLength() { return encryptionDekLength; } + // TODO metadata encrypt flag (on/off) + protected boolean encryptMetadataFile() { + return true; + } + @Override public TableMetadata current() { if (shouldRefresh) { @@ -177,10 +182,11 @@ protected String writeNewMetadataIfRequired(boolean newTable, TableMetadata meta protected MetadataFile writeNewMetadataFileIfRequired(boolean newTable, TableMetadata metadata) { return newTable && metadata.metadataFileLocation() != null - ? new MetadataFile(metadata.metadataFileLocation(), null, -1L) + ? 
new MetadataFile(metadata.metadataFileLocation(), null, 0L) : writeNewMetadataFile(metadata, currentVersion() + 1); } + // TODO no usages protected String writeNewMetadata(TableMetadata metadata, int newVersion) { return writeNewMetadataFile(metadata, newVersion).location(); } @@ -195,7 +201,10 @@ protected MetadataFile writeNewMetadataFile(TableMetadata metadata, int newVersi OutputFile newMetadataFile; String wrappedMetadataKey; - if (encryptionKeyId != null) { + if (encryptionKeyId == null) { + newMetadataFile = io().newOutputFile(newTableMetadataFilePath); + wrappedMetadataKey = null; + } else { if (encryptionDekLength < 0) { String encryptionDekLenProp = @@ -221,31 +230,39 @@ protected MetadataFile writeNewMetadataFile(TableMetadata metadata, int newVersi throw new IllegalStateException("Null key metadata in encrypted table"); } - newMetadataFile = newEncryptedMetadataFile.encryptingOutputFile(); + if (encryptMetadataFile()) { + newMetadataFile = newEncryptedMetadataFile.encryptingOutputFile(); + } else { + newMetadataFile = io.newOutputFile(newTableMetadataFilePath); + } + EncryptionManager encryptionManager = encryptingIO.encryptionManager(); - Preconditions.checkArgument( + Preconditions.checkState( encryptionManager instanceof StandardEncryptionManager, "Cannot encrypt table metadata because the encryption manager (%s) does not " + "implement StandardEncryptionManager", encryptionManager.getClass()); - NativeEncryptionKeyMetadata keyMetadata = + NativeEncryptionKeyMetadata metadataKeyMetadata = (NativeEncryptionKeyMetadata) newEncryptedMetadataFile.keyMetadata(); - ByteBuffer metadataEncryptionKey = keyMetadata.encryptionKey(); + ByteBuffer metadataEncryptionKey = metadataKeyMetadata.encryptionKey(); + ByteBuffer metadataAADPrefix = metadataKeyMetadata.aadPrefix(); + + // encrypt manifest list keys with metadata key + for (Snapshot snapshot : metadata.snapshots()) { + snapshot.encryptManifestListKeyMetadata(metadataEncryptionKey, metadataAADPrefix); + } + // Wrap (encrypt) metadata file key ByteBuffer wrappedEncryptionKey = ((StandardEncryptionManager) encryptionManager).wrapKey(metadataEncryptionKey); - ByteBuffer metadataAADPrefix = keyMetadata.aadPrefix(); wrappedMetadataKey = Base64.getEncoder() .encodeToString( EncryptionUtil.createKeyMetadata(wrappedEncryptionKey, metadataAADPrefix) .buffer() .array()); - } else { - newMetadataFile = io().newOutputFile(newTableMetadataFilePath); - wrappedMetadataKey = null; } // write the new metadata @@ -264,10 +281,12 @@ protected void refreshFromMetadataLocation(String newLocation) { refreshFromMetadataLocation(newLocation, null, 20); } + // TODO no usages protected void refreshFromMetadataLocation(String newLocation, int numRetries) { refreshFromMetadataLocation(newLocation, null, numRetries); } + // TODO metadata decrypt flag (on/off) protected void refreshFromMetadataLocation(MetadataFile newLocation, int numRetries) { refreshFromMetadataLocation(newLocation, null, numRetries); } @@ -281,6 +300,7 @@ protected void refreshFromMetadataLocation( metadataLocation -> TableMetadataParser.read(io(), metadataLocation)); } + // TODO metadata decrypt flag (on/off) protected void refreshFromMetadataLocation( MetadataFile newLocation, Predicate shouldRetry, int numRetries) { refreshFromMetadataLocation( diff --git a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java index 01d1da9c645b..84636b4e0f2c 100644 --- 
a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java +++ b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java @@ -151,7 +151,7 @@ public static long internalWrite( } } - return -1L; + return 0L; } public static String getFileExtension(String codecName) { @@ -293,6 +293,7 @@ public static TableMetadata read(FileIO io, String path) { return read(io, io.newInputFile(path)); } + // TODO metadata decrypt flag (on/off) public static TableMetadata read(FileIO io, MetadataFile metadataFile) { if (metadataFile.wrappedKeyMetadata() == null) { return read(io, io.newInputFile(metadataFile.location())); @@ -318,25 +319,31 @@ public static TableMetadata read(FileIO io, MetadataFile metadataFile) { // Unwrap (decrypt) metadata file key NativeEncryptionKeyMetadata keyMetadata = EncryptionUtil.parseKeyMetadata(keyMetadataBytes); - ByteBuffer unwrappedManfestListKey = + ByteBuffer unwrappedMetadataKey = standardEncryptionManager.unwrapKey(keyMetadata.encryptionKey()); - EncryptionKeyMetadata unwrappedKeyMetadata = - EncryptionUtil.createKeyMetadata(unwrappedManfestListKey, keyMetadata.aadPrefix()); + EncryptionKeyMetadata metadataKeyMetadata = + EncryptionUtil.createKeyMetadata(unwrappedMetadataKey, keyMetadata.aadPrefix()); InputFile input = encryptingFileIO.newDecryptingInputFile( - metadataFile.location(), metadataFile.size(), unwrappedKeyMetadata.buffer()); + metadataFile.location(), metadataFile.size(), metadataKeyMetadata.buffer()); - return read(io, input); + return read(io, input, unwrappedMetadataKey, keyMetadata.aadPrefix()); } } public static TableMetadata read(FileIO io, InputFile file) { + return read(io, file, null, null); + } + + public static TableMetadata read( + FileIO io, InputFile file, ByteBuffer metadataKey, ByteBuffer metadataAadPrefix) { Codec codec = Codec.fromFileName(file.location()); try (InputStream is = codec == Codec.GZIP ? 
new GZIPInputStream(file.newStream()) : file.newStream()) { - return fromJson(file, JsonUtil.mapper().readValue(is, JsonNode.class)); + return fromJson( + file, JsonUtil.mapper().readValue(is, JsonNode.class), metadataKey, metadataAadPrefix); } catch (IOException e) { throw new RuntimeIOException(e, "Failed to read file: %s", file); } @@ -366,15 +373,28 @@ public static TableMetadata fromJson(String metadataLocation, String json) { } public static TableMetadata fromJson(InputFile file, JsonNode node) { - return fromJson(file.location(), node); + return fromJson(file, node, null, null); + } + + public static TableMetadata fromJson( + InputFile file, JsonNode node, ByteBuffer metadataKey, ByteBuffer metadataAadPrefix) { + return fromJson(file.location(), node, metadataKey, metadataAadPrefix); } public static TableMetadata fromJson(JsonNode node) { return fromJson((String) null, node); } - @SuppressWarnings({"checkstyle:CyclomaticComplexity", "checkstyle:MethodLength"}) public static TableMetadata fromJson(String metadataLocation, JsonNode node) { + return fromJson(metadataLocation, node, null, null); + } + + @SuppressWarnings({"checkstyle:CyclomaticComplexity", "checkstyle:MethodLength"}) + public static TableMetadata fromJson( + String metadataLocation, + JsonNode node, + ByteBuffer metadataKey, + ByteBuffer metadataAadPrefix) { Preconditions.checkArgument( node.isObject(), "Cannot parse metadata from a non-object: %s", node); @@ -534,7 +554,7 @@ public static TableMetadata fromJson(String metadataLocation, JsonNode node) { snapshots = Lists.newArrayListWithExpectedSize(snapshotArray.size()); Iterator iterator = snapshotArray.elements(); while (iterator.hasNext()) { - snapshots.add(SnapshotParser.fromJson(iterator.next())); + snapshots.add(SnapshotParser.fromJson(iterator.next(), metadataKey, metadataAadPrefix)); } } else { snapshots = ImmutableList.of(); diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java index d958568aa1ea..982fac3107b4 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java @@ -113,7 +113,7 @@ public void initialize(String inputName, Map properties) { ? 
new HadoopFileIO(conf) : CatalogUtil.loadFileIO(fileIOImpl, properties, conf); - if (catalogProperties.containsKey(CatalogProperties.ENCRYPTION_KMS_TYPE)) { + if (catalogProperties.containsKey(CatalogProperties.ENCRYPTION_KMS_IMPL)) { this.keyManagementClient = EncryptionUtil.createKmsClient(properties); } diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index fa7ff5450d6d..49472828aef8 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -184,10 +184,10 @@ public EncryptionManager encryption() { return encryptionManager; } - String tableKeyID = encryptionKeyIdFromProps(); // writing + String tableKeyID = encryptionKeyIdFromProps(); if (tableKeyID == null) { - tableKeyID = encryptionKeyIdFromHms(); // reading + tableKeyID = encryptionKeyIdFromHms(); } if (tableKeyID != null) { @@ -230,14 +230,16 @@ private String encryptionKeyIdFromHms() { @Override protected void doRefresh() { String metadataLocation = null; - String metadataKey = null; - long metadataSize = -1L; + String metadataKeyMetadata = null; + long metadataSize = 0L; try { Table table = metaClients.run(client -> client.getTable(database, tableName)); HiveOperationsBase.validateTableIsIceberg(table, fullName); metadataLocation = table.getParameters().get(METADATA_LOCATION_PROP); - metadataKey = table.getParameters().get(METADATA_WRAPPED_KEY_PROP); + // TODO do we need to lock/unlock Hive table, to get all 3 params in one atomic operation? + // TODO Or bundle all 3 in one string? (serialized MetadataFile object) + metadataKeyMetadata = table.getParameters().get(METADATA_WRAPPED_KEY_PROP); String metadataSizeString = table.getParameters().get(METADATA_SIZE_PROP); if (metadataSizeString != null) { metadataSize = Long.valueOf(metadataSizeString); @@ -258,8 +260,10 @@ protected void doRefresh() { throw new RuntimeException("Interrupted during refresh", e); } - MetadataFile metadataFile = new MetadataFile(metadataLocation, metadataKey, metadataSize); + MetadataFile metadataFile = + new MetadataFile(metadataLocation, metadataKeyMetadata, metadataSize); + // TODO metadata decrypt flag (on/off) refreshFromMetadataLocation(metadataFile, metadataRefreshMaxRetries); } diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestTableEncryption.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestTableEncryption.java index 54bd4089e090..e081ea7b0cbf 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestTableEncryption.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestTableEncryption.java @@ -93,6 +93,34 @@ public void testSelect() { assertEquals("Should return all expected rows", expected, sql("SELECT * FROM %s", tableName)); } + @TestTemplate + public void testInsertAndDelete() { + sql("INSERT INTO %s VALUES (4, 'd', 4.0), (5, 'e', 5.0), (6, 'f', float('NaN'))", tableName); + + List expected = + ImmutableList.of( + row(1L, "a", 1.0F), + row(2L, "b", 2.0F), + row(3L, "c", Float.NaN), + row(4L, "d", 4.0F), + row(5L, "e", 5.0F), + row(6L, "f", Float.NaN)); + + assertEquals( + "Should return all expected rows", + expected, + sql("SELECT * FROM %s ORDER BY id", tableName)); + + sql("DELETE FROM %s WHERE id < 4", tableName); + + expected = ImmutableList.of(row(4L, "d", 4.0F), row(5L, "e", 5.0F), row(6L, "f", Float.NaN)); + + 
assertEquals(
+        "Should return all expected rows",
+        expected,
+        sql("SELECT * FROM %s ORDER BY id", tableName));
+  }
+
   @TestTemplate
   public void testDirectDataFileRead() {
     List<Object[]> dataFileTable =
@@ -142,6 +170,10 @@ public void testManifestEncryption() throws IOException {
       }
     }
 
+    if (metadataFolderPath == null) {
+      throw new RuntimeException("No metadata folder found for table " + tableName);
+    }
+
     // Find manifest list and metadata files; check their encryption
     File[] listOfMetadataFiles = new File(metadataFolderPath).listFiles();
     boolean foundManifestListFile = false;

From 9d9a5228df6f385113e5ba2a1f37ff031d5cdb45 Mon Sep 17 00:00:00 2001
From: Gidon Gershinsky
Date: Tue, 28 May 2024 14:16:47 +0300
Subject: [PATCH 14/20] comments

---
 .../java/org/apache/iceberg/hive/HiveTableOperations.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java
index 49472828aef8..8a68db4f031b 100644
--- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java
+++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java
@@ -237,13 +237,12 @@ protected void doRefresh() {
       HiveOperationsBase.validateTableIsIceberg(table, fullName);
 
       metadataLocation = table.getParameters().get(METADATA_LOCATION_PROP);
-      // TODO do we need to lock/unlock Hive table, to get all 3 params in one atomic operation?
-      // TODO Or bundle all 3 in one string? (serialized MetadataFile object)
       metadataKeyMetadata = table.getParameters().get(METADATA_WRAPPED_KEY_PROP);
       String metadataSizeString = table.getParameters().get(METADATA_SIZE_PROP);
       if (metadataSizeString != null) {
         metadataSize = Long.valueOf(metadataSizeString);
       }
+      // TODO get "encrypted metadata" flag here? Or rely on catalog prop?
 
     } catch (NoSuchObjectException e) {
       if (currentMetadataLocation() != null) {
@@ -460,6 +459,7 @@ private void setHmsTableParameters(
     if (newMetadataFile.wrappedKeyMetadata() != null) {
       parameters.put(METADATA_WRAPPED_KEY_PROP, newMetadataFile.wrappedKeyMetadata());
       parameters.put(METADATA_SIZE_PROP, Long.toString(newMetadataFile.size()));
+      // TODO set "encrypted metadata" flag here? Or rely on catalog prop in readers?
     }
 
     if (currentMetadataLocation() != null && !currentMetadataLocation().isEmpty()) {
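Note: at this point in the series, a reader finds the wrapped metadata key and the verified metadata size in plain HMS table parameters, next to the metadata location. A minimal reader-side sketch, mirroring the doRefresh() logic shown earlier; hmsTable is an assumed, already-loaded HMS Table, and the parameter names are the constants that the next patch removes again when MetadataFile is dropped and key wrapping moves to the snapshot level:

    // Sketch only: assemble a MetadataFile from the three HMS parameters and read
    // the (possibly encrypted) table metadata through it.
    Map<String, String> parameters = hmsTable.getParameters();
    String metadataLocation = parameters.get("metadata_location");
    String wrappedKeyMetadata = parameters.get("metadata_wrapped_key"); // base64, KMS-wrapped
    String metadataSizeString = parameters.get("metadata_size"); // verified, trusted size
    long metadataSize = metadataSizeString != null ? Long.parseLong(metadataSizeString) : 0L;
    MetadataFile metadataFile =
        new MetadataFile(metadataLocation, wrappedKeyMetadata, metadataSize);
    TableMetadata metadata = TableMetadataParser.read(io, metadataFile); // io: the table's FileIO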
From 839b9cd33ba1da47f42156a517406396284534f6 Mon Sep 17 00:00:00 2001
From: Gidon Gershinsky
Date: Tue, 11 Jun 2024 10:03:56 +0300
Subject: [PATCH 15/20] wrap snapshot key encryption key in KMS

---
 .../iceberg/BaseMetastoreTableOperations.java | 165 ++----------------
 .../java/org/apache/iceberg/MetadataFile.java |  56 ------
 .../apache/iceberg/TableMetadataParser.java   |  96 ++++------
 .../iceberg/encryption/EncryptionUtil.java    |  30 ++--
 .../encryption/EncryptionTestHelpers.java     |   7 +-
 .../iceberg/hive/HiveTableOperations.java     | 123 ++++++------
 .../spark/sql/TestTableEncryption.java        |   9 -
 7 files changed, 118 insertions(+), 368 deletions(-)
 delete mode 100644 core/src/main/java/org/apache/iceberg/MetadataFile.java

diff --git a/core/src/main/java/org/apache/iceberg/BaseMetastoreTableOperations.java b/core/src/main/java/org/apache/iceberg/BaseMetastoreTableOperations.java
index 724f6d0edfc8..6443cf6e60ea 100644
--- a/core/src/main/java/org/apache/iceberg/BaseMetastoreTableOperations.java
+++ b/core/src/main/java/org/apache/iceberg/BaseMetastoreTableOperations.java
@@ -18,19 +18,12 @@
  */
 package org.apache.iceberg;
 
-import java.nio.ByteBuffer;
-import java.util.Base64;
 import java.util.Set;
 import java.util.UUID;
 import java.util.concurrent.atomic.AtomicReference;
 import java.util.function.Function;
 import java.util.function.Predicate;
-import org.apache.iceberg.encryption.EncryptedOutputFile;
-import org.apache.iceberg.encryption.EncryptingFileIO;
 import org.apache.iceberg.encryption.EncryptionManager;
-import org.apache.iceberg.encryption.EncryptionUtil;
-import org.apache.iceberg.encryption.NativeEncryptionKeyMetadata;
-import org.apache.iceberg.encryption.StandardEncryptionManager;
 import org.apache.iceberg.exceptions.AlreadyExistsException;
 import org.apache.iceberg.exceptions.CommitFailedException;
 import org.apache.iceberg.exceptions.NoSuchTableException;
@@ -53,21 +46,14 @@ public abstract class BaseMetastoreTableOperations extends BaseMetastoreOperatio
   public static final String TABLE_TYPE_PROP = "table_type";
   public static final String ICEBERG_TABLE_TYPE_VALUE = "iceberg";
   public static final String METADATA_LOCATION_PROP = "metadata_location";
-  public static final String METADATA_WRAPPED_KEY_PROP = "metadata_wrapped_key";
-  public static final String METADATA_SIZE_PROP = "metadata_size";
   public static final String PREVIOUS_METADATA_LOCATION_PROP = "previous_metadata_location";
-  public static final String PREVIOUS_METADATA_WRAPPED_KEY_PROP = "previous_metadata_wrapped_key";
-  public static final String PREVIOUS_METADATA_SIZE_PROP = "previous_metadata_size";
 
   private static final String METADATA_FOLDER_NAME = "metadata";
 
   private TableMetadata currentMetadata = null;
   private String currentMetadataLocation = null;
-  private MetadataFile currentMetadataFile = null;
   private boolean shouldRefresh = true;
   private int version = -1;
-  private String encryptionKeyId;
-  private int encryptionDekLength = -1;
 
   protected BaseMetastoreTableOperations() {}
 
@@ -79,19 +65,6 @@ protected BaseMetastoreTableOperations() {}
    */
   protected abstract String tableName();
 
-  protected String encryptionKeyIdFromProps() {
-    return encryptionKeyId;
-  }
-
-  protected int dekLength() {
-    return encryptionDekLength;
-  }
-
-  // TODO metadata encrypt flag (on/off)
-  protected boolean encryptMetadataFile() {
-    return true;
-  }
-
   @Override
   public TableMetadata current() {
     if (shouldRefresh) {
@@ -104,10 +77,6 @@ public String
currentMetadataLocation() { return currentMetadataLocation; } - public MetadataFile currentMetadataFile() { - return currentMetadataFile; - } - public int currentVersion() { return version; } @@ -177,120 +146,31 @@ protected void disableRefresh() { } protected String writeNewMetadataIfRequired(boolean newTable, TableMetadata metadata) { - return writeNewMetadataFileIfRequired(newTable, metadata).location(); - } - - protected MetadataFile writeNewMetadataFileIfRequired(boolean newTable, TableMetadata metadata) { return newTable && metadata.metadataFileLocation() != null - ? new MetadataFile(metadata.metadataFileLocation(), null, 0L) - : writeNewMetadataFile(metadata, currentVersion() + 1); + ? metadata.metadataFileLocation() + : writeNewMetadata(metadata, currentVersion() + 1); } - // TODO no usages protected String writeNewMetadata(TableMetadata metadata, int newVersion) { - return writeNewMetadataFile(metadata, newVersion).location(); - } - - protected MetadataFile writeNewMetadataFile(TableMetadata metadata, int newVersion) { String newTableMetadataFilePath = newTableMetadataFilePath(metadata, newVersion); - - if (encryptionKeyId == null) { - encryptionKeyId = metadata.property(TableProperties.ENCRYPTION_TABLE_KEY, null); - } - - OutputFile newMetadataFile; - String wrappedMetadataKey; - - if (encryptionKeyId == null) { - newMetadataFile = io().newOutputFile(newTableMetadataFilePath); - wrappedMetadataKey = null; - } else { - - if (encryptionDekLength < 0) { - String encryptionDekLenProp = - metadata.property(TableProperties.ENCRYPTION_DEK_LENGTH, null); - encryptionDekLength = - (encryptionDekLenProp == null) - ? TableProperties.ENCRYPTION_DEK_LENGTH_DEFAULT - : Integer.valueOf(encryptionDekLenProp); - } - - FileIO io = io(); - Preconditions.checkArgument( - io instanceof EncryptingFileIO, - "Cannot encrypt table metadata because the fileIO (%s) does not " - + "implement EncryptingFileIO", - io.getClass()); - EncryptingFileIO encryptingIO = (EncryptingFileIO) io(); - EncryptedOutputFile newEncryptedMetadataFile = - encryptingIO.newEncryptingOutputFile(newTableMetadataFilePath); - - if (newEncryptedMetadataFile.keyMetadata() == null - || newEncryptedMetadataFile.keyMetadata().buffer() == null) { - throw new IllegalStateException("Null key metadata in encrypted table"); - } - - if (encryptMetadataFile()) { - newMetadataFile = newEncryptedMetadataFile.encryptingOutputFile(); - } else { - newMetadataFile = io.newOutputFile(newTableMetadataFilePath); - } - - EncryptionManager encryptionManager = encryptingIO.encryptionManager(); - - Preconditions.checkState( - encryptionManager instanceof StandardEncryptionManager, - "Cannot encrypt table metadata because the encryption manager (%s) does not " - + "implement StandardEncryptionManager", - encryptionManager.getClass()); - NativeEncryptionKeyMetadata metadataKeyMetadata = - (NativeEncryptionKeyMetadata) newEncryptedMetadataFile.keyMetadata(); - ByteBuffer metadataEncryptionKey = metadataKeyMetadata.encryptionKey(); - ByteBuffer metadataAADPrefix = metadataKeyMetadata.aadPrefix(); - - // encrypt manifest list keys with metadata key - for (Snapshot snapshot : metadata.snapshots()) { - snapshot.encryptManifestListKeyMetadata(metadataEncryptionKey, metadataAADPrefix); - } - - // Wrap (encrypt) metadata file key - ByteBuffer wrappedEncryptionKey = - ((StandardEncryptionManager) encryptionManager).wrapKey(metadataEncryptionKey); - - wrappedMetadataKey = - Base64.getEncoder() - .encodeToString( - EncryptionUtil.createKeyMetadata(wrappedEncryptionKey, 
metadataAADPrefix) - .buffer() - .array()); - } + OutputFile newMetadataLocation = io().newOutputFile(newTableMetadataFilePath); // write the new metadata // use overwrite to avoid negative caching in S3. this is safe because the metadata location is // always unique because it includes a UUID. - long size = TableMetadataParser.overwrite(metadata, newMetadataFile); + TableMetadataParser.overwrite(metadata, newMetadataLocation); - if (encryptionKeyId != null && size <= 0) { - throw new RuntimeException("Metadata file size is not recorded in an encrypted table"); - } - - return new MetadataFile(newMetadataFile.location(), wrappedMetadataKey, size); + return newMetadataLocation.location(); } protected void refreshFromMetadataLocation(String newLocation) { refreshFromMetadataLocation(newLocation, null, 20); } - // TODO no usages protected void refreshFromMetadataLocation(String newLocation, int numRetries) { refreshFromMetadataLocation(newLocation, null, numRetries); } - // TODO metadata decrypt flag (on/off) - protected void refreshFromMetadataLocation(MetadataFile newLocation, int numRetries) { - refreshFromMetadataLocation(newLocation, null, numRetries); - } - protected void refreshFromMetadataLocation( String newLocation, Predicate shouldRetry, int numRetries) { refreshFromMetadataLocation( @@ -300,41 +180,17 @@ protected void refreshFromMetadataLocation( metadataLocation -> TableMetadataParser.read(io(), metadataLocation)); } - // TODO metadata decrypt flag (on/off) - protected void refreshFromMetadataLocation( - MetadataFile newLocation, Predicate shouldRetry, int numRetries) { - refreshFromMetadataLocation( - newLocation, - shouldRetry, - numRetries, - metadataFile -> TableMetadataParser.read(io(), metadataFile)); - } - protected void refreshFromMetadataLocation( String newLocation, Predicate shouldRetry, int numRetries, Function metadataLoader) { - MetadataFile metadataFile = new MetadataFile(newLocation, null, 0); - - refreshFromMetadataLocation( - metadataFile, - shouldRetry, - numRetries, - metadataLocation -> metadataLoader.apply(metadataLocation.location())); - } - - protected void refreshFromMetadataLocation( - MetadataFile newMetadataFile, - Predicate shouldRetry, - int numRetries, - Function metadataLoader) { // use null-safe equality check because new tables have a null metadata location - if (!Objects.equal(currentMetadataLocation, newMetadataFile.location())) { - LOG.info("Refreshing table metadata from new version: {}", newMetadataFile.location()); + if (!Objects.equal(currentMetadataLocation, newLocation)) { + LOG.info("Refreshing table metadata from new version: {}", newLocation); AtomicReference newMetadata = new AtomicReference<>(); - Tasks.foreach(newMetadataFile) + Tasks.foreach(newLocation) .retry(numRetries) .exponentialBackoff(100, 5000, 600000, 4.0 /* 100, 400, 1600, ... 
*/) .throwFailureWhenFinished() @@ -352,9 +208,8 @@ protected void refreshFromMetadataLocation( } this.currentMetadata = newMetadata.get(); - this.currentMetadataLocation = newMetadataFile.location(); - this.currentMetadataFile = newMetadataFile; - this.version = parseVersion(newMetadataFile.location()); + this.currentMetadataLocation = newLocation; + this.version = parseVersion(newLocation); } this.shouldRefresh = false; } diff --git a/core/src/main/java/org/apache/iceberg/MetadataFile.java b/core/src/main/java/org/apache/iceberg/MetadataFile.java deleted file mode 100644 index a6ec74b14392..000000000000 --- a/core/src/main/java/org/apache/iceberg/MetadataFile.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg; - -import java.io.Serializable; - -public class MetadataFile implements Serializable { - private final String location; - private final String wrappedKeyMetadata; - private final long size; - - public MetadataFile(String location, String wrappedKeyMetadata, long size) { - this.location = location; - this.wrappedKeyMetadata = wrappedKeyMetadata; - this.size = size; - } - - /** Location of metadata file. */ - public String location() { - return location; - } - - /** - * Key metadata for metadata file in encrypted table. If contains the file encryption key - this - * key cannot be open, must be wrapped (encrypted) with the table master key. - * - * @return base64-encoded key metadata for the metadata file - */ - public String wrappedKeyMetadata() { - return wrappedKeyMetadata; - } - - /** - * In encrypted tables, return the size of metadata file. Must be a verified value, taken from a - * trusted source. In unencrypted tables, can return 0. 
- */ - public long size() { - return size; - } -} diff --git a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java index 84636b4e0f2c..6778ae866654 100644 --- a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java +++ b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java @@ -25,9 +25,7 @@ import java.io.OutputStream; import java.io.OutputStreamWriter; import java.io.StringWriter; -import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; -import java.util.Base64; import java.util.Iterator; import java.util.List; import java.util.Locale; @@ -37,9 +35,7 @@ import org.apache.iceberg.TableMetadata.MetadataLogEntry; import org.apache.iceberg.TableMetadata.SnapshotLogEntry; import org.apache.iceberg.encryption.EncryptingFileIO; -import org.apache.iceberg.encryption.EncryptionKeyMetadata; -import org.apache.iceberg.encryption.EncryptionUtil; -import org.apache.iceberg.encryption.NativeEncryptionKeyMetadata; +import org.apache.iceberg.encryption.EncryptionManager; import org.apache.iceberg.encryption.StandardEncryptionManager; import org.apache.iceberg.exceptions.RuntimeIOException; import org.apache.iceberg.io.FileIO; @@ -112,6 +108,7 @@ private TableMetadataParser() {} static final String REFS = "refs"; static final String SNAPSHOTS = "snapshots"; static final String SNAPSHOT_ID = "snapshot-id"; + static final String SNAPSHOT_KEK = "snapshot-kek"; static final String TIMESTAMP_MS = "timestamp-ms"; static final String SNAPSHOT_LOG = "snapshot-log"; static final String METADATA_FILE = "metadata-file"; @@ -240,12 +237,24 @@ public static void toJson(TableMetadata metadata, JsonGenerator generator) throw toJson(metadata.refs(), generator); + String snapshotKek = null; generator.writeArrayFieldStart(SNAPSHOTS); for (Snapshot snapshot : metadata.snapshots()) { SnapshotParser.toJson(snapshot, generator); + if (snapshotKek == null) { + snapshotKek = snapshot.manifestListFile().wrappedKeyEncryptionKey(); + } else { + Preconditions.checkState( + snapshotKek.equals(snapshot.manifestListFile().wrappedKeyEncryptionKey()), + "KEK must be identical for all snapshots"); + } } generator.writeEndArray(); + if (snapshotKek != null) { + generator.writeStringField(SNAPSHOT_KEK, snapshotKek); + } + generator.writeArrayFieldStart(STATISTICS); for (StatisticsFile statisticsFile : metadata.statisticsFiles()) { StatisticsFileParser.toJson(statisticsFile, generator); @@ -293,57 +302,13 @@ public static TableMetadata read(FileIO io, String path) { return read(io, io.newInputFile(path)); } - // TODO metadata decrypt flag (on/off) - public static TableMetadata read(FileIO io, MetadataFile metadataFile) { - if (metadataFile.wrappedKeyMetadata() == null) { - return read(io, io.newInputFile(metadataFile.location())); - } else { - Preconditions.checkArgument( - io instanceof EncryptingFileIO, - "Cannot read table metadata (%s) because it is encrypted but the configured " - + "FileIO (%s) does not implement EncryptingFileIO", - io.getClass()); - EncryptingFileIO encryptingFileIO = (EncryptingFileIO) io; - - Preconditions.checkArgument( - encryptingFileIO.encryptionManager() instanceof StandardEncryptionManager, - "Cannot decrypt table metadata because the encryption manager (%s) does not " - + "implement StandardEncryptionManager", - encryptingFileIO.encryptionManager().getClass()); - - StandardEncryptionManager standardEncryptionManager = - (StandardEncryptionManager) encryptingFileIO.encryptionManager(); - - ByteBuffer 
keyMetadataBytes = - ByteBuffer.wrap(Base64.getDecoder().decode(metadataFile.wrappedKeyMetadata())); - - // Unwrap (decrypt) metadata file key - NativeEncryptionKeyMetadata keyMetadata = EncryptionUtil.parseKeyMetadata(keyMetadataBytes); - ByteBuffer unwrappedMetadataKey = - standardEncryptionManager.unwrapKey(keyMetadata.encryptionKey()); - - EncryptionKeyMetadata metadataKeyMetadata = - EncryptionUtil.createKeyMetadata(unwrappedMetadataKey, keyMetadata.aadPrefix()); - - InputFile input = - encryptingFileIO.newDecryptingInputFile( - metadataFile.location(), metadataFile.size(), metadataKeyMetadata.buffer()); - - return read(io, input, unwrappedMetadataKey, keyMetadata.aadPrefix()); - } - } - public static TableMetadata read(FileIO io, InputFile file) { - return read(io, file, null, null); - } - - public static TableMetadata read( - FileIO io, InputFile file, ByteBuffer metadataKey, ByteBuffer metadataAadPrefix) { + EncryptionManager encryption = + (io instanceof EncryptingFileIO) ? ((EncryptingFileIO) io).encryptionManager() : null; Codec codec = Codec.fromFileName(file.location()); try (InputStream is = codec == Codec.GZIP ? new GZIPInputStream(file.newStream()) : file.newStream()) { - return fromJson( - file, JsonUtil.mapper().readValue(is, JsonNode.class), metadataKey, metadataAadPrefix); + return fromJson(file, JsonUtil.mapper().readValue(is, JsonNode.class), encryption); } catch (IOException e) { throw new RuntimeIOException(e, "Failed to read file: %s", file); } @@ -373,12 +338,12 @@ public static TableMetadata fromJson(String metadataLocation, String json) { } public static TableMetadata fromJson(InputFile file, JsonNode node) { - return fromJson(file, node, null, null); + return fromJson(file, node, null); } public static TableMetadata fromJson( - InputFile file, JsonNode node, ByteBuffer metadataKey, ByteBuffer metadataAadPrefix) { - return fromJson(file.location(), node, metadataKey, metadataAadPrefix); + InputFile file, JsonNode node, EncryptionManager encryption) { + return fromJson(file.location(), node, encryption); } public static TableMetadata fromJson(JsonNode node) { @@ -386,15 +351,12 @@ public static TableMetadata fromJson(JsonNode node) { } public static TableMetadata fromJson(String metadataLocation, JsonNode node) { - return fromJson(metadataLocation, node, null, null); + return fromJson(metadataLocation, node, null); } @SuppressWarnings({"checkstyle:CyclomaticComplexity", "checkstyle:MethodLength"}) public static TableMetadata fromJson( - String metadataLocation, - JsonNode node, - ByteBuffer metadataKey, - ByteBuffer metadataAadPrefix) { + String metadataLocation, JsonNode node, EncryptionManager encryption) { Preconditions.checkArgument( node.isObject(), "Cannot parse metadata from a non-object: %s", node); @@ -551,10 +513,22 @@ public static TableMetadata fromJson( Preconditions.checkArgument( snapshotArray.isArray(), "Cannot parse snapshots from non-array: %s", snapshotArray); + if (node.has(SNAPSHOT_KEK)) { + String wrappedKeyEncryptionKey = JsonUtil.getString(SNAPSHOT_KEK, node); + if (wrappedKeyEncryptionKey != null) { + Preconditions.checkState( + encryption instanceof StandardEncryptionManager, + "Can't set wrapped KEK - encryption manager %s is not instance of StandardEncryptionManager", + encryption.getClass()); + ((StandardEncryptionManager) encryption) + .setWrappedKeyEncryptionKey(wrappedKeyEncryptionKey); + } + } + snapshots = Lists.newArrayListWithExpectedSize(snapshotArray.size()); Iterator iterator = snapshotArray.elements(); while 
(iterator.hasNext()) { - snapshots.add(SnapshotParser.fromJson(iterator.next(), metadataKey, metadataAadPrefix)); + snapshots.add(SnapshotParser.fromJson(iterator.next(), encryption)); } } else { snapshots = ImmutableList.of(); diff --git a/core/src/main/java/org/apache/iceberg/encryption/EncryptionUtil.java b/core/src/main/java/org/apache/iceberg/encryption/EncryptionUtil.java index 07512332af76..e2cf98bf767f 100644 --- a/core/src/main/java/org/apache/iceberg/encryption/EncryptionUtil.java +++ b/core/src/main/java/org/apache/iceberg/encryption/EncryptionUtil.java @@ -18,12 +18,13 @@ */ package org.apache.iceberg.encryption; -import java.nio.ByteBuffer; import java.util.Map; import org.apache.iceberg.CatalogProperties; +import org.apache.iceberg.TableProperties; import org.apache.iceberg.common.DynConstructors; import org.apache.iceberg.io.OutputFile; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.util.PropertyUtil; public class EncryptionUtil { @@ -70,35 +71,30 @@ public static KeyManagementClient createKmsClient(Map catalogPro } public static EncryptionManager createEncryptionManager( - String tableKeyId, int dekLength, KeyManagementClient kmsClient) { + Map tableProperties, KeyManagementClient kmsClient) { Preconditions.checkArgument(kmsClient != null, "Invalid KMS client: null"); + String tableKeyId = tableProperties.get(TableProperties.ENCRYPTION_TABLE_KEY); if (null == tableKeyId) { // Unencrypted table return PlaintextEncryptionManager.instance(); } + int dataKeyLength = + PropertyUtil.propertyAsInt( + tableProperties, + TableProperties.ENCRYPTION_DEK_LENGTH, + TableProperties.ENCRYPTION_DEK_LENGTH_DEFAULT); + Preconditions.checkState( - dekLength == 16 || dekLength == 24 || dekLength == 32, + dataKeyLength == 16 || dataKeyLength == 24 || dataKeyLength == 32, "Invalid data key length: %s (must be 16, 24, or 32)", - dekLength); + dataKeyLength); - return new StandardEncryptionManager(tableKeyId, dekLength, kmsClient); + return new StandardEncryptionManager(tableKeyId, dataKeyLength, kmsClient); } public static EncryptedOutputFile plainAsEncryptedOutput(OutputFile encryptingOutputFile) { return new BaseEncryptedOutputFile(encryptingOutputFile, EncryptionKeyMetadata.empty()); } - - public static EncryptionKeyMetadata createKeyMetadata(ByteBuffer key, ByteBuffer aadPrefix) { - Preconditions.checkState( - key.arrayOffset() == 0, "Invalid key array offset {}", key.arrayOffset()); - Preconditions.checkState( - aadPrefix.arrayOffset() == 0, "Invalid aad array offset {}", aadPrefix.arrayOffset()); - return new StandardKeyMetadata(key.array(), aadPrefix.array()); - } - - public static NativeEncryptionKeyMetadata parseKeyMetadata(ByteBuffer keyMetadataBytes) { - return StandardKeyMetadata.parse(keyMetadataBytes); - } } diff --git a/core/src/test/java/org/apache/iceberg/encryption/EncryptionTestHelpers.java b/core/src/test/java/org/apache/iceberg/encryption/EncryptionTestHelpers.java index cdc8c2e1515b..aa49e1c40fe2 100644 --- a/core/src/test/java/org/apache/iceberg/encryption/EncryptionTestHelpers.java +++ b/core/src/test/java/org/apache/iceberg/encryption/EncryptionTestHelpers.java @@ -31,10 +31,11 @@ public static EncryptionManager createEncryptionManager() { Map catalogProperties = Maps.newHashMap(); catalogProperties.put( CatalogProperties.ENCRYPTION_KMS_IMPL, UnitestKMS.class.getCanonicalName()); + Map tableProperties = Maps.newHashMap(); + tableProperties.put(TableProperties.ENCRYPTION_TABLE_KEY, UnitestKMS.MASTER_KEY_NAME1); + 
tableProperties.put(TableProperties.FORMAT_VERSION, "2"); return EncryptionUtil.createEncryptionManager( - UnitestKMS.MASTER_KEY_NAME1, - TableProperties.ENCRYPTION_DEK_LENGTH_DEFAULT, - EncryptionUtil.createKmsClient(catalogProperties)); + tableProperties, EncryptionUtil.createKmsClient(catalogProperties)); } } diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index 8a68db4f031b..618e5edb2a51 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -39,7 +39,6 @@ import org.apache.iceberg.BaseMetastoreOperations; import org.apache.iceberg.BaseMetastoreTableOperations; import org.apache.iceberg.ClientPool; -import org.apache.iceberg.MetadataFile; import org.apache.iceberg.PartitionSpecParser; import org.apache.iceberg.Snapshot; import org.apache.iceberg.SnapshotSummary; @@ -118,6 +117,8 @@ public static String translateToIcebergProp(String hmsProp) { private EncryptionManager encryptionManager; private EncryptingFileIO encryptingFileIO; private boolean encryptedTable; + private String encryptionKeyId; + private int encryptionDekLength; /** Tests only */ protected HiveTableOperations( @@ -184,13 +185,11 @@ public EncryptionManager encryption() { return encryptionManager; } - String tableKeyID = encryptionKeyIdFromProps(); - - if (tableKeyID == null) { - tableKeyID = encryptionKeyIdFromHms(); + if (encryptionKeyId == null) { + encryptionPropsFromHms(); } - if (tableKeyID != null) { + if (encryptionKeyId != null) { if (keyManagementClient == null) { throw new RuntimeException( "Cant create encryption manager, because key management client is not set"); @@ -198,7 +197,8 @@ public EncryptionManager encryption() { encryptedTable = true; encryptionManager = - EncryptionUtil.createEncryptionManager(tableKeyID, dekLength(), keyManagementClient); + EncryptionUtil.createEncryptionManager( + encryptionKeyId, encryptionDekLength, keyManagementClient); } else { encryptionManager = PlaintextEncryptionManager.instance(); } @@ -206,44 +206,14 @@ public EncryptionManager encryption() { return encryptionManager; } - private String encryptionKeyIdFromHms() { - String keyID; - try { - Table table = loadHmsTable(); - if (table == null) { - return null; - } - - keyID = table.getParameters().get(TableProperties.ENCRYPTION_TABLE_KEY); - } catch (TException e) { - String errMsg = - String.format("Failed to get table info from metastore %s.%s", database, tableName); - throw new RuntimeException(errMsg, e); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new RuntimeException("Interrupted during encryption key id retrieval", e); - } - - return keyID; - } - @Override protected void doRefresh() { String metadataLocation = null; - String metadataKeyMetadata = null; - long metadataSize = 0L; try { Table table = metaClients.run(client -> client.getTable(database, tableName)); HiveOperationsBase.validateTableIsIceberg(table, fullName); metadataLocation = table.getParameters().get(METADATA_LOCATION_PROP); - metadataKeyMetadata = table.getParameters().get(METADATA_WRAPPED_KEY_PROP); - String metadataSizeString = table.getParameters().get(METADATA_SIZE_PROP); - if (metadataSizeString != null) { - metadataSize = Long.valueOf(metadataSizeString); - } - // TODO get "encrypted metadata" flag here? Or rely on catalog prop? 
- } catch (NoSuchObjectException e) { if (currentMetadataLocation() != null) { throw new NoSuchTableException("No such table: %s.%s", database, tableName); @@ -259,18 +229,15 @@ protected void doRefresh() { throw new RuntimeException("Interrupted during refresh", e); } - MetadataFile metadataFile = - new MetadataFile(metadataLocation, metadataKeyMetadata, metadataSize); - - // TODO metadata decrypt flag (on/off) - refreshFromMetadataLocation(metadataFile, metadataRefreshMaxRetries); + refreshFromMetadataLocation(metadataLocation, metadataRefreshMaxRetries); } - @SuppressWarnings("checkstyle:CyclomaticComplexity") + @SuppressWarnings({"checkstyle:CyclomaticComplexity", "checkstyle:MethodLength"}) @Override protected void doCommit(TableMetadata base, TableMetadata metadata) { boolean newTable = base == null; - MetadataFile newMetadataFile = writeNewMetadataFileIfRequired(newTable, metadata); + encryptionPropsFromMetadata(metadata); + String newMetadataLocation = writeNewMetadataIfRequired(newTable, metadata); boolean hiveEngineEnabled = hiveEngineEnabled(metadata, conf); boolean keepHiveStats = conf.getBoolean(ConfigProperties.KEEP_HIVE_STATS, false); @@ -334,7 +301,7 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) { .map(Snapshot::summary) .orElseGet(ImmutableMap::of); setHmsTableParameters( - newMetadataFile, tbl, metadata, removedProps, hiveEngineEnabled, summary); + newMetadataLocation, tbl, metadata, removedProps, hiveEngineEnabled, summary); if (!keepHiveStats) { tbl.getParameters().remove(StatsSetupConst.COLUMN_STATS_ACCURATE); @@ -393,7 +360,7 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) { e); commitStatus = BaseMetastoreOperations.CommitStatus.valueOf( - checkCommitStatus(newMetadataFile.location(), metadata).name()); + checkCommitStatus(newMetadataLocation, metadata).name()); switch (commitStatus) { case SUCCESS: break; @@ -415,19 +382,55 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) { throw new CommitFailedException(e); } finally { - HiveOperationsBase.cleanupMetadataAndUnlock( - io(), commitStatus, newMetadataFile.location(), lock); + HiveOperationsBase.cleanupMetadataAndUnlock(io(), commitStatus, newMetadataLocation, lock); } LOG.info( - "Committed to table {} with the new metadata location {}", - fullName, - newMetadataFile.location()); + "Committed to table {} with the new metadata location {}", fullName, newMetadataLocation); + } + + private void encryptionPropsFromMetadata(TableMetadata metadata) { + if (encryptionKeyId == null) { + encryptionKeyId = metadata.property(TableProperties.ENCRYPTION_TABLE_KEY, null); + } + + if (encryptionKeyId != null && encryptionDekLength <= 0) { + String dekLength = metadata.property(TableProperties.ENCRYPTION_DEK_LENGTH, null); + encryptionDekLength = + (dekLength == null) + ? TableProperties.ENCRYPTION_DEK_LENGTH_DEFAULT + : Integer.valueOf(dekLength); + } + } + + private void encryptionPropsFromHms() { + try { + Table table = loadHmsTable(); + if (table == null) { + return; + } + + encryptionKeyId = table.getParameters().get(TableProperties.ENCRYPTION_TABLE_KEY); + if (encryptionKeyId != null) { + String dekLength = table.getParameters().get(TableProperties.ENCRYPTION_DEK_LENGTH); + encryptionDekLength = + (dekLength == null) + ? 
TableProperties.ENCRYPTION_DEK_LENGTH_DEFAULT + : Integer.valueOf(dekLength); + } + } catch (TException e) { + String errMsg = + String.format("Failed to get table info from metastore %s.%s", database, tableName); + throw new RuntimeException(errMsg, e); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException("Interrupted during encryption key id retrieval", e); + } } @SuppressWarnings("checkstyle:CyclomaticComplexity") private void setHmsTableParameters( - MetadataFile newMetadataFile, + String newMetadataLocation, Table tbl, TableMetadata metadata, Set obsoleteProps, @@ -454,26 +457,12 @@ private void setHmsTableParameters( obsoleteProps.forEach(parameters::remove); parameters.put(TABLE_TYPE_PROP, ICEBERG_TABLE_TYPE_VALUE.toUpperCase(Locale.ENGLISH)); - parameters.put(METADATA_LOCATION_PROP, newMetadataFile.location()); - - if (newMetadataFile.wrappedKeyMetadata() != null) { - parameters.put(METADATA_WRAPPED_KEY_PROP, newMetadataFile.wrappedKeyMetadata()); - parameters.put(METADATA_SIZE_PROP, Long.toString(newMetadataFile.size())); - // TODO set "encrypted metadata" flag here? Or rely on catalog prop in readers? - } + parameters.put(METADATA_LOCATION_PROP, newMetadataLocation); if (currentMetadataLocation() != null && !currentMetadataLocation().isEmpty()) { parameters.put(PREVIOUS_METADATA_LOCATION_PROP, currentMetadataLocation()); } - if (currentMetadataFile() != null - && currentMetadataFile().wrappedKeyMetadata() != null - && !currentMetadataFile().wrappedKeyMetadata().isEmpty()) { - parameters.put( - PREVIOUS_METADATA_WRAPPED_KEY_PROP, currentMetadataFile().wrappedKeyMetadata()); - parameters.put(PREVIOUS_METADATA_SIZE_PROP, Long.toString(currentMetadataFile().size())); - } - // If needed set the 'storage_handler' property to enable query from Hive if (hiveEngineEnabled) { parameters.put( diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestTableEncryption.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestTableEncryption.java index e081ea7b0cbf..2a2358cacc94 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestTableEncryption.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestTableEncryption.java @@ -184,20 +184,11 @@ public void testManifestEncryption() throws IOException { foundManifestListFile = true; checkMetadataFileEncryption(localInput(metadataFile)); } - - if (metadataFile.getName().endsWith("metadata.json")) { - foundMetadataJson = true; - checkMetadataFileEncryption(localInput(metadataFile)); - } } if (!foundManifestListFile) { throw new RuntimeException("No manifest list files found for table " + tableName); } - - if (!foundMetadataJson) { - throw new RuntimeException("Metadata json file is not found for table " + tableName); - } } private void checkMetadataFileEncryption(InputFile file) throws IOException { From a077b637d0e1cbc8a667fa41b2bedd84d7a0b69c Mon Sep 17 00:00:00 2001 From: Gidon Gershinsky Date: Tue, 11 Jun 2024 10:23:05 +0300 Subject: [PATCH 16/20] old return type in metadata write --- .palantir/revapi.yml | 24 ------------------- .../apache/iceberg/TableMetadataParser.java | 22 ++++------------- 2 files changed, 5 insertions(+), 41 deletions(-) diff --git a/.palantir/revapi.yml b/.palantir/revapi.yml index 401c72b286aa..78fa772587ce 100644 --- a/.palantir/revapi.yml +++ b/.palantir/revapi.yml @@ -1029,30 +1029,6 @@ acceptedBreaks: \ java.nio.ByteBuffer)" justification: "Decrypting input must receive length" 
org.apache.iceberg:iceberg-core: - - code: "java.method.numberOfParametersChanged" - old: "method org.apache.iceberg.encryption.EncryptionManager org.apache.iceberg.encryption.EncryptionUtil::createEncryptionManager(java.util.Map, org.apache.iceberg.encryption.KeyManagementClient)" - new: "method org.apache.iceberg.encryption.EncryptionManager org.apache.iceberg.encryption.EncryptionUtil::createEncryptionManager(java.lang.String,\ - \ int, org.apache.iceberg.encryption.KeyManagementClient)" - justification: "Create from table key id and dek length" - - code: "java.method.returnTypeChanged" - old: "method void org.apache.iceberg.TableMetadataParser::internalWrite(org.apache.iceberg.TableMetadata,\ - \ org.apache.iceberg.io.OutputFile, boolean)" - new: "method long org.apache.iceberg.TableMetadataParser::internalWrite(org.apache.iceberg.TableMetadata,\ - \ org.apache.iceberg.io.OutputFile, boolean)" - justification: "Return length of the written metadata file" - - code: "java.method.returnTypeChanged" - old: "method void org.apache.iceberg.TableMetadataParser::overwrite(org.apache.iceberg.TableMetadata,\ - \ org.apache.iceberg.io.OutputFile)" - new: "method long org.apache.iceberg.TableMetadataParser::overwrite(org.apache.iceberg.TableMetadata,\ - \ org.apache.iceberg.io.OutputFile)" - justification: "Return length of the written metadata file" - - code: "java.method.returnTypeChanged" - old: "method void org.apache.iceberg.TableMetadataParser::write(org.apache.iceberg.TableMetadata,\ - \ org.apache.iceberg.io.OutputFile)" - new: "method long org.apache.iceberg.TableMetadataParser::write(org.apache.iceberg.TableMetadata,\ - \ org.apache.iceberg.io.OutputFile)" - justification: "Return length of the written metadata file" apache-iceberg-0.14.0: org.apache.iceberg:iceberg-api: - code: "java.class.defaultSerializationChanged" diff --git a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java index 6778ae866654..cd8b035b02f3 100644 --- a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java +++ b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java @@ -41,7 +41,6 @@ import org.apache.iceberg.io.FileIO; import org.apache.iceberg.io.InputFile; import org.apache.iceberg.io.OutputFile; -import org.apache.iceberg.io.PositionOutputStream; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; @@ -116,15 +115,15 @@ private TableMetadataParser() {} static final String STATISTICS = "statistics"; static final String PARTITION_STATISTICS = "partition-statistics"; - public static long overwrite(TableMetadata metadata, OutputFile outputFile) { - return internalWrite(metadata, outputFile, true); + public static void overwrite(TableMetadata metadata, OutputFile outputFile) { + internalWrite(metadata, outputFile, true); } - public static long write(TableMetadata metadata, OutputFile outputFile) { - return internalWrite(metadata, outputFile, false); + public static void write(TableMetadata metadata, OutputFile outputFile) { + internalWrite(metadata, outputFile, false); } - public static long internalWrite( + public static void internalWrite( TableMetadata metadata, OutputFile outputFile, boolean overwrite) { boolean isGzip = Codec.fromFileName(outputFile.location()) == Codec.GZIP; OutputStream stream = overwrite ? 
outputFile.createOrOverwrite() : outputFile.create();
@@ -138,17 +137,6 @@ public static long internalWrite(
} catch (IOException e) {
throw new RuntimeIOException(e, "Failed to write json to file: %s", outputFile);
}
-
- // Get file length
- if (stream instanceof PositionOutputStream) {
- try {
- return ((PositionOutputStream) stream).storedLength();
- } catch (IOException e) {
- throw new RuntimeIOException(e, "Failed to get json file length: %s", outputFile);
- }
- }
-
- return 0L;
}

public static String getFileExtension(String codecName) {

From e04a39060d3cb7fbdce710847f45668e9af6d5f6 Mon Sep 17 00:00:00 2001
From: Gidon Gershinsky
Date: Tue, 11 Jun 2024 10:30:42 +0300
Subject: [PATCH 17/20] remove unneeded change

---
 core/src/main/java/org/apache/iceberg/TableMetadataParser.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java
index cd8b035b02f3..cfb7352ea13e 100644
--- a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java
+++ b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java
@@ -326,7 +326,7 @@ public static TableMetadata fromJson(String metadataLocation, String json) {
}

public static TableMetadata fromJson(InputFile file, JsonNode node) {
- return fromJson(file, node, null);
+ return fromJson(file.location(), node);
}

public static TableMetadata fromJson(

From 3afe781ff269003a0e282e715f324ea4cb628a70 Mon Sep 17 00:00:00 2001
From: Gidon Gershinsky
Date: Thu, 1 Aug 2024 09:14:11 +0300
Subject: [PATCH 18/20] kek cache

---
 .palantir/revapi.yml | 11 --
 .../iceberg/encryption/EncryptingFileIO.java | 11 +-
 .../org/apache/iceberg/TableMetadata.java | 10 ++
 .../apache/iceberg/TableMetadataParser.java | 143 +++++++++---------
 .../org/apache/iceberg/hive/HiveCatalog.java | 9 +-
 .../iceberg/hive/HiveTableOperations.java | 26 +++-
 6 files changed, 121 insertions(+), 89 deletions(-)

diff --git a/.palantir/revapi.yml b/.palantir/revapi.yml
index 78fa772587ce..a41d3ddfb8df 100644
--- a/.palantir/revapi.yml
+++ b/.palantir/revapi.yml
@@ -1018,17 +1018,6 @@ acceptedBreaks:
old: "method void org.apache.iceberg.PositionDeletesTable.PositionDeletesBatchScan::<init>(org.apache.iceberg.Table,\
\ org.apache.iceberg.Schema, org.apache.iceberg.TableScanContext)"
justification: "Removing deprecated code"
- "1.5.0":
- org.apache.iceberg:iceberg-api:
- - code: "java.class.defaultSerializationChanged"
- old: "class org.apache.iceberg.encryption.EncryptingFileIO"
- new: "class org.apache.iceberg.encryption.EncryptingFileIO"
- justification: "Decrypting input mist receive length"
- - code: "java.method.removed"
- old: "method org.apache.iceberg.io.InputFile org.apache.iceberg.encryption.EncryptingFileIO::newDecryptingInputFile(java.lang.String,\
\ java.nio.ByteBuffer)"
- justification: "Decrypting input must receive length"
- org.apache.iceberg:iceberg-core:
apache-iceberg-0.14.0:
org.apache.iceberg:iceberg-api:
- code: "java.class.defaultSerializationChanged"
diff --git a/api/src/main/java/org/apache/iceberg/encryption/EncryptingFileIO.java b/api/src/main/java/org/apache/iceberg/encryption/EncryptingFileIO.java
index 18e83ce8fe48..fb86769173a0 100644
--- a/api/src/main/java/org/apache/iceberg/encryption/EncryptingFileIO.java
+++ b/api/src/main/java/org/apache/iceberg/encryption/EncryptingFileIO.java
@@ -111,9 +111,18 @@ public InputFile newInputFile(ManifestFile manifest) {
}
}

+ /**
+ * @deprecated will be removed in 2.0.0.
use {@link #newDecryptingInputFile(String, long, + * ByteBuffer)} instead. + */ + @Deprecated + public InputFile newDecryptingInputFile(String path, ByteBuffer buffer) { + throw new RuntimeIOException("Deprecated API. File decryption without length is not safe"); + } + public InputFile newDecryptingInputFile(String path, long length, ByteBuffer buffer) { Preconditions.checkArgument( - length > 0, "Cannot safely decrypt table metadata file because its size is not specified"); + length > 0, "Cannot safely decrypt file %s because its length is not specified", path); InputFile inputFile = io.newInputFile(path, length); diff --git a/core/src/main/java/org/apache/iceberg/TableMetadata.java b/core/src/main/java/org/apache/iceberg/TableMetadata.java index 9587c57a0fd2..512bedde00ca 100644 --- a/core/src/main/java/org/apache/iceberg/TableMetadata.java +++ b/core/src/main/java/org/apache/iceberg/TableMetadata.java @@ -29,6 +29,7 @@ import java.util.function.Predicate; import java.util.stream.Collectors; import java.util.stream.Stream; +import org.apache.iceberg.encryption.KeyEncryptionKey; import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; import org.apache.iceberg.relocated.com.google.common.base.Objects; @@ -260,6 +261,7 @@ public String toString() { private final List partitionStatisticsFiles; private final List changes; private SerializableSupplier> snapshotsSupplier; + private Map kekCache; private volatile List snapshots; private volatile Map snapshotsById; private volatile Map refs; @@ -512,6 +514,14 @@ public List snapshots() { return snapshots; } + public void setKekCache(Map kekCache) { + this.kekCache = kekCache; + } + + public Map kekCache() { + return kekCache; + } + private synchronized void ensureSnapshotsLoaded() { if (!snapshotsLoaded) { List loadedSnapshots = Lists.newArrayList(snapshotsSupplier.get()); diff --git a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java index cfb7352ea13e..eab9aceef110 100644 --- a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java +++ b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java @@ -34,9 +34,8 @@ import java.util.zip.GZIPOutputStream; import org.apache.iceberg.TableMetadata.MetadataLogEntry; import org.apache.iceberg.TableMetadata.SnapshotLogEntry; -import org.apache.iceberg.encryption.EncryptingFileIO; -import org.apache.iceberg.encryption.EncryptionManager; -import org.apache.iceberg.encryption.StandardEncryptionManager; +import org.apache.iceberg.encryption.EncryptionUtil; +import org.apache.iceberg.encryption.KeyEncryptionKey; import org.apache.iceberg.exceptions.RuntimeIOException; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.io.InputFile; @@ -45,6 +44,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.util.JsonUtil; public class TableMetadataParser { @@ -107,7 +107,9 @@ private TableMetadataParser() {} static final String REFS = "refs"; static final String SNAPSHOTS = "snapshots"; static final String SNAPSHOT_ID = "snapshot-id"; - static final String SNAPSHOT_KEK = "snapshot-kek"; + static final String KEK_CACHE = "kek-cache"; + static final String KEK_ID = "kek-id"; + static 
final String KEK_WRAP = "kek-wrap"; static final String TIMESTAMP_MS = "timestamp-ms"; static final String SNAPSHOT_LOG = "snapshot-log"; static final String METADATA_FILE = "metadata-file"; @@ -127,7 +129,6 @@ public static void internalWrite( TableMetadata metadata, OutputFile outputFile, boolean overwrite) { boolean isGzip = Codec.fromFileName(outputFile.location()) == Codec.GZIP; OutputStream stream = overwrite ? outputFile.createOrOverwrite() : outputFile.create(); - try (OutputStream ou = isGzip ? new GZIPOutputStream(stream) : stream; OutputStreamWriter writer = new OutputStreamWriter(ou, StandardCharsets.UTF_8)) { JsonGenerator generator = JsonUtil.factory().createGenerator(writer); @@ -225,24 +226,24 @@ public static void toJson(TableMetadata metadata, JsonGenerator generator) throw toJson(metadata.refs(), generator); - String snapshotKek = null; + if (metadata.kekCache() != null && !metadata.kekCache().isEmpty()) { + generator.writeArrayFieldStart(KEK_CACHE); + for (Map.Entry entry : metadata.kekCache().entrySet()) { + generator.writeStartObject(); + generator.writeStringField(KEK_ID, entry.getKey()); + generator.writeStringField(KEK_WRAP, entry.getValue().wrappedKey()); + generator.writeNumberField(TIMESTAMP_MS, entry.getValue().timestamp()); + generator.writeEndObject(); + } + generator.writeEndArray(); + } + generator.writeArrayFieldStart(SNAPSHOTS); for (Snapshot snapshot : metadata.snapshots()) { SnapshotParser.toJson(snapshot, generator); - if (snapshotKek == null) { - snapshotKek = snapshot.manifestListFile().wrappedKeyEncryptionKey(); - } else { - Preconditions.checkState( - snapshotKek.equals(snapshot.manifestListFile().wrappedKeyEncryptionKey()), - "KEK must be identical for all snapshots"); - } } generator.writeEndArray(); - if (snapshotKek != null) { - generator.writeStringField(SNAPSHOT_KEK, snapshotKek); - } - generator.writeArrayFieldStart(STATISTICS); for (StatisticsFile statisticsFile : metadata.statisticsFiles()) { StatisticsFileParser.toJson(statisticsFile, generator); @@ -291,12 +292,14 @@ public static TableMetadata read(FileIO io, String path) { } public static TableMetadata read(FileIO io, InputFile file) { - EncryptionManager encryption = - (io instanceof EncryptingFileIO) ? ((EncryptingFileIO) io).encryptionManager() : null; Codec codec = Codec.fromFileName(file.location()); try (InputStream is = codec == Codec.GZIP ? 
new GZIPInputStream(file.newStream()) : file.newStream()) { - return fromJson(file, JsonUtil.mapper().readValue(is, JsonNode.class), encryption); + TableMetadata tableMetadata = fromJson(file, JsonUtil.mapper().readValue(is, JsonNode.class)); + if (tableMetadata.kekCache() != null) { + EncryptionUtil.getKekCacheFromMetadata(io, tableMetadata.kekCache()); + } + return tableMetadata; } catch (IOException e) { throw new RuntimeIOException(e, "Failed to read file: %s", file); } @@ -329,22 +332,12 @@ public static TableMetadata fromJson(InputFile file, JsonNode node) { return fromJson(file.location(), node); } - public static TableMetadata fromJson( - InputFile file, JsonNode node, EncryptionManager encryption) { - return fromJson(file.location(), node, encryption); - } - public static TableMetadata fromJson(JsonNode node) { return fromJson((String) null, node); } - public static TableMetadata fromJson(String metadataLocation, JsonNode node) { - return fromJson(metadataLocation, node, null); - } - @SuppressWarnings({"checkstyle:CyclomaticComplexity", "checkstyle:MethodLength"}) - public static TableMetadata fromJson( - String metadataLocation, JsonNode node, EncryptionManager encryption) { + public static TableMetadata fromJson(String metadataLocation, JsonNode node) { Preconditions.checkArgument( node.isObject(), "Cannot parse metadata from a non-object: %s", node); @@ -495,28 +488,33 @@ public static TableMetadata fromJson( refs = ImmutableMap.of(); } + Map kekCache = null; + if (node.has(KEK_CACHE)) { + kekCache = Maps.newHashMap(); + Iterator cacheIterator = node.get(KEK_CACHE).elements(); + while (cacheIterator.hasNext()) { + JsonNode entryNode = cacheIterator.next(); + String kekID = JsonUtil.getString(KEK_ID, entryNode); + kekCache.put( + kekID, + new KeyEncryptionKey( + kekID, + null, // key will be unwrapped later + JsonUtil.getString(KEK_WRAP, entryNode), + JsonUtil.getLong(TIMESTAMP_MS, entryNode))); + } + } + List snapshots; if (node.has(SNAPSHOTS)) { JsonNode snapshotArray = JsonUtil.get(SNAPSHOTS, node); Preconditions.checkArgument( snapshotArray.isArray(), "Cannot parse snapshots from non-array: %s", snapshotArray); - if (node.has(SNAPSHOT_KEK)) { - String wrappedKeyEncryptionKey = JsonUtil.getString(SNAPSHOT_KEK, node); - if (wrappedKeyEncryptionKey != null) { - Preconditions.checkState( - encryption instanceof StandardEncryptionManager, - "Can't set wrapped KEK - encryption manager %s is not instance of StandardEncryptionManager", - encryption.getClass()); - ((StandardEncryptionManager) encryption) - .setWrappedKeyEncryptionKey(wrappedKeyEncryptionKey); - } - } - snapshots = Lists.newArrayListWithExpectedSize(snapshotArray.size()); Iterator iterator = snapshotArray.elements(); while (iterator.hasNext()) { - snapshots.add(SnapshotParser.fromJson(iterator.next(), encryption)); + snapshots.add(SnapshotParser.fromJson(iterator.next())); } } else { snapshots = ImmutableList.of(); @@ -560,31 +558,38 @@ public static TableMetadata fromJson( } } - return new TableMetadata( - metadataLocation, - formatVersion, - uuid, - location, - lastSequenceNumber, - lastUpdatedMillis, - lastAssignedColumnId, - currentSchemaId, - schemas, - defaultSpecId, - specs, - lastAssignedPartitionId, - defaultSortOrderId, - sortOrders, - properties, - currentSnapshotId, - snapshots, - null, - entries.build(), - metadataEntries.build(), - refs, - statisticsFiles, - partitionStatisticsFiles, - ImmutableList.of() /* no changes from the file */); + TableMetadata result = + new TableMetadata( + metadataLocation, + 
formatVersion, + uuid, + location, + lastSequenceNumber, + lastUpdatedMillis, + lastAssignedColumnId, + currentSchemaId, + schemas, + defaultSpecId, + specs, + lastAssignedPartitionId, + defaultSortOrderId, + sortOrders, + properties, + currentSnapshotId, + snapshots, + null, + entries.build(), + metadataEntries.build(), + refs, + statisticsFiles, + partitionStatisticsFiles, + ImmutableList.of()); /* no changes from the file */ + + if (kekCache != null) { + result.setKekCache(kekCache); + } + + return result; } private static Map refsFromJson(JsonNode refMap) { diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java index 982fac3107b4..0552e7665cd6 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java @@ -58,6 +58,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.util.LocationUtil; +import org.apache.iceberg.util.PropertyUtil; import org.apache.thrift.TException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -82,6 +83,7 @@ public class HiveCatalog extends BaseMetastoreCatalog implements SupportsNamespa private ClientPool clients; private boolean listAllTables = false; private Map catalogProperties; + private long kekCacheTimeout; public HiveCatalog() {} @@ -115,6 +117,11 @@ public void initialize(String inputName, Map properties) { if (catalogProperties.containsKey(CatalogProperties.ENCRYPTION_KMS_IMPL)) { this.keyManagementClient = EncryptionUtil.createKmsClient(properties); + this.kekCacheTimeout = + PropertyUtil.propertyAsLong( + properties, + CatalogProperties.KEK_CACHE_TIMEOUT_MS, + CatalogProperties.KEK_CACHE_TIMEOUT_MS_DEFAULT); } this.clients = new CachedClientPool(conf, properties); @@ -520,7 +527,7 @@ public TableOperations newTableOps(TableIdentifier tableIdentifier) { String dbName = tableIdentifier.namespace().level(0); String tableName = tableIdentifier.name(); return new HiveTableOperations( - conf, clients, fileIO, keyManagementClient, name, dbName, tableName); + conf, clients, fileIO, keyManagementClient, name, dbName, tableName, kekCacheTimeout); } @Override diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index 618e5edb2a51..9888c4bc4ef8 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -50,6 +50,7 @@ import org.apache.iceberg.encryption.EncryptionUtil; import org.apache.iceberg.encryption.KeyManagementClient; import org.apache.iceberg.encryption.PlaintextEncryptionManager; +import org.apache.iceberg.encryption.StandardEncryptionManager; import org.apache.iceberg.exceptions.AlreadyExistsException; import org.apache.iceberg.exceptions.CommitFailedException; import org.apache.iceberg.exceptions.CommitStateUnknownException; @@ -113,6 +114,7 @@ public static String translateToIcebergProp(String hmsProp) { private final FileIO fileIO; private final KeyManagementClient keyManagementClient; private final ClientPool metaClients; + private final long kekCacheTimeout; private EncryptionManager encryptionManager; private EncryptingFileIO encryptingFileIO; @@ -128,7 +130,7 @@ protected 
HiveTableOperations( String catalogName, String database, String table) { - this(conf, metaClients, fileIO, null, catalogName, database, table); + this(conf, metaClients, fileIO, null, catalogName, database, table, 0L); } protected HiveTableOperations( @@ -138,7 +140,8 @@ protected HiveTableOperations( KeyManagementClient keyManagementClient, String catalogName, String database, - String table) { + String table, + long kekCacheTimeout) { this.conf = conf; this.metaClients = metaClients; this.fileIO = fileIO; @@ -154,6 +157,7 @@ protected HiveTableOperations( this.maxHiveTablePropertySize = conf.getLong(HIVE_TABLE_PROPERTY_MAX_SIZE, HIVE_TABLE_PROPERTY_MAX_SIZE_DEFAULT); this.encryptedTable = false; + this.kekCacheTimeout = kekCacheTimeout; } @Override @@ -198,7 +202,7 @@ public EncryptionManager encryption() { encryptedTable = true; encryptionManager = EncryptionUtil.createEncryptionManager( - encryptionKeyId, encryptionDekLength, keyManagementClient); + encryptionKeyId, encryptionDekLength, keyManagementClient, kekCacheTimeout); } else { encryptionManager = PlaintextEncryptionManager.instance(); } @@ -236,7 +240,15 @@ protected void doRefresh() { @Override protected void doCommit(TableMetadata base, TableMetadata metadata) { boolean newTable = base == null; - encryptionPropsFromMetadata(metadata); + + encryptionPropsFromMetadata(metadata.properties()); + + if (encryption() instanceof StandardEncryptionManager) { + StandardEncryptionManager standardEncryptionManager = + (StandardEncryptionManager) encryptionManager; + metadata.setKekCache(standardEncryptionManager.kekCache()); + } + String newMetadataLocation = writeNewMetadataIfRequired(newTable, metadata); boolean hiveEngineEnabled = hiveEngineEnabled(metadata, conf); boolean keepHiveStats = conf.getBoolean(ConfigProperties.KEEP_HIVE_STATS, false); @@ -389,13 +401,13 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) { "Committed to table {} with the new metadata location {}", fullName, newMetadataLocation); } - private void encryptionPropsFromMetadata(TableMetadata metadata) { + private void encryptionPropsFromMetadata(Map tableProperties) { if (encryptionKeyId == null) { - encryptionKeyId = metadata.property(TableProperties.ENCRYPTION_TABLE_KEY, null); + encryptionKeyId = tableProperties.get(TableProperties.ENCRYPTION_TABLE_KEY); } if (encryptionKeyId != null && encryptionDekLength <= 0) { - String dekLength = metadata.property(TableProperties.ENCRYPTION_DEK_LENGTH, null); + String dekLength = tableProperties.get(TableProperties.ENCRYPTION_DEK_LENGTH); encryptionDekLength = (dekLength == null) ? 
TableProperties.ENCRYPTION_DEK_LENGTH_DEFAULT From 453a8b987b813608f3692a0ade53c2e0402ec2d2 Mon Sep 17 00:00:00 2001 From: Gidon Gershinsky Date: Sun, 4 Aug 2024 14:24:29 +0300 Subject: [PATCH 19/20] address review comments --- .../java/org/apache/iceberg/hive/HiveCatalog.java | 10 +++++----- .../org/apache/iceberg/hive/HiveTableOperations.java | 12 +++++------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java index 0552e7665cd6..02525fd34af1 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java @@ -83,7 +83,7 @@ public class HiveCatalog extends BaseMetastoreCatalog implements SupportsNamespa private ClientPool clients; private boolean listAllTables = false; private Map catalogProperties; - private long kekCacheTimeout; + private long writerKekTimeout; public HiveCatalog() {} @@ -117,11 +117,11 @@ public void initialize(String inputName, Map properties) { if (catalogProperties.containsKey(CatalogProperties.ENCRYPTION_KMS_IMPL)) { this.keyManagementClient = EncryptionUtil.createKmsClient(properties); - this.kekCacheTimeout = + this.writerKekTimeout = PropertyUtil.propertyAsLong( properties, - CatalogProperties.KEK_CACHE_TIMEOUT_MS, - CatalogProperties.KEK_CACHE_TIMEOUT_MS_DEFAULT); + CatalogProperties.WRITER_KEK_TIMEOUT_MS, + CatalogProperties.WRITER_KEK_TIMEOUT_MS_DEFAULT); } this.clients = new CachedClientPool(conf, properties); @@ -527,7 +527,7 @@ public TableOperations newTableOps(TableIdentifier tableIdentifier) { String dbName = tableIdentifier.namespace().level(0); String tableName = tableIdentifier.name(); return new HiveTableOperations( - conf, clients, fileIO, keyManagementClient, name, dbName, tableName, kekCacheTimeout); + conf, clients, fileIO, keyManagementClient, name, dbName, tableName, writerKekTimeout); } @Override diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index 9888c4bc4ef8..6ab813f9a78d 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -114,7 +114,7 @@ public static String translateToIcebergProp(String hmsProp) { private final FileIO fileIO; private final KeyManagementClient keyManagementClient; private final ClientPool metaClients; - private final long kekCacheTimeout; + private final long writerKekTimeout; private EncryptionManager encryptionManager; private EncryptingFileIO encryptingFileIO; @@ -141,7 +141,7 @@ protected HiveTableOperations( String catalogName, String database, String table, - long kekCacheTimeout) { + long writerKekTimeout) { this.conf = conf; this.metaClients = metaClients; this.fileIO = fileIO; @@ -157,7 +157,7 @@ protected HiveTableOperations( this.maxHiveTablePropertySize = conf.getLong(HIVE_TABLE_PROPERTY_MAX_SIZE, HIVE_TABLE_PROPERTY_MAX_SIZE_DEFAULT); this.encryptedTable = false; - this.kekCacheTimeout = kekCacheTimeout; + this.writerKekTimeout = writerKekTimeout; } @Override @@ -202,7 +202,7 @@ public EncryptionManager encryption() { encryptedTable = true; encryptionManager = EncryptionUtil.createEncryptionManager( - encryptionKeyId, encryptionDekLength, keyManagementClient, kekCacheTimeout); + encryptionKeyId, encryptionDekLength, 
keyManagementClient, writerKekTimeout); } else { encryptionManager = PlaintextEncryptionManager.instance(); } @@ -244,9 +244,7 @@ protected void doCommit(TableMetadata base, TableMetadata metadata) { encryptionPropsFromMetadata(metadata.properties()); if (encryption() instanceof StandardEncryptionManager) { - StandardEncryptionManager standardEncryptionManager = - (StandardEncryptionManager) encryptionManager; - metadata.setKekCache(standardEncryptionManager.kekCache()); + metadata.setKekCache(EncryptionUtil.kekCache(encryptionManager)); } String newMetadataLocation = writeNewMetadataIfRequired(newTable, metadata); From bd6206701bfd40987937e37b9bfa4ef5e013489d Mon Sep 17 00:00:00 2001 From: Gidon Gershinsky Date: Sun, 4 Aug 2024 14:41:13 +0300 Subject: [PATCH 20/20] visible for testing --- .../main/java/org/apache/iceberg/hive/HiveTableOperations.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index 6ab813f9a78d..793d05a3bd46 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -122,7 +122,7 @@ public static String translateToIcebergProp(String hmsProp) { private String encryptionKeyId; private int encryptionDekLength; - /** Tests only */ + @VisibleForTesting protected HiveTableOperations( Configuration conf, ClientPool metaClients,
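
Editor's sketch (not one of the patches in this series): taken together, the diffs above converge on a flow where the catalog builds the KMS client once from catalog properties, and table properties alone decide whether a table gets a StandardEncryptionManager or the plaintext manager. The Java sketch below mirrors the flow shown in the EncryptionTestHelpers diff; it is a hedged illustration, assuming UnitestKMS is the test-scope KMS in org.apache.iceberg.encryption, and it uses the map-based createEncryptionManager overload introduced in the EncryptionUtil diff above (later patches in the series move the Hive path to a (keyId, dekLength, kmsClient, writerKekTimeout) variant).

import java.util.Map;
import org.apache.iceberg.CatalogProperties;
import org.apache.iceberg.TableProperties;
import org.apache.iceberg.encryption.EncryptionManager;
import org.apache.iceberg.encryption.EncryptionUtil;
import org.apache.iceberg.encryption.KeyManagementClient;
import org.apache.iceberg.encryption.UnitestKMS;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;

public class TableEncryptionWiringSketch {

  public static EncryptionManager encryptionManagerFor(Map<String, String> tableProperties) {
    // Catalog level: pick the KMS implementation once and build a client from it,
    // as HiveCatalog.initialize does when ENCRYPTION_KMS_IMPL is set
    Map<String, String> catalogProperties = Maps.newHashMap();
    catalogProperties.put(
        CatalogProperties.ENCRYPTION_KMS_IMPL, UnitestKMS.class.getCanonicalName());
    KeyManagementClient kmsClient = EncryptionUtil.createKmsClient(catalogProperties);

    // Table level: ENCRYPTION_TABLE_KEY selects encryption; without it the call returns
    // the plaintext manager. ENCRYPTION_DEK_LENGTH is optional and, per the diff above,
    // falls back to ENCRYPTION_DEK_LENGTH_DEFAULT and must be 16, 24, or 32.
    return EncryptionUtil.createEncryptionManager(tableProperties, kmsClient);
  }

  public static void main(String[] args) {
    Map<String, String> tableProperties = Maps.newHashMap();
    tableProperties.put(TableProperties.ENCRYPTION_TABLE_KEY, UnitestKMS.MASTER_KEY_NAME1);
    EncryptionManager manager = encryptionManagerFor(tableProperties);
    System.out.println("Encryption manager: " + manager.getClass().getSimpleName());
  }
}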