diff --git a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java
index c57b80f7960c..d0ee7bec4cca 100644
--- a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java
+++ b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java
@@ -39,6 +39,7 @@
 import org.apache.hadoop.hive.metastore.RetryingHMSHandler;
 import org.apache.hadoop.hive.metastore.TSetIpAddressProcessor;
 import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.iceberg.catalog.TableIdentifier;
 import org.apache.iceberg.common.DynConstructors;
 import org.apache.iceberg.common.DynMethods;
 import org.apache.iceberg.hadoop.Util;
@@ -193,6 +194,10 @@ public Table getTable(String dbName, String tableName) throws TException, Interr
     return clientPool.run(client -> client.getTable(dbName, tableName));
   }
 
+  public Table getTable(TableIdentifier identifier) throws TException, InterruptedException {
+    return getTable(identifier.namespace().toString(), identifier.name());
+  }
+
   private TServer newThriftServer(TServerSocket socket, int poolSize, HiveConf conf) throws Exception {
     HiveConf serverConf = new HiveConf(conf);
     serverConf.set(HiveConf.ConfVars.METASTORECONNECTURLKEY.varname, "jdbc:derby:" + getDerbyPath() + ";create=true");
diff --git a/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java b/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
index 438ca8768d2c..582d3d6dc55c 100644
--- a/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
+++ b/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
@@ -157,10 +157,18 @@ public void preDropTable(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
         "TRUE".equalsIgnoreCase(hmsTable.getParameters().get(InputFormatConfig.EXTERNAL_TABLE_PURGE));
 
     if (deleteIcebergTable && Catalogs.hiveCatalog(conf, catalogProperties)) {
-      // Store the metadata and the id for deleting the actual table data
-      String metadataLocation = hmsTable.getParameters().get(BaseMetastoreTableOperations.METADATA_LOCATION_PROP);
-      this.deleteIo = Catalogs.loadTable(conf, catalogProperties).io();
-      this.deleteMetadata = TableMetadataParser.read(deleteIo, metadataLocation);
+      // Store the metadata and the io for deleting the actual table data
+      try {
+        String metadataLocation = hmsTable.getParameters().get(BaseMetastoreTableOperations.METADATA_LOCATION_PROP);
+        this.deleteIo = Catalogs.loadTable(conf, catalogProperties).io();
+        this.deleteMetadata = TableMetadataParser.read(deleteIo, metadataLocation);
+      } catch (Exception e) {
+        LOG.error("preDropTable: Error while loading the Iceberg table or parsing its metadata for HMS table: {}.{}. " +
+            "In some cases, this might lead to undeleted metadata files under the table directory: {}. " +
+            "Please double check and, if needed, manually delete any dangling files/folders. " +
+            "In spite of this error, the HMS table drop operation should proceed as normal.",
+            hmsTable.getDbName(), hmsTable.getTableName(), hmsTable.getSd().getLocation(), e);
+      }
     }
   }
 
@@ -178,7 +186,7 @@ public void commitDropTable(org.apache.hadoop.hive.metastore.api.Table hmsTable,
       Catalogs.dropTable(conf, catalogProperties);
     } else {
       // do nothing if metadata folder has been deleted already (Hive 4 behaviour for purge=TRUE)
-      if (deleteIo.newInputFile(deleteMetadata.location()).exists()) {
+      if (deleteMetadata != null && deleteIo.newInputFile(deleteMetadata.location()).exists()) {
         CatalogUtil.dropTableData(deleteIo, deleteMetadata);
       }
     }
diff --git a/mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java b/mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
index 2b2eb6b59a2f..74d36efed9ad 100644
--- a/mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
+++ b/mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
@@ -316,6 +316,34 @@ public void testDeleteBackingTable() throws TException, IOException, Interrupted
     }
   }
 
+  @Test
+  public void testDropTableWithCorruptedMetadata() throws TException, IOException, InterruptedException {
+    Assume.assumeTrue("Only HiveCatalog attempts to load the Iceberg table prior to dropping it.",
+        testTableType == TestTables.TestTableType.HIVE_CATALOG);
+
+    // create test table
+    TableIdentifier identifier = TableIdentifier.of("default", "customers");
+    testTables.createTable(shell, identifier.name(),
+        HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, FileFormat.PARQUET, ImmutableList.of());
+
+    // enable data purging (this should set external.table.purge=true on the HMS table)
+    Table table = testTables.loadTable(identifier);
+    table.updateProperties().set(GC_ENABLED, "true").commit();
+
+    // delete its current snapshot file (i.e. corrupt the metadata to make the Iceberg table unloadable)
+    String metadataLocation = shell.metastore().getTable(identifier)
+        .getParameters().get(BaseMetastoreTableOperations.METADATA_LOCATION_PROP);
+    table.io().deleteFile(metadataLocation);
+
+    // check if HMS table is nonetheless still droppable
+    shell.executeStatement(String.format("DROP TABLE %s", identifier));
+    AssertHelpers.assertThrows("should throw exception", NoSuchTableException.class,
+        "Table does not exist", () -> {
+          testTables.loadTable(identifier);
+        }
+    );
+  }
+
   @Test
   public void testCreateTableError() {
     TableIdentifier identifier = TableIdentifier.of("default", "withShell2");