From e5cf435768adcca34b902fdc4c2c420249550d78 Mon Sep 17 00:00:00 2001 From: yufeigu Date: Tue, 12 Apr 2022 11:09:04 -0700 Subject: [PATCH 01/12] Expose default partition spec and sort order in HMS. --- .../org/apache/iceberg/TableProperties.java | 18 +++++- .../iceberg/hive/HiveTableOperations.java | 18 ++++++ .../apache/iceberg/hive/TestHiveCatalog.java | 57 +++++++++++++------ 3 files changed, 76 insertions(+), 17 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/TableProperties.java b/core/src/main/java/org/apache/iceberg/TableProperties.java index e955b7f77c96..9a2d3bf900c4 100644 --- a/core/src/main/java/org/apache/iceberg/TableProperties.java +++ b/core/src/main/java/org/apache/iceberg/TableProperties.java @@ -75,6 +75,20 @@ private TableProperties() { */ public static final String CURRENT_SNAPSHOT_TIMESTAMP = "current-snapshot-timestamp-ms"; + /** + * Reserved table property for default partition spec. + *

+ * This reserved property is used to store the default partition spec. + */ + public static final String DEFAULT_PARTITION_SPEC = "default-partition-spec"; + + /** + * Reserved table property for default sort order. + *

+ * This reserved property is used to store the default sort order. + */ + public static final String DEFAULT_SORT_ORDER = "default-sort-order"; + /** * Reserved Iceberg table properties list. *

@@ -87,7 +101,9 @@ private TableProperties() { SNAPSHOT_COUNT, CURRENT_SNAPSHOT_ID, CURRENT_SNAPSHOT_SUMMARY, - CURRENT_SNAPSHOT_TIMESTAMP + CURRENT_SNAPSHOT_TIMESTAMP, + DEFAULT_PARTITION_SPEC, + DEFAULT_SORT_ORDER ); public static final String COMMIT_NUM_RETRIES = "commit.retry.num-retries"; diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index 9b2223a51670..6bd52f4601f7 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -52,8 +52,10 @@ import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.iceberg.BaseMetastoreTableOperations; import org.apache.iceberg.ClientPool; +import org.apache.iceberg.PartitionSpecParser; import org.apache.iceberg.Snapshot; import org.apache.iceberg.SnapshotSummary; +import org.apache.iceberg.SortOrderParser; import org.apache.iceberg.TableMetadata; import org.apache.iceberg.TableProperties; import org.apache.iceberg.exceptions.AlreadyExistsException; @@ -399,6 +401,8 @@ private void setHmsTableParameters(String newMetadataLocation, Table tbl, TableM } setSnapshotStats(metadata, parameters); + setPartitionSpec(metadata, parameters); + setSortOrder(metadata, parameters); tbl.setParameters(parameters); } @@ -433,6 +437,20 @@ void setSnapshotSummary(Map parameters, Snapshot currentSnapshot } } + private void setPartitionSpec(TableMetadata metadata, Map parameters) { + parameters.remove(TableProperties.DEFAULT_PARTITION_SPEC); + if (metadata.spec() != null && metadata.spec().isPartitioned()) { + parameters.put(TableProperties.DEFAULT_PARTITION_SPEC, PartitionSpecParser.toJson(metadata.spec())); + } + } + + private void setSortOrder(TableMetadata metadata, Map parameters) { + parameters.remove(TableProperties.DEFAULT_SORT_ORDER); + if (metadata.sortOrder() != null && 
metadata.sortOrder().isSorted()) { + parameters.put(TableProperties.DEFAULT_SORT_ORDER, SortOrderParser.toJson(metadata.sortOrder())); + } + } + private StorageDescriptor storageDescriptor(TableMetadata metadata, boolean hiveEngineEnabled) { final StorageDescriptor storageDescriptor = new StorageDescriptor(); diff --git a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java index 855ba4f8119b..882643546b2e 100644 --- a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java +++ b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java @@ -32,9 +32,11 @@ import org.apache.iceberg.DataFiles; import org.apache.iceberg.FileFormat; import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.PartitionSpecParser; import org.apache.iceberg.Schema; import org.apache.iceberg.Snapshot; import org.apache.iceberg.SortOrder; +import org.apache.iceberg.SortOrderParser; import org.apache.iceberg.Table; import org.apache.iceberg.TableProperties; import org.apache.iceberg.Transaction; @@ -60,6 +62,7 @@ import static org.apache.iceberg.NullOrder.NULLS_FIRST; import static org.apache.iceberg.SortDirection.ASC; +import static org.apache.iceberg.TableProperties.DEFAULT_SORT_ORDER; import static org.apache.iceberg.types.Types.NestedField.required; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.spy; @@ -232,7 +235,7 @@ public void testReplaceTxnBuilder() throws Exception { } @Test - public void testCreateTableDefaultSortOrder() { + public void testCreateTableDefaultSortOrder() throws Exception { Schema schema = new Schema( required(1, "id", Types.IntegerType.get(), "unique ID"), required(2, "data", Types.StringType.get()) @@ -246,13 +249,16 @@ public void testCreateTableDefaultSortOrder() { Table table = catalog.createTable(tableIdent, schema, spec); Assert.assertEquals("Order ID must match", 0, 
table.sortOrder().orderId()); Assert.assertTrue("Order must unsorted", table.sortOrder().isUnsorted()); + + Assert.assertTrue("Must not have default sort order in catalog", + !hmsTableParameters().containsKey(DEFAULT_SORT_ORDER)); } finally { catalog.dropTable(tableIdent); } } @Test - public void testCreateTableCustomSortOrder() { + public void testCreateTableCustomSortOrder() throws Exception { Schema schema = new Schema( required(1, "id", Types.IntegerType.get(), "unique ID"), required(2, "data", Types.StringType.get()) @@ -277,6 +283,8 @@ public void testCreateTableCustomSortOrder() { Assert.assertEquals("Null order must match ", NULLS_FIRST, sortOrder.fields().get(0).nullOrder()); Transform transform = Transforms.identity(Types.IntegerType.get()); Assert.assertEquals("Transform must match", transform, sortOrder.fields().get(0).transform()); + + Assert.assertEquals(SortOrderParser.toJson(table.sortOrder()), hmsTableParameters().get(DEFAULT_SORT_ORDER)); } finally { catalog.dropTable(tableIdent); } @@ -469,13 +477,7 @@ public void testUUIDinTableProperties() throws Exception { .withLocation(location) .create(); - String tableName = tableIdentifier.name(); - org.apache.hadoop.hive.metastore.api.Table hmsTable = - metastoreClient.getTable(tableIdentifier.namespace().level(0), tableName); - - // check parameters are in expected state - Map parameters = hmsTable.getParameters(); - Assert.assertNotNull(parameters.get(TableProperties.UUID)); + Assert.assertNotNull(hmsTableParameters().get(TableProperties.UUID)); } finally { catalog.dropTable(tableIdentifier); } @@ -495,12 +497,8 @@ public void testSnapshotStatsTableProperties() throws Exception { .withLocation(location) .create(); - String tableName = tableIdentifier.name(); - org.apache.hadoop.hive.metastore.api.Table hmsTable = - metastoreClient.getTable(tableIdentifier.namespace().level(0), tableName); - // check whether parameters are in expected state - Map parameters = hmsTable.getParameters(); + Map parameters = 
hmsTableParameters(); Assert.assertEquals("0", parameters.get(TableProperties.SNAPSHOT_COUNT)); Assert.assertNull(parameters.get(TableProperties.CURRENT_SNAPSHOT_SUMMARY)); Assert.assertNull(parameters.get(TableProperties.CURRENT_SNAPSHOT_ID)); @@ -517,8 +515,7 @@ public void testSnapshotStatsTableProperties() throws Exception { icebergTable.newFastAppend().appendFile(file).commit(); // check whether parameters are in expected state - hmsTable = metastoreClient.getTable(tableIdentifier.namespace().level(0), tableName); - parameters = hmsTable.getParameters(); + parameters = hmsTableParameters(); Assert.assertEquals("1", parameters.get(TableProperties.SNAPSHOT_COUNT)); String summary = JsonUtil.mapper().writeValueAsString(icebergTable.currentSnapshot().summary()); Assert.assertEquals(summary, parameters.get(TableProperties.CURRENT_SNAPSHOT_SUMMARY)); @@ -562,6 +559,34 @@ public void testSetSnapshotSummary() throws Exception { Assert.assertEquals("The snapshot summary must not be in parameters due to the size limit", 0, parameters.size()); } + @Test + public void testSetDefaultPartitionSpec() throws Exception { + Schema schema = new Schema( + required(1, "id", Types.IntegerType.get(), "unique ID"), + required(2, "data", Types.StringType.get()) + ); + PartitionSpec spec = PartitionSpec.builderFor(schema) + .bucket("data", 16) + .build(); + TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl"); + + try { + catalog.buildTable(tableIdent, schema) + .withPartitionSpec(spec) + .create(); + + Assert.assertEquals(PartitionSpecParser.toJson(spec), + hmsTableParameters().get(TableProperties.DEFAULT_PARTITION_SPEC)); + } finally { + catalog.dropTable(tableIdent); + } + } + + private Map hmsTableParameters() throws TException { + org.apache.hadoop.hive.metastore.api.Table hmsTable = metastoreClient.getTable(DB_NAME, "tbl"); + return hmsTable.getParameters(); + } + @Test public void testConstructorWarehousePathWithEndSlash() { HiveCatalog catalogWithSlash = new 
HiveCatalog(); From 409872bff36291896d9817e957338c4738fa5ea7 Mon Sep 17 00:00:00 2001 From: yufeigu Date: Wed, 13 Apr 2022 18:31:31 -0700 Subject: [PATCH 02/12] Resolve comments. --- .../apache/iceberg/hive/TestHiveCatalog.java | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java index 882643546b2e..8b78f4a1bdfb 100644 --- a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java +++ b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java @@ -32,7 +32,6 @@ import org.apache.iceberg.DataFiles; import org.apache.iceberg.FileFormat; import org.apache.iceberg.PartitionSpec; -import org.apache.iceberg.PartitionSpecParser; import org.apache.iceberg.Schema; import org.apache.iceberg.Snapshot; import org.apache.iceberg.SortOrder; @@ -63,6 +62,7 @@ import static org.apache.iceberg.NullOrder.NULLS_FIRST; import static org.apache.iceberg.SortDirection.ASC; import static org.apache.iceberg.TableProperties.DEFAULT_SORT_ORDER; +import static org.apache.iceberg.expressions.Expressions.bucket; import static org.apache.iceberg.types.Types.NestedField.required; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.spy; @@ -250,8 +250,8 @@ public void testCreateTableDefaultSortOrder() throws Exception { Assert.assertEquals("Order ID must match", 0, table.sortOrder().orderId()); Assert.assertTrue("Order must unsorted", table.sortOrder().isUnsorted()); - Assert.assertTrue("Must not have default sort order in catalog", - !hmsTableParameters().containsKey(DEFAULT_SORT_ORDER)); + Assert.assertFalse("Must not have default sort order in catalog", + hmsTableParameters().containsKey(DEFAULT_SORT_ORDER)); } finally { catalog.dropTable(tableIdent); } @@ -565,18 +565,16 @@ public void testSetDefaultPartitionSpec() throws Exception { required(1, "id", 
Types.IntegerType.get(), "unique ID"), required(2, "data", Types.StringType.get()) ); - PartitionSpec spec = PartitionSpec.builderFor(schema) - .bucket("data", 16) - .build(); TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl"); try { - catalog.buildTable(tableIdent, schema) - .withPartitionSpec(spec) - .create(); + Table table = catalog.buildTable(tableIdent, schema).create(); + Assert.assertFalse("Must not have default partition spec", + hmsTableParameters().containsKey(TableProperties.DEFAULT_PARTITION_SPEC)); - Assert.assertEquals(PartitionSpecParser.toJson(spec), - hmsTableParameters().get(TableProperties.DEFAULT_PARTITION_SPEC)); + table.updateSpec().addField(bucket("data", 16)).commit(); + Assert.assertTrue("Must have default partition spec", + hmsTableParameters().containsKey(TableProperties.DEFAULT_PARTITION_SPEC)); } finally { catalog.dropTable(tableIdent); } From 9bf2a0fac358e6a24227bdd8a150385e9b745893 Mon Sep 17 00:00:00 2001 From: yufeigu Date: Thu, 21 Apr 2022 12:07:39 -0700 Subject: [PATCH 03/12] Add source name in spec json string --- .../apache/iceberg/PartitionSpecParser.java | 51 +++++++++++++------ .../org/apache/iceberg/TableProperties.java | 20 ++------ .../iceberg/TestPartitionSpecParser.java | 2 + .../iceberg/hive/HiveTableOperations.java | 2 +- .../apache/iceberg/hive/TestHiveCatalog.java | 5 +- 5 files changed, 44 insertions(+), 36 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/PartitionSpecParser.java b/core/src/main/java/org/apache/iceberg/PartitionSpecParser.java index 67ec98b763de..62b94fc4e6b9 100644 --- a/core/src/main/java/org/apache/iceberg/PartitionSpecParser.java +++ b/core/src/main/java/org/apache/iceberg/PartitionSpecParser.java @@ -39,42 +39,39 @@ private PartitionSpecParser() { private static final String SPEC_ID = "spec-id"; private static final String FIELDS = "fields"; private static final String SOURCE_ID = "source-id"; + private static final String SOURCE_NAME = "source-name"; private static 
final String FIELD_ID = "field-id"; private static final String TRANSFORM = "transform"; private static final String NAME = "name"; - public static void toJson(PartitionSpec spec, JsonGenerator generator) throws IOException { - toJson(spec.toUnbound(), generator); + public static String toJsonWithSourceName(PartitionSpec spec) { + return toJson(spec.toUnbound(), spec.schema(), false); } public static String toJson(PartitionSpec spec) { return toJson(spec, false); } - public static String toJson(PartitionSpec spec, boolean pretty) { - return toJson(spec.toUnbound(), pretty); + public static String toJson(UnboundPartitionSpec spec) { + return toJson(spec, false); } - public static void toJson(UnboundPartitionSpec spec, JsonGenerator generator) throws IOException { - generator.writeStartObject(); - generator.writeNumberField(SPEC_ID, spec.specId()); - generator.writeFieldName(FIELDS); - toJsonFields(spec, generator); - generator.writeEndObject(); + public static String toJson(PartitionSpec spec, boolean pretty) { + return toJson(spec.toUnbound(), null, pretty); } - public static String toJson(UnboundPartitionSpec spec) { - return toJson(spec, false); + public static String toJson(UnboundPartitionSpec spec, boolean pretty) { + return toJson(spec, null, pretty); } - public static String toJson(UnboundPartitionSpec spec, boolean pretty) { + private static String toJson(UnboundPartitionSpec spec, Schema schema, boolean pretty) { try { StringWriter writer = new StringWriter(); JsonGenerator generator = JsonUtil.factory().createGenerator(writer); if (pretty) { generator.useDefaultPrettyPrinter(); } - toJson(spec, generator); + toJson(spec, generator, schema); generator.flush(); return writer.toString(); @@ -83,6 +80,22 @@ public static String toJson(UnboundPartitionSpec spec, boolean pretty) { } } + public static void toJson(PartitionSpec spec, JsonGenerator generator) throws IOException { + toJson(spec.toUnbound(), generator); + } + + public static void 
toJson(UnboundPartitionSpec spec, JsonGenerator generator) throws IOException { + toJson(spec, generator, null); + } + + private static void toJson(UnboundPartitionSpec spec, JsonGenerator generator, Schema schema) throws IOException { + generator.writeStartObject(); + generator.writeNumberField(SPEC_ID, spec.specId()); + generator.writeFieldName(FIELDS); + toJsonFields(spec, generator, schema); + generator.writeEndObject(); + } + public static PartitionSpec fromJson(Schema schema, JsonNode json) { return fromJson(json).bind(schema); } @@ -112,16 +125,22 @@ public static PartitionSpec fromJson(Schema schema, String json) { } static void toJsonFields(PartitionSpec spec, JsonGenerator generator) throws IOException { - toJsonFields(spec.toUnbound(), generator); + toJsonFields(spec.toUnbound(), generator, null); } - static void toJsonFields(UnboundPartitionSpec spec, JsonGenerator generator) throws IOException { + static void toJsonFields(UnboundPartitionSpec spec, JsonGenerator generator, Schema schema) throws IOException { generator.writeStartArray(); for (UnboundPartitionSpec.UnboundPartitionField field : spec.fields()) { generator.writeStartObject(); generator.writeStringField(NAME, field.name()); generator.writeStringField(TRANSFORM, field.transformAsString()); generator.writeNumberField(SOURCE_ID, field.sourceId()); + if (schema != null) { + Types.NestedField nestedField = schema.findField(field.sourceId()); + if (nestedField != null) { + generator.writeStringField(SOURCE_NAME, nestedField.name()); + } + } generator.writeNumberField(FIELD_ID, field.partitionId()); generator.writeEndObject(); } diff --git a/core/src/main/java/org/apache/iceberg/TableProperties.java b/core/src/main/java/org/apache/iceberg/TableProperties.java index 9a2d3bf900c4..2191e6baeb76 100644 --- a/core/src/main/java/org/apache/iceberg/TableProperties.java +++ b/core/src/main/java/org/apache/iceberg/TableProperties.java @@ -41,51 +41,37 @@ private TableProperties() { public static final 
String FORMAT_VERSION = "format-version"; /** - * Reserved table property for UUID. - *

- * This reserved property is used to store the UUID of the table. + * Reserved table property for table UUID. */ public static final String UUID = "uuid"; /** * Reserved table property for the total number of snapshots. - *

- * This reserved property is used to store the total number of snapshots. */ public static final String SNAPSHOT_COUNT = "snapshot-count"; /** * Reserved table property for current snapshot summary. - *

- * This reserved property is used to store the current snapshot summary. */ public static final String CURRENT_SNAPSHOT_SUMMARY = "current-snapshot-summary"; /** * Reserved table property for current snapshot id. - *

- * This reserved property is used to store the current snapshot id. */ public static final String CURRENT_SNAPSHOT_ID = "current-snapshot-id"; /** * Reserved table property for current snapshot timestamp. - *

- * This reserved property is used to store the current snapshot timestamp. */ public static final String CURRENT_SNAPSHOT_TIMESTAMP = "current-snapshot-timestamp-ms"; /** - * Reserved table property for default partition spec. - *

- * This reserved property is used to store the default partition spec. + * Reserved table property for the JSON representation of current partition spec. */ public static final String DEFAULT_PARTITION_SPEC = "default-partition-spec"; /** - * Reserved table property for default sort order. - *

- * This reserved property is used to store the default sort order. + * Reserved table property for the JSON representation of current sort order. */ public static final String DEFAULT_SORT_ORDER = "default-sort-order"; diff --git a/core/src/test/java/org/apache/iceberg/TestPartitionSpecParser.java b/core/src/test/java/org/apache/iceberg/TestPartitionSpecParser.java index 847ff4283ab1..8dae4ad59044 100644 --- a/core/src/test/java/org/apache/iceberg/TestPartitionSpecParser.java +++ b/core/src/test/java/org/apache/iceberg/TestPartitionSpecParser.java @@ -39,6 +39,8 @@ public void testToJsonForV1Table() { " } ]\n" + "}"; Assert.assertEquals(expected, PartitionSpecParser.toJson(table.spec(), true)); + Assert.assertTrue("Json must contain source name", + PartitionSpecParser.toJsonWithSourceName(table.spec()).contains("\"source-name\":\"data\"")); PartitionSpec spec = PartitionSpec.builderFor(table.schema()) .bucket("id", 8) diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index 6bd52f4601f7..38754b5df101 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -440,7 +440,7 @@ void setSnapshotSummary(Map parameters, Snapshot currentSnapshot private void setPartitionSpec(TableMetadata metadata, Map parameters) { parameters.remove(TableProperties.DEFAULT_PARTITION_SPEC); if (metadata.spec() != null && metadata.spec().isPartitioned()) { - parameters.put(TableProperties.DEFAULT_PARTITION_SPEC, PartitionSpecParser.toJson(metadata.spec())); + parameters.put(TableProperties.DEFAULT_PARTITION_SPEC, PartitionSpecParser.toJsonWithSourceName(metadata.spec())); } } diff --git a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java index 
8b78f4a1bdfb..c5a237955a46 100644 --- a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java +++ b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java @@ -32,6 +32,7 @@ import org.apache.iceberg.DataFiles; import org.apache.iceberg.FileFormat; import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.PartitionSpecParser; import org.apache.iceberg.Schema; import org.apache.iceberg.Snapshot; import org.apache.iceberg.SortOrder; @@ -573,8 +574,8 @@ public void testSetDefaultPartitionSpec() throws Exception { hmsTableParameters().containsKey(TableProperties.DEFAULT_PARTITION_SPEC)); table.updateSpec().addField(bucket("data", 16)).commit(); - Assert.assertTrue("Must have default partition spec", - hmsTableParameters().containsKey(TableProperties.DEFAULT_PARTITION_SPEC)); + Assert.assertEquals(PartitionSpecParser.toJsonWithSourceName(table.spec()), + hmsTableParameters().get(TableProperties.DEFAULT_PARTITION_SPEC)); } finally { catalog.dropTable(tableIdent); } From 801661803f8b09c732321d9db11c9af288660348 Mon Sep 17 00:00:00 2001 From: yufeigu Date: Thu, 21 Apr 2022 15:27:35 -0700 Subject: [PATCH 04/12] Add check for thread hold --- .../apache/iceberg/hive/HiveTableOperations.java | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index 38754b5df101..3c27a3e0df89 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -440,14 +440,25 @@ void setSnapshotSummary(Map parameters, Snapshot currentSnapshot private void setPartitionSpec(TableMetadata metadata, Map parameters) { parameters.remove(TableProperties.DEFAULT_PARTITION_SPEC); if (metadata.spec() != null && metadata.spec().isPartitioned()) { - 
parameters.put(TableProperties.DEFAULT_PARTITION_SPEC, PartitionSpecParser.toJsonWithSourceName(metadata.spec())); + String spec = PartitionSpecParser.toJsonWithSourceName(metadata.spec()); + if (spec.length() <= maxHiveTablePropertySize) { + parameters.put(TableProperties.DEFAULT_PARTITION_SPEC, spec); + } else { + LOG.warn("Not exposing the current partition spec in HMS since it exceeds {} characters", + maxHiveTablePropertySize); + } } } private void setSortOrder(TableMetadata metadata, Map parameters) { parameters.remove(TableProperties.DEFAULT_SORT_ORDER); if (metadata.sortOrder() != null && metadata.sortOrder().isSorted()) { - parameters.put(TableProperties.DEFAULT_SORT_ORDER, SortOrderParser.toJson(metadata.sortOrder())); + String sortOrder = SortOrderParser.toJson(metadata.sortOrder()); + if (sortOrder.length() <= maxHiveTablePropertySize) { + parameters.put(TableProperties.DEFAULT_SORT_ORDER, sortOrder); + } else { + LOG.warn("Not exposing the current sort order in HMS since it exceeds {} characters", maxHiveTablePropertySize); + } } } From 65cd12c9f559beeb7c4a51dcba0f50f887e09792 Mon Sep 17 00:00:00 2001 From: yufeigu Date: Fri, 22 Apr 2022 12:37:36 -0700 Subject: [PATCH 05/12] Resolve comments. 
--- .../apache/iceberg/PartitionSpecParser.java | 51 ++++++------------- .../org/apache/iceberg/TableProperties.java | 6 +++ .../iceberg/TestPartitionSpecParser.java | 2 - .../iceberg/hive/HiveTableOperations.java | 16 +++++- .../apache/iceberg/hive/TestHiveCatalog.java | 21 +++++++- 5 files changed, 57 insertions(+), 39 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/PartitionSpecParser.java b/core/src/main/java/org/apache/iceberg/PartitionSpecParser.java index 62b94fc4e6b9..67ec98b763de 100644 --- a/core/src/main/java/org/apache/iceberg/PartitionSpecParser.java +++ b/core/src/main/java/org/apache/iceberg/PartitionSpecParser.java @@ -39,39 +39,42 @@ private PartitionSpecParser() { private static final String SPEC_ID = "spec-id"; private static final String FIELDS = "fields"; private static final String SOURCE_ID = "source-id"; - private static final String SOURCE_NAME = "source-name"; private static final String FIELD_ID = "field-id"; private static final String TRANSFORM = "transform"; private static final String NAME = "name"; - public static String toJsonWithSourceName(PartitionSpec spec) { - return toJson(spec.toUnbound(), spec.schema(), false); + public static void toJson(PartitionSpec spec, JsonGenerator generator) throws IOException { + toJson(spec.toUnbound(), generator); } public static String toJson(PartitionSpec spec) { return toJson(spec, false); } - public static String toJson(UnboundPartitionSpec spec) { - return toJson(spec, false); + public static String toJson(PartitionSpec spec, boolean pretty) { + return toJson(spec.toUnbound(), pretty); } - public static String toJson(PartitionSpec spec, boolean pretty) { - return toJson(spec.toUnbound(), null, pretty); + public static void toJson(UnboundPartitionSpec spec, JsonGenerator generator) throws IOException { + generator.writeStartObject(); + generator.writeNumberField(SPEC_ID, spec.specId()); + generator.writeFieldName(FIELDS); + toJsonFields(spec, generator); + 
generator.writeEndObject(); } - public static String toJson(UnboundPartitionSpec spec, boolean pretty) { - return toJson(spec, null, pretty); + public static String toJson(UnboundPartitionSpec spec) { + return toJson(spec, false); } - private static String toJson(UnboundPartitionSpec spec, Schema schema, boolean pretty) { + public static String toJson(UnboundPartitionSpec spec, boolean pretty) { try { StringWriter writer = new StringWriter(); JsonGenerator generator = JsonUtil.factory().createGenerator(writer); if (pretty) { generator.useDefaultPrettyPrinter(); } - toJson(spec, generator, schema); + toJson(spec, generator); generator.flush(); return writer.toString(); @@ -80,22 +83,6 @@ private static String toJson(UnboundPartitionSpec spec, Schema schema, boolean p } } - public static void toJson(PartitionSpec spec, JsonGenerator generator) throws IOException { - toJson(spec.toUnbound(), generator); - } - - public static void toJson(UnboundPartitionSpec spec, JsonGenerator generator) throws IOException { - toJson(spec, generator, null); - } - - private static void toJson(UnboundPartitionSpec spec, JsonGenerator generator, Schema schema) throws IOException { - generator.writeStartObject(); - generator.writeNumberField(SPEC_ID, spec.specId()); - generator.writeFieldName(FIELDS); - toJsonFields(spec, generator, schema); - generator.writeEndObject(); - } - public static PartitionSpec fromJson(Schema schema, JsonNode json) { return fromJson(json).bind(schema); } @@ -125,22 +112,16 @@ public static PartitionSpec fromJson(Schema schema, String json) { } static void toJsonFields(PartitionSpec spec, JsonGenerator generator) throws IOException { - toJsonFields(spec.toUnbound(), generator, null); + toJsonFields(spec.toUnbound(), generator); } - static void toJsonFields(UnboundPartitionSpec spec, JsonGenerator generator, Schema schema) throws IOException { + static void toJsonFields(UnboundPartitionSpec spec, JsonGenerator generator) throws IOException { 
generator.writeStartArray(); for (UnboundPartitionSpec.UnboundPartitionField field : spec.fields()) { generator.writeStartObject(); generator.writeStringField(NAME, field.name()); generator.writeStringField(TRANSFORM, field.transformAsString()); generator.writeNumberField(SOURCE_ID, field.sourceId()); - if (schema != null) { - Types.NestedField nestedField = schema.findField(field.sourceId()); - if (nestedField != null) { - generator.writeStringField(SOURCE_NAME, nestedField.name()); - } - } generator.writeNumberField(FIELD_ID, field.partitionId()); generator.writeEndObject(); } diff --git a/core/src/main/java/org/apache/iceberg/TableProperties.java b/core/src/main/java/org/apache/iceberg/TableProperties.java index 2191e6baeb76..e965d2709eca 100644 --- a/core/src/main/java/org/apache/iceberg/TableProperties.java +++ b/core/src/main/java/org/apache/iceberg/TableProperties.java @@ -65,6 +65,11 @@ private TableProperties() { */ public static final String CURRENT_SNAPSHOT_TIMESTAMP = "current-snapshot-timestamp-ms"; + /** + * Reserved table property for the JSON representation of current schema. + */ + public static final String CURRENT_SCHEMA = "current-schema"; + /** * Reserved table property for the JSON representation of current partition spec. 
*/ @@ -88,6 +93,7 @@ private TableProperties() { CURRENT_SNAPSHOT_ID, CURRENT_SNAPSHOT_SUMMARY, CURRENT_SNAPSHOT_TIMESTAMP, + CURRENT_SCHEMA, DEFAULT_PARTITION_SPEC, DEFAULT_SORT_ORDER ); diff --git a/core/src/test/java/org/apache/iceberg/TestPartitionSpecParser.java b/core/src/test/java/org/apache/iceberg/TestPartitionSpecParser.java index 8dae4ad59044..847ff4283ab1 100644 --- a/core/src/test/java/org/apache/iceberg/TestPartitionSpecParser.java +++ b/core/src/test/java/org/apache/iceberg/TestPartitionSpecParser.java @@ -39,8 +39,6 @@ public void testToJsonForV1Table() { " } ]\n" + "}"; Assert.assertEquals(expected, PartitionSpecParser.toJson(table.spec(), true)); - Assert.assertTrue("Json must contain source name", - PartitionSpecParser.toJsonWithSourceName(table.spec()).contains("\"source-name\":\"data\"")); PartitionSpec spec = PartitionSpec.builderFor(table.schema()) .bucket("id", 8) diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index 3c27a3e0df89..806b2fdd87c7 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -53,6 +53,7 @@ import org.apache.iceberg.BaseMetastoreTableOperations; import org.apache.iceberg.ClientPool; import org.apache.iceberg.PartitionSpecParser; +import org.apache.iceberg.SchemaParser; import org.apache.iceberg.Snapshot; import org.apache.iceberg.SnapshotSummary; import org.apache.iceberg.SortOrderParser; @@ -401,6 +402,7 @@ private void setHmsTableParameters(String newMetadataLocation, Table tbl, TableM } setSnapshotStats(metadata, parameters); + setSchema(metadata, parameters); setPartitionSpec(metadata, parameters); setSortOrder(metadata, parameters); @@ -437,10 +439,22 @@ void setSnapshotSummary(Map parameters, Snapshot currentSnapshot } } + private void setSchema(TableMetadata metadata, Map 
parameters) { + parameters.remove(TableProperties.CURRENT_SCHEMA); + if (metadata.schema() != null) { + String schema = SchemaParser.toJson(metadata.schema()); + if (schema.length() <= maxHiveTablePropertySize) { + parameters.put(TableProperties.CURRENT_SCHEMA, schema); + } else { + LOG.warn("Not exposing the current schema in HMS since it exceeds {} characters", maxHiveTablePropertySize); + } + } + } + private void setPartitionSpec(TableMetadata metadata, Map parameters) { parameters.remove(TableProperties.DEFAULT_PARTITION_SPEC); if (metadata.spec() != null && metadata.spec().isPartitioned()) { - String spec = PartitionSpecParser.toJsonWithSourceName(metadata.spec()); + String spec = PartitionSpecParser.toJson(metadata.spec()); if (spec.length() <= maxHiveTablePropertySize) { parameters.put(TableProperties.DEFAULT_PARTITION_SPEC, spec); } else { diff --git a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java index c5a237955a46..832964db232f 100644 --- a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java +++ b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java @@ -34,6 +34,7 @@ import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.PartitionSpecParser; import org.apache.iceberg.Schema; +import org.apache.iceberg.SchemaParser; import org.apache.iceberg.Snapshot; import org.apache.iceberg.SortOrder; import org.apache.iceberg.SortOrderParser; @@ -574,13 +575,31 @@ public void testSetDefaultPartitionSpec() throws Exception { hmsTableParameters().containsKey(TableProperties.DEFAULT_PARTITION_SPEC)); table.updateSpec().addField(bucket("data", 16)).commit(); - Assert.assertEquals(PartitionSpecParser.toJsonWithSourceName(table.spec()), + Assert.assertEquals(PartitionSpecParser.toJson(table.spec()), hmsTableParameters().get(TableProperties.DEFAULT_PARTITION_SPEC)); } finally { catalog.dropTable(tableIdent); } } + @Test 
+ public void testSetCurrentSchema() throws Exception { + Schema schema = new Schema( + required(1, "id", Types.IntegerType.get(), "unique ID"), + required(2, "data", Types.StringType.get()) + ); + TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl"); + + try { + Table table = catalog.buildTable(tableIdent, schema).create(); + + Assert.assertEquals(SchemaParser.toJson(table.schema()), + hmsTableParameters().get(TableProperties.CURRENT_SCHEMA)); + } finally { + catalog.dropTable(tableIdent); + } + } + private Map hmsTableParameters() throws TException { org.apache.hadoop.hive.metastore.api.Table hmsTable = metastoreClient.getTable(DB_NAME, "tbl"); return hmsTable.getParameters(); From cceba61ea5affe8d3e95535644570a9a043d56c9 Mon Sep 17 00:00:00 2001 From: yufeigu Date: Fri, 22 Apr 2022 13:47:24 -0700 Subject: [PATCH 06/12] Fix the test failure. --- .../iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java b/mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java index e588c0b3ac79..7a28a9536b6b 100644 --- a/mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java +++ b/mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java @@ -624,7 +624,7 @@ public void testIcebergAndHmsTableProperties() throws Exception { Assert.assertEquals(expectedIcebergProperties, icebergTable.properties()); if (Catalogs.hiveCatalog(shell.getHiveConf(), tableProperties)) { - Assert.assertEquals(12, hmsParams.size()); + Assert.assertEquals(13, hmsParams.size()); Assert.assertEquals("initial_val", hmsParams.get("custom_property")); Assert.assertEquals("TRUE", hmsParams.get(InputFormatConfig.EXTERNAL_TABLE_PURGE)); Assert.assertEquals("TRUE", hmsParams.get("EXTERNAL")); @@ -662,7 +662,7 @@ public void testIcebergAndHmsTableProperties() throws 
Exception { .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); if (Catalogs.hiveCatalog(shell.getHiveConf(), tableProperties)) { - Assert.assertEquals(15, hmsParams.size()); // 2 newly-added properties + previous_metadata_location prop + Assert.assertEquals(16, hmsParams.size()); // 2 newly-added properties + previous_metadata_location prop Assert.assertEquals("true", hmsParams.get("new_prop_1")); Assert.assertEquals("false", hmsParams.get("new_prop_2")); Assert.assertEquals("new_val", hmsParams.get("custom_property")); From 88f8c7f557e6ed6b0320b4fea10dd788fac96858 Mon Sep 17 00:00:00 2001 From: yufeigu Date: Fri, 22 Apr 2022 14:11:04 -0700 Subject: [PATCH 07/12] Resolve comments. --- .../iceberg/hive/HiveTableOperations.java | 27 ++++++++----------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index 806b2fdd87c7..286fe723cb72 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -443,11 +443,7 @@ private void setSchema(TableMetadata metadata, Map parameters) { parameters.remove(TableProperties.CURRENT_SCHEMA); if (metadata.schema() != null) { String schema = SchemaParser.toJson(metadata.schema()); - if (schema.length() <= maxHiveTablePropertySize) { - parameters.put(TableProperties.CURRENT_SCHEMA, schema); - } else { - LOG.warn("Not exposing the current schema in HMS since it exceeds {} characters", maxHiveTablePropertySize); - } + setField(parameters, TableProperties.CURRENT_SCHEMA, schema); } } @@ -455,12 +451,7 @@ private void setPartitionSpec(TableMetadata metadata, Map parame parameters.remove(TableProperties.DEFAULT_PARTITION_SPEC); if (metadata.spec() != null && metadata.spec().isPartitioned()) { String spec = 
PartitionSpecParser.toJson(metadata.spec()); - if (spec.length() <= maxHiveTablePropertySize) { - parameters.put(TableProperties.DEFAULT_PARTITION_SPEC, spec); - } else { - LOG.warn("Not exposing the current partition spec in HMS since it exceeds {} characters", - maxHiveTablePropertySize); - } + setField(parameters, TableProperties.DEFAULT_PARTITION_SPEC, spec); } } @@ -468,11 +459,15 @@ private void setSortOrder(TableMetadata metadata, Map parameters parameters.remove(TableProperties.DEFAULT_SORT_ORDER); if (metadata.sortOrder() != null && metadata.sortOrder().isSorted()) { String sortOrder = SortOrderParser.toJson(metadata.sortOrder()); - if (sortOrder.length() <= maxHiveTablePropertySize) { - parameters.put(TableProperties.DEFAULT_SORT_ORDER, sortOrder); - } else { - LOG.warn("Not exposing the current sort order in HMS since it exceeds {} characters", maxHiveTablePropertySize); - } + setField(parameters, TableProperties.DEFAULT_SORT_ORDER, sortOrder); + } + } + + private void setField(Map parameters, String key, String value) { + if (value.length() <= maxHiveTablePropertySize) { + parameters.put(key, value); + } else { + LOG.warn("Not exposing {} in HMS since it exceeds {} characters", key, maxHiveTablePropertySize); } } From 7c9f0d0261361bf456c60f19f383953cc283b0ef Mon Sep 17 00:00:00 2001 From: yufeigu Date: Fri, 22 Apr 2022 14:15:29 -0700 Subject: [PATCH 08/12] Resolve comments. 
--- core/src/main/java/org/apache/iceberg/TableProperties.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/TableProperties.java b/core/src/main/java/org/apache/iceberg/TableProperties.java index e965d2709eca..50e434093add 100644 --- a/core/src/main/java/org/apache/iceberg/TableProperties.java +++ b/core/src/main/java/org/apache/iceberg/TableProperties.java @@ -71,12 +71,12 @@ private TableProperties() { public static final String CURRENT_SCHEMA = "current-schema"; /** - * Reserved table property for the JSON representation of current partition spec. + * Reserved table property for the JSON representation of current (default) partition spec. */ public static final String DEFAULT_PARTITION_SPEC = "default-partition-spec"; /** - * Reserved table property for the JSON representation of current sort order. + * Reserved table property for the JSON representation of current (default) sort order. */ public static final String DEFAULT_SORT_ORDER = "default-sort-order"; From 49796e7da2ceebbd2506803adaba99359377e427 Mon Sep 17 00:00:00 2001 From: yufeigu Date: Sun, 24 Apr 2022 18:50:05 -0700 Subject: [PATCH 09/12] Resolve comments. 
--- .../java/org/apache/iceberg/hive/TestHiveCatalog.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java index 832964db232f..b3a923a6dd15 100644 --- a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java +++ b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java @@ -41,6 +41,7 @@ import org.apache.iceberg.Table; import org.apache.iceberg.TableProperties; import org.apache.iceberg.Transaction; +import org.apache.iceberg.UpdateSchema; import org.apache.iceberg.catalog.Catalog; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; @@ -595,6 +596,16 @@ public void testSetCurrentSchema() throws Exception { Assert.assertEquals(SchemaParser.toJson(table.schema()), hmsTableParameters().get(TableProperties.CURRENT_SCHEMA)); + + // add many new fields to make the schema json string exceed the limit + UpdateSchema updateSchema = table.updateSchema(); + for (int i = 0; i < 600; i++) { + updateSchema.addColumn("new_col_" + i, Types.StringType.get()); + } + updateSchema.commit(); + + Assert.assertTrue(SchemaParser.toJson(table.schema()).length() > 32672); + Assert.assertNull(hmsTableParameters().get(TableProperties.CURRENT_SCHEMA)); } finally { catalog.dropTable(tableIdent); } From fcb5d485308f3b94333c0155191d79e5197b32c0 Mon Sep 17 00:00:00 2001 From: yufeigu Date: Mon, 25 Apr 2022 15:33:32 -0700 Subject: [PATCH 10/12] Resolve comments --- .../iceberg/hive/HiveTableOperations.java | 25 ++++++--- .../apache/iceberg/hive/TestHiveCatalog.java | 54 +++++++++++++++---- 2 files changed, 61 insertions(+), 18 deletions(-) diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java index 286fe723cb72..1303fd15d89b 100644 --- 
a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveTableOperations.java @@ -96,6 +96,7 @@ public class HiveTableOperations extends BaseMetastoreTableOperations { // the max size is based on HMS backend database. For Hive versions below 2.3, the max table parameter size is 4000 // characters, see https://issues.apache.org/jira/browse/HIVE-12274 + // set to 0 to not expose Iceberg metadata in HMS Table properties. private static final String HIVE_TABLE_PROPERTY_MAX_SIZE = "iceberg.hive.table-property-max-size"; private static final long HIVE_TABLE_PROPERTY_MAX_SIZE_DEFAULT = 32672; private static final long HIVE_ACQUIRE_LOCK_TIMEOUT_MS_DEFAULT = 3 * 60 * 1000; // 3 minutes @@ -409,13 +410,14 @@ private void setHmsTableParameters(String newMetadataLocation, Table tbl, TableM tbl.setParameters(parameters); } - private void setSnapshotStats(TableMetadata metadata, Map parameters) { + @VisibleForTesting + void setSnapshotStats(TableMetadata metadata, Map parameters) { parameters.remove(TableProperties.CURRENT_SNAPSHOT_ID); parameters.remove(TableProperties.CURRENT_SNAPSHOT_TIMESTAMP); parameters.remove(TableProperties.CURRENT_SNAPSHOT_SUMMARY); Snapshot currentSnapshot = metadata.currentSnapshot(); - if (currentSnapshot != null) { + if (exposeInHmsProperties() && currentSnapshot != null) { parameters.put(TableProperties.CURRENT_SNAPSHOT_ID, String.valueOf(currentSnapshot.snapshotId())); parameters.put(TableProperties.CURRENT_SNAPSHOT_TIMESTAMP, String.valueOf(currentSnapshot.timestampMillis())); setSnapshotSummary(parameters, currentSnapshot); @@ -439,25 +441,28 @@ void setSnapshotSummary(Map parameters, Snapshot currentSnapshot } } - private void setSchema(TableMetadata metadata, Map parameters) { + @VisibleForTesting + void setSchema(TableMetadata metadata, Map parameters) { parameters.remove(TableProperties.CURRENT_SCHEMA); - if (metadata.schema() != null) { + if 
(exposeInHmsProperties() && metadata.schema() != null) { String schema = SchemaParser.toJson(metadata.schema()); setField(parameters, TableProperties.CURRENT_SCHEMA, schema); } } - private void setPartitionSpec(TableMetadata metadata, Map parameters) { + @VisibleForTesting + void setPartitionSpec(TableMetadata metadata, Map parameters) { parameters.remove(TableProperties.DEFAULT_PARTITION_SPEC); - if (metadata.spec() != null && metadata.spec().isPartitioned()) { + if (exposeInHmsProperties() && metadata.spec() != null && metadata.spec().isPartitioned()) { String spec = PartitionSpecParser.toJson(metadata.spec()); setField(parameters, TableProperties.DEFAULT_PARTITION_SPEC, spec); } } - private void setSortOrder(TableMetadata metadata, Map parameters) { + @VisibleForTesting + void setSortOrder(TableMetadata metadata, Map parameters) { parameters.remove(TableProperties.DEFAULT_SORT_ORDER); - if (metadata.sortOrder() != null && metadata.sortOrder().isSorted()) { + if (exposeInHmsProperties() && metadata.sortOrder() != null && metadata.sortOrder().isSorted()) { String sortOrder = SortOrderParser.toJson(metadata.sortOrder()); setField(parameters, TableProperties.DEFAULT_SORT_ORDER, sortOrder); } @@ -471,6 +476,10 @@ private void setField(Map parameters, String key, String value) } } + private boolean exposeInHmsProperties() { + return maxHiveTablePropertySize > 0; + } + private StorageDescriptor storageDescriptor(TableMetadata metadata, boolean hiveEngineEnabled) { final StorageDescriptor storageDescriptor = new StorageDescriptor(); diff --git a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java index b3a923a6dd15..e2307df3653e 100644 --- a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java +++ b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java @@ -39,6 +39,7 @@ import org.apache.iceberg.SortOrder; import 
org.apache.iceberg.SortOrderParser; import org.apache.iceberg.Table; +import org.apache.iceberg.TableMetadata; import org.apache.iceberg.TableProperties; import org.apache.iceberg.Transaction; import org.apache.iceberg.UpdateSchema; @@ -64,6 +65,11 @@ import static org.apache.iceberg.NullOrder.NULLS_FIRST; import static org.apache.iceberg.SortDirection.ASC; +import static org.apache.iceberg.TableProperties.CURRENT_SCHEMA; +import static org.apache.iceberg.TableProperties.CURRENT_SNAPSHOT_ID; +import static org.apache.iceberg.TableProperties.CURRENT_SNAPSHOT_SUMMARY; +import static org.apache.iceberg.TableProperties.CURRENT_SNAPSHOT_TIMESTAMP; +import static org.apache.iceberg.TableProperties.DEFAULT_PARTITION_SPEC; import static org.apache.iceberg.TableProperties.DEFAULT_SORT_ORDER; import static org.apache.iceberg.expressions.Expressions.bucket; import static org.apache.iceberg.types.Types.NestedField.required; @@ -503,9 +509,9 @@ public void testSnapshotStatsTableProperties() throws Exception { // check whether parameters are in expected state Map parameters = hmsTableParameters(); Assert.assertEquals("0", parameters.get(TableProperties.SNAPSHOT_COUNT)); - Assert.assertNull(parameters.get(TableProperties.CURRENT_SNAPSHOT_SUMMARY)); - Assert.assertNull(parameters.get(TableProperties.CURRENT_SNAPSHOT_ID)); - Assert.assertNull(parameters.get(TableProperties.CURRENT_SNAPSHOT_TIMESTAMP)); + Assert.assertNull(parameters.get(CURRENT_SNAPSHOT_SUMMARY)); + Assert.assertNull(parameters.get(CURRENT_SNAPSHOT_ID)); + Assert.assertNull(parameters.get(CURRENT_SNAPSHOT_TIMESTAMP)); // create a snapshot Table icebergTable = catalog.loadTable(tableIdentifier); @@ -521,11 +527,11 @@ public void testSnapshotStatsTableProperties() throws Exception { parameters = hmsTableParameters(); Assert.assertEquals("1", parameters.get(TableProperties.SNAPSHOT_COUNT)); String summary = JsonUtil.mapper().writeValueAsString(icebergTable.currentSnapshot().summary()); - Assert.assertEquals(summary, 
parameters.get(TableProperties.CURRENT_SNAPSHOT_SUMMARY)); + Assert.assertEquals(summary, parameters.get(CURRENT_SNAPSHOT_SUMMARY)); long snapshotId = icebergTable.currentSnapshot().snapshotId(); - Assert.assertEquals(String.valueOf(snapshotId), parameters.get(TableProperties.CURRENT_SNAPSHOT_ID)); + Assert.assertEquals(String.valueOf(snapshotId), parameters.get(CURRENT_SNAPSHOT_ID)); Assert.assertEquals(String.valueOf(icebergTable.currentSnapshot().timestampMillis()), - parameters.get(TableProperties.CURRENT_SNAPSHOT_TIMESTAMP)); + parameters.get(CURRENT_SNAPSHOT_TIMESTAMP)); } finally { catalog.dropTable(tableIdentifier); @@ -557,11 +563,40 @@ public void testSetSnapshotSummary() throws Exception { long summarySize = JsonUtil.mapper().writeValueAsString(summary).length(); // the limit has been updated to 4000 instead of the default value(32672) Assert.assertTrue(summarySize > 4000 && summarySize < 32672); - parameters.remove(TableProperties.CURRENT_SNAPSHOT_SUMMARY); + parameters.remove(CURRENT_SNAPSHOT_SUMMARY); spyOps.setSnapshotSummary(parameters, snapshot); Assert.assertEquals("The snapshot summary must not be in parameters due to the size limit", 0, parameters.size()); } + @Test + public void testNotExposeTableProperties() { + Configuration conf = new Configuration(); + conf.set("iceberg.hive.table-property-max-size", "0"); + HiveTableOperations spyOps = spy(new HiveTableOperations(conf, null, null, catalog.name(), DB_NAME, "tbl")); + TableMetadata metadata = mock(TableMetadata.class); + Map parameters = Maps.newHashMap(); + parameters.put(CURRENT_SNAPSHOT_SUMMARY, "summary"); + parameters.put(CURRENT_SNAPSHOT_ID, "snapshotId"); + parameters.put(CURRENT_SNAPSHOT_TIMESTAMP, "timestamp"); + parameters.put(CURRENT_SCHEMA, "schema"); + parameters.put(DEFAULT_PARTITION_SPEC, "partitionSpec"); + parameters.put(DEFAULT_SORT_ORDER, "sortOrder"); + + spyOps.setSnapshotStats(metadata, parameters); + Assert.assertNull(parameters.get(CURRENT_SNAPSHOT_SUMMARY)); + 
Assert.assertNull(parameters.get(CURRENT_SNAPSHOT_ID)); + Assert.assertNull(parameters.get(CURRENT_SNAPSHOT_TIMESTAMP)); + + spyOps.setSchema(metadata, parameters); + Assert.assertNull(parameters.get(CURRENT_SCHEMA)); + + spyOps.setPartitionSpec(metadata, parameters); + Assert.assertNull(parameters.get(DEFAULT_PARTITION_SPEC)); + + spyOps.setSortOrder(metadata, parameters); + Assert.assertNull(parameters.get(DEFAULT_SORT_ORDER)); + } + @Test public void testSetDefaultPartitionSpec() throws Exception { Schema schema = new Schema( @@ -594,8 +629,7 @@ public void testSetCurrentSchema() throws Exception { try { Table table = catalog.buildTable(tableIdent, schema).create(); - Assert.assertEquals(SchemaParser.toJson(table.schema()), - hmsTableParameters().get(TableProperties.CURRENT_SCHEMA)); + Assert.assertEquals(SchemaParser.toJson(table.schema()), hmsTableParameters().get(CURRENT_SCHEMA)); // add many new fields to make the schema json string exceed the limit UpdateSchema updateSchema = table.updateSchema(); @@ -605,7 +639,7 @@ public void testSetCurrentSchema() throws Exception { updateSchema.commit(); Assert.assertTrue(SchemaParser.toJson(table.schema()).length() > 32672); - Assert.assertNull(hmsTableParameters().get(TableProperties.CURRENT_SCHEMA)); + Assert.assertNull(hmsTableParameters().get(CURRENT_SCHEMA)); } finally { catalog.dropTable(tableIdent); } From 72dc06156f30f0c0e9db8fe7cba8492e41863ee4 Mon Sep 17 00:00:00 2001 From: yufeigu Date: Mon, 25 Apr 2022 16:29:29 -0700 Subject: [PATCH 11/12] Remove the spy in tests. 
--- .../org/apache/iceberg/hive/TestHiveCatalog.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java index e2307df3653e..3f5d7749048f 100644 --- a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java +++ b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java @@ -542,7 +542,7 @@ public void testSnapshotStatsTableProperties() throws Exception { public void testSetSnapshotSummary() throws Exception { Configuration conf = new Configuration(); conf.set("iceberg.hive.table-property-max-size", "4000"); - HiveTableOperations spyOps = spy(new HiveTableOperations(conf, null, null, catalog.name(), DB_NAME, "tbl")); + HiveTableOperations ops = new HiveTableOperations(conf, null, null, catalog.name(), DB_NAME, "tbl"); Snapshot snapshot = mock(Snapshot.class); Map summary = Maps.newHashMap(); when(snapshot.summary()).thenReturn(summary); @@ -553,7 +553,7 @@ public void testSetSnapshotSummary() throws Exception { } Assert.assertTrue(JsonUtil.mapper().writeValueAsString(summary).length() < 4000); Map parameters = Maps.newHashMap(); - spyOps.setSnapshotSummary(parameters, snapshot); + ops.setSnapshotSummary(parameters, snapshot); Assert.assertEquals("The snapshot summary must be in parameters", 1, parameters.size()); // create a snapshot summary whose json string size exceeds the limit @@ -564,7 +564,7 @@ public void testSetSnapshotSummary() throws Exception { // the limit has been updated to 4000 instead of the default value(32672) Assert.assertTrue(summarySize > 4000 && summarySize < 32672); parameters.remove(CURRENT_SNAPSHOT_SUMMARY); - spyOps.setSnapshotSummary(parameters, snapshot); + ops.setSnapshotSummary(parameters, snapshot); Assert.assertEquals("The snapshot summary must not be in parameters due to the size limit", 0, parameters.size()); } @@ -572,7 
+572,7 @@ public void testSetSnapshotSummary() throws Exception { public void testNotExposeTableProperties() { Configuration conf = new Configuration(); conf.set("iceberg.hive.table-property-max-size", "0"); - HiveTableOperations spyOps = spy(new HiveTableOperations(conf, null, null, catalog.name(), DB_NAME, "tbl")); + HiveTableOperations ops = new HiveTableOperations(conf, null, null, catalog.name(), DB_NAME, "tbl"); TableMetadata metadata = mock(TableMetadata.class); Map parameters = Maps.newHashMap(); parameters.put(CURRENT_SNAPSHOT_SUMMARY, "summary"); @@ -582,18 +582,18 @@ public void testNotExposeTableProperties() { parameters.put(DEFAULT_PARTITION_SPEC, "partitionSpec"); parameters.put(DEFAULT_SORT_ORDER, "sortOrder"); - spyOps.setSnapshotStats(metadata, parameters); + ops.setSnapshotStats(metadata, parameters); Assert.assertNull(parameters.get(CURRENT_SNAPSHOT_SUMMARY)); Assert.assertNull(parameters.get(CURRENT_SNAPSHOT_ID)); Assert.assertNull(parameters.get(CURRENT_SNAPSHOT_TIMESTAMP)); - spyOps.setSchema(metadata, parameters); + ops.setSchema(metadata, parameters); Assert.assertNull(parameters.get(CURRENT_SCHEMA)); - spyOps.setPartitionSpec(metadata, parameters); + ops.setPartitionSpec(metadata, parameters); Assert.assertNull(parameters.get(DEFAULT_PARTITION_SPEC)); - spyOps.setSortOrder(metadata, parameters); + ops.setSortOrder(metadata, parameters); Assert.assertNull(parameters.get(DEFAULT_SORT_ORDER)); } From 0c178a75d5177ffd5d5f255b40d666d4d837e09b Mon Sep 17 00:00:00 2001 From: yufeigu Date: Mon, 25 Apr 2022 16:29:50 -0700 Subject: [PATCH 12/12] Remove the spy in tests. 
--- .../src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java | 1 - 1 file changed, 1 deletion(-) diff --git a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java index 3f5d7749048f..69fa23f391fd 100644 --- a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java +++ b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java @@ -74,7 +74,6 @@ import static org.apache.iceberg.expressions.Expressions.bucket; import static org.apache.iceberg.types.Types.NestedField.required; import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.spy; import static org.mockito.Mockito.when; public class TestHiveCatalog extends HiveMetastoreTest {