From fc8314c88a585c9b466a93acdedae7928d459106 Mon Sep 17 00:00:00 2001 From: Marton Bod Date: Mon, 28 Sep 2020 18:05:10 +0200 Subject: [PATCH 1/9] Hive: Create new mr-hive3 module which builds with Hive3/Hadoop3 --- build.gradle | 76 ++++++++++++++ .../apache/iceberg/hive/HiveClientPool.java | 11 ++- .../apache/iceberg/hive/MetastoreUtil.java | 43 ++++++++ .../iceberg/hive/TestHiveMetastore.java | 30 +++++- .../IcebergDateObjectInspectorHive3.java | 62 ++++++++++++ .../IcebergTimestampObjectInspectorHive3.java | 90 +++++++++++++++++ .../TestIcebergDateObjectInspectorHive3.java | 65 ++++++++++++ ...tIcebergTimestampObjectInspectorHive3.java | 99 +++++++++++++++++++ .../mr/hive/HiveIcebergFilterFactory.java | 3 + .../mr/hive/HiveIcebergStorageHandler.java | 6 ++ .../IcebergObjectInspector.java | 30 +++++- .../TestIcebergObjectInspector.java | 23 ++++- settings.gradle | 2 + 13 files changed, 533 insertions(+), 7 deletions(-) create mode 100644 hive-metastore/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java create mode 100644 mr-hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspectorHive3.java create mode 100644 mr-hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampObjectInspectorHive3.java create mode 100644 mr-hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergDateObjectInspectorHive3.java create mode 100644 mr-hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimestampObjectInspectorHive3.java diff --git a/build.gradle b/build.gradle index 87791fa9a1e8..8cd167644378 100644 --- a/build.gradle +++ b/build.gradle @@ -472,6 +472,79 @@ project(':iceberg-mr') { } } +if (jdkVersion == '8') { + project(':iceberg-mr-hive3') { + + // run the tests in iceberg-mr with Hive3 dependencies + sourceSets { + test { + java.srcDirs = ['../mr/src/test/java', 'src/test/java'] + resources.srcDirs = ['../mr/src/test/resources', 'src/test/resources'] + } + } + + // exclude these Hive2-specific tests from iceberg-mr + test { + exclude '**/TestIcebergDateObjectInspector.class' + exclude '**/TestIcebergTimestampObjectInspector.class' + } + + configurations.all { + resolutionStrategy.eachDependency { dep -> + if (dep.requested.group == 'org.apache.hive' && dep.requested.name != 'hive-storage-api') { + dep.useVersion '3.1.2' + } else if (dep.requested.group == 'org.apache.hadoop') { + dep.useVersion '3.1.0' + } + } + } + + dependencies { + compile project(':iceberg-api') + compile project(':iceberg-core') + compile project(':iceberg-data') + compile project(':iceberg-hive-metastore') + compile project(':iceberg-orc') + compile project(':iceberg-parquet') + compile project(':iceberg-mr') + + compileOnly("org.apache.hadoop:hadoop-client") { + exclude group: 'org.apache.avro', module: 'avro' + } + + compileOnly("org.apache.hive:hive-exec::core") { + exclude group: 'com.google.code.findbugs', module: 'jsr305' + exclude group: 'com.google.guava' + exclude group: 'com.google.protobuf', module: 'protobuf-java' + exclude group: 'org.apache.avro', module: 'avro' + exclude group: 'org.apache.calcite.avatica' + exclude group: 'org.apache.hive', module: 'hive-llap-tez' + exclude group: 'org.apache.logging.log4j' + exclude group: 'org.pentaho' // missing dependency + exclude group: 'org.slf4j', module: 'slf4j-log4j12' + } + compileOnly("org.apache.hive:hive-metastore") + compileOnly("org.apache.hive:hive-serde") + + testCompile project(path: ':iceberg-data', configuration: 'testArtifacts') + testCompile project(path: ':iceberg-api', configuration: 'testArtifacts') + testCompile project(path: ':iceberg-core', configuration: 'testArtifacts') + testCompile project(path: ':iceberg-hive-metastore', configuration: 'testArtifacts') + + testCompile("org.apache.avro:avro:1.9.2") + testCompile("org.apache.calcite:calcite-core") + testCompile("com.esotericsoftware:kryo-shaded:4.0.2") + testCompile("com.fasterxml.jackson.core:jackson-annotations:2.6.5") + testCompile("com.klarna:hiverunner:6.0.1") { + exclude group: 'javax.jms', module: 'jms' + exclude group: 'org.apache.hive', module: 'hive-exec' + exclude group: 'org.codehaus.jettison', module: 'jettison' + exclude group: 'org.apache.calcite.avatica' + } + } + } +} + project(':iceberg-hive-runtime') { apply plugin: 'com.github.johnrengelman.shadow' @@ -491,6 +564,9 @@ project(':iceberg-hive-runtime') { dependencies { compile project(':iceberg-mr') + if (jdkVersion == '8') { + compile project(':iceberg-mr-hive3') + } } shadowJar { diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveClientPool.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveClientPool.java index 0ad1321cd866..bcb3004246a6 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveClientPool.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveClientPool.java @@ -41,7 +41,16 @@ public HiveClientPool(int poolSize, Configuration conf) { @Override protected HiveMetaStoreClient newClient() { try { - return new HiveMetaStoreClient(hiveConf); + // create the metastore client based on whether we're working with Hive2 or Hive3 dependencies + // we need to do this because there is a breaking API change between Hive2 and Hive3 + if (MetastoreUtil.hive3PresentOnClasspath()) { + return (HiveMetaStoreClient) Class + .forName(HiveMetaStoreClient.class.getName()) + .getConstructor(Configuration.class) + .newInstance(hiveConf); + } else { + return new HiveMetaStoreClient(hiveConf); + } } catch (MetaException e) { throw new RuntimeMetaException(e, "Failed to connect to Hive Metastore"); } catch (Throwable t) { diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java new file mode 100644 index 000000000000..a93cbdec4064 --- /dev/null +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.hive; + +public class MetastoreUtil { + + // this class is unique to Hive3 and cannot be found in Hive2, therefore a good proxy to see if + // we are working against Hive3 dependencies + private static final String HIVE3_UNIQUE_CLASS = "org.apache.hadoop.hive.serde2.io.DateWritableV2"; + + private MetastoreUtil() { + + } + + /** + * @return true if Hive3 dependencies are found on classpath, false otherwise + */ + public static boolean hive3PresentOnClasspath() { + try { + Class.forName(HIVE3_UNIQUE_CLASS); + return true; + } catch (ClassNotFoundException e) { + return false; + } + } +} diff --git a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java index d45d3df53066..1d54ad2d80b3 100644 --- a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java +++ b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java @@ -24,6 +24,7 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; +import java.lang.reflect.Method; import java.sql.Connection; import java.sql.DriverManager; import java.sql.SQLException; @@ -80,6 +81,15 @@ public void stop() { if (hiveLocalDir != null) { hiveLocalDir.delete(); } + + // remove raw store if exists + try { + Method cleanupRawStore = HiveMetaStore.class.getDeclaredMethod("cleanupRawStore"); + cleanupRawStore.setAccessible(true); + cleanupRawStore.invoke(null); + } catch (Exception e) { + // no op + } } public HiveConf hiveConf() { @@ -94,8 +104,24 @@ public String getDatabasePath(String dbName) { private TServer newThriftServer(TServerSocket socket, HiveConf conf) throws Exception { HiveConf serverConf = new HiveConf(conf); serverConf.set(HiveConf.ConfVars.METASTORECONNECTURLKEY.varname, "jdbc:derby:" + getDerbyPath() + ";create=true"); - HiveMetaStore.HMSHandler baseHandler = new HiveMetaStore.HMSHandler("new db based metaserver", serverConf); - IHMSHandler handler = RetryingHMSHandler.getProxy(serverConf, baseHandler, false); + + // create the metastore handlers based on whether we're working with Hive2 or Hive3 dependencies + // we need to do this because there is a breaking API change between Hive2 and Hive3 + HiveMetaStore.HMSHandler baseHandler; + IHMSHandler handler; + if (MetastoreUtil.hive3PresentOnClasspath()) { + baseHandler = (HiveMetaStore.HMSHandler) Class + .forName(HiveMetaStore.HMSHandler.class.getName()) + .getConstructor(String.class, Configuration.class) + .newInstance("new db based metaserver", serverConf); + handler = (IHMSHandler) Class + .forName(RetryingHMSHandler.class.getName()) + .getDeclaredMethod("getProxy", Configuration.class, IHMSHandler.class, boolean.class) + .invoke(null, serverConf, baseHandler, false); + } else { + baseHandler = new HiveMetaStore.HMSHandler("new db based metaserver", serverConf); + handler = RetryingHMSHandler.getProxy(serverConf, baseHandler, false); + } TThreadPoolServer.Args args = new TThreadPoolServer.Args(socket) .processor(new TSetIpAddressProcessor<>(handler)) diff --git a/mr-hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspectorHive3.java b/mr-hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspectorHive3.java new file mode 100644 index 000000000000..cedd2a7f92c7 --- /dev/null +++ b/mr-hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspectorHive3.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.mr.hive.serde.objectinspector; + +import java.time.LocalDate; +import org.apache.hadoop.hive.common.type.Date; +import org.apache.hadoop.hive.serde2.io.DateWritableV2; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.iceberg.util.DateTimeUtil; + +public final class IcebergDateObjectInspectorHive3 extends AbstractPrimitiveJavaObjectInspector + implements DateObjectInspector { + + private static final IcebergDateObjectInspectorHive3 INSTANCE = new IcebergDateObjectInspectorHive3(); + + public static IcebergDateObjectInspectorHive3 get() { + return INSTANCE; + } + + private IcebergDateObjectInspectorHive3() { + super(TypeInfoFactory.dateTypeInfo); + } + + @Override + public Date getPrimitiveJavaObject(Object o) { + if (o == null) { + return null; + } + LocalDate date = (LocalDate) o; + return Date.ofEpochDay(DateTimeUtil.daysFromDate(date)); + } + + @Override + public DateWritableV2 getPrimitiveWritableObject(Object o) { + return o == null ? null : new DateWritableV2(DateTimeUtil.daysFromDate((LocalDate) o)); + } + + @Override + public Object copyObject(Object o) { + return o == null ? null : new Date((Date) o); + } + +} diff --git a/mr-hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampObjectInspectorHive3.java b/mr-hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampObjectInspectorHive3.java new file mode 100644 index 000000000000..7332f27416a7 --- /dev/null +++ b/mr-hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampObjectInspectorHive3.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.mr.hive.serde.objectinspector; + +import java.time.LocalDateTime; +import java.time.OffsetDateTime; +import java.time.ZoneOffset; +import org.apache.hadoop.hive.common.type.Timestamp; +import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; + +public abstract class IcebergTimestampObjectInspectorHive3 extends AbstractPrimitiveJavaObjectInspector + implements TimestampObjectInspector { + + private static final IcebergTimestampObjectInspectorHive3 INSTANCE_WITH_ZONE = + new IcebergTimestampObjectInspectorHive3() { + @Override + LocalDateTime toLocalDateTime(Object o) { + return ((OffsetDateTime) o).toLocalDateTime(); + } + }; + + private static final IcebergTimestampObjectInspectorHive3 INSTANCE_WITHOUT_ZONE = + new IcebergTimestampObjectInspectorHive3() { + @Override + LocalDateTime toLocalDateTime(Object o) { + return (LocalDateTime) o; + } + }; + + public static IcebergTimestampObjectInspectorHive3 get(boolean adjustToUTC) { + return adjustToUTC ? INSTANCE_WITH_ZONE : INSTANCE_WITHOUT_ZONE; + } + + private IcebergTimestampObjectInspectorHive3() { + super(TypeInfoFactory.timestampTypeInfo); + } + + + abstract LocalDateTime toLocalDateTime(Object object); + + @Override + public Timestamp getPrimitiveJavaObject(Object o) { + if (o == null) { + return null; + } + LocalDateTime time = toLocalDateTime(o); + Timestamp timestamp = Timestamp.ofEpochMilli(time.toInstant(ZoneOffset.UTC).toEpochMilli()); + timestamp.setNanos(time.getNano()); + return timestamp; + } + + @Override + public TimestampWritableV2 getPrimitiveWritableObject(Object o) { + Timestamp ts = getPrimitiveJavaObject(o); + return ts == null ? null : new TimestampWritableV2(ts); + } + + @Override + public Object copyObject(Object o) { + if (o == null) { + return null; + } + + Timestamp ts = (Timestamp) o; + Timestamp copy = new Timestamp(ts); + copy.setNanos(ts.getNanos()); + return copy; + } + +} diff --git a/mr-hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergDateObjectInspectorHive3.java b/mr-hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergDateObjectInspectorHive3.java new file mode 100644 index 000000000000..cf763b6017ab --- /dev/null +++ b/mr-hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergDateObjectInspectorHive3.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.mr.hive.serde.objectinspector; + +import java.time.LocalDate; +import org.apache.hadoop.hive.common.type.Date; +import org.apache.hadoop.hive.serde2.io.DateWritableV2; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.junit.Assert; +import org.junit.Test; + +public class TestIcebergDateObjectInspectorHive3 { + + @Test + public void testIcebergDateObjectInspector() { + DateObjectInspector oi = IcebergDateObjectInspectorHive3.get(); + + Assert.assertEquals(ObjectInspector.Category.PRIMITIVE, oi.getCategory()); + Assert.assertEquals(PrimitiveObjectInspector.PrimitiveCategory.DATE, oi.getPrimitiveCategory()); + + Assert.assertEquals(TypeInfoFactory.dateTypeInfo, oi.getTypeInfo()); + Assert.assertEquals(TypeInfoFactory.dateTypeInfo.getTypeName(), oi.getTypeName()); + + Assert.assertEquals(Date.class, oi.getJavaPrimitiveClass()); + Assert.assertEquals(DateWritableV2.class, oi.getPrimitiveWritableClass()); + + Assert.assertNull(oi.copyObject(null)); + Assert.assertNull(oi.getPrimitiveJavaObject(null)); + Assert.assertNull(oi.getPrimitiveWritableObject(null)); + + LocalDate local = LocalDate.of(2020, 1, 1); + Date date = Date.valueOf("2020-01-01"); + + Assert.assertEquals(date, oi.getPrimitiveJavaObject(local)); + Assert.assertEquals(new DateWritableV2(date), oi.getPrimitiveWritableObject(local)); + + Date copy = (Date) oi.copyObject(date); + + Assert.assertEquals(date, copy); + Assert.assertNotSame(date, copy); + + Assert.assertFalse(oi.preferWritable()); + } + +} diff --git a/mr-hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimestampObjectInspectorHive3.java b/mr-hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimestampObjectInspectorHive3.java new file mode 100644 index 000000000000..e7f6cca37952 --- /dev/null +++ b/mr-hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimestampObjectInspectorHive3.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.mr.hive.serde.objectinspector; + +import java.time.LocalDateTime; +import java.time.OffsetDateTime; +import java.time.ZoneOffset; +import org.apache.hadoop.hive.common.type.Timestamp; +import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.junit.Assert; +import org.junit.Test; + +public class TestIcebergTimestampObjectInspectorHive3 { + + @Test + public void testIcebergTimestampObjectInspector() { + TimestampObjectInspector oi = IcebergTimestampObjectInspectorHive3.get(false); + + Assert.assertEquals(ObjectInspector.Category.PRIMITIVE, oi.getCategory()); + Assert.assertEquals(PrimitiveObjectInspector.PrimitiveCategory.TIMESTAMP, oi.getPrimitiveCategory()); + + Assert.assertEquals(TypeInfoFactory.timestampTypeInfo, oi.getTypeInfo()); + Assert.assertEquals(TypeInfoFactory.timestampTypeInfo.getTypeName(), oi.getTypeName()); + + Assert.assertEquals(Timestamp.class, oi.getJavaPrimitiveClass()); + Assert.assertEquals(TimestampWritableV2.class, oi.getPrimitiveWritableClass()); + + Assert.assertNull(oi.copyObject(null)); + Assert.assertNull(oi.getPrimitiveJavaObject(null)); + Assert.assertNull(oi.getPrimitiveWritableObject(null)); + + LocalDateTime local = LocalDateTime.of(2020, 1, 1, 0, 0, 1, 500); + Timestamp ts = Timestamp.valueOf("2020-01-01 00:00:01.00000050"); + + Assert.assertEquals(ts, oi.getPrimitiveJavaObject(local)); + Assert.assertEquals(new TimestampWritableV2(ts), oi.getPrimitiveWritableObject(local)); + + Timestamp copy = (Timestamp) oi.copyObject(ts); + + Assert.assertEquals(ts, copy); + Assert.assertNotSame(ts, copy); + + Assert.assertFalse(oi.preferWritable()); + } + + @Test + public void testIcebergTimestampObjectInspectorWithUTCAdjustment() { + TimestampObjectInspector oi = IcebergTimestampObjectInspectorHive3.get(true); + + Assert.assertEquals(ObjectInspector.Category.PRIMITIVE, oi.getCategory()); + Assert.assertEquals(PrimitiveObjectInspector.PrimitiveCategory.TIMESTAMP, oi.getPrimitiveCategory()); + + Assert.assertEquals(TypeInfoFactory.timestampTypeInfo, oi.getTypeInfo()); + Assert.assertEquals(TypeInfoFactory.timestampTypeInfo.getTypeName(), oi.getTypeName()); + + Assert.assertEquals(Timestamp.class, oi.getJavaPrimitiveClass()); + Assert.assertEquals(TimestampWritableV2.class, oi.getPrimitiveWritableClass()); + + Assert.assertNull(oi.copyObject(null)); + Assert.assertNull(oi.getPrimitiveJavaObject(null)); + Assert.assertNull(oi.getPrimitiveWritableObject(null)); + + LocalDateTime local = LocalDateTime.of(2020, 1, 1, 0, 0, 1, 500); + OffsetDateTime offsetDateTime = OffsetDateTime.of(local, ZoneOffset.ofHours(4)); + Timestamp ts = Timestamp.valueOf("2020-01-01 00:00:01.00000050"); + + Assert.assertEquals(ts, oi.getPrimitiveJavaObject(offsetDateTime)); + Assert.assertEquals(new TimestampWritableV2(ts), oi.getPrimitiveWritableObject(offsetDateTime)); + + Timestamp copy = (Timestamp) oi.copyObject(ts); + + Assert.assertEquals(ts, copy); + Assert.assertNotSame(ts, copy); + + Assert.assertFalse(oi.preferWritable()); + } + +} diff --git a/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergFilterFactory.java b/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergFilterFactory.java index 63e823c65815..c800f72018d4 100644 --- a/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergFilterFactory.java +++ b/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergFilterFactory.java @@ -127,6 +127,9 @@ private static Object leafToLiteral(PredicateLeaf leaf) { case FLOAT: return leaf.getLiteral(); case DATE: + if (leaf.getLiteral() instanceof java.sql.Date) { + return daysFromDate((Date) leaf.getLiteral()); + } return daysFromTimestamp((Timestamp) leaf.getLiteral()); case TIMESTAMP: return microsFromTimestamp((Timestamp) LITERAL_FIELD.get(leaf)); diff --git a/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java index d6c4c2feab91..9f77aa91b7bc 100644 --- a/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java +++ b/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java @@ -91,6 +91,12 @@ public void configureTableJobProperties(TableDesc tableDesc, Map } + // Override annotation commented out, since this interface method has been introduced only in Hive 3 + // @Override + public void configureInputJobCredentials(TableDesc tableDesc, Map secrets) { + + } + @Override public void configureJobConf(TableDesc tableDesc, JobConf jobConf) { diff --git a/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergObjectInspector.java b/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergObjectInspector.java index a5fed0bc6f80..e1bc54818e1b 100644 --- a/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergObjectInspector.java +++ b/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergObjectInspector.java @@ -23,10 +23,13 @@ import javax.annotation.Nullable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.iceberg.Schema; +import org.apache.iceberg.hive.MetastoreUtil; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.TypeUtil; import org.apache.iceberg.types.Types; @@ -72,7 +75,20 @@ public ObjectInspector primitive(Type.PrimitiveType primitiveType) { primitiveTypeInfo = TypeInfoFactory.booleanTypeInfo; break; case DATE: - return IcebergDateObjectInspector.get(); + // create the correct inspector based on whether we're working with Hive2 or Hive3 dependencies + // we need to do this because there is a breaking API change in DateObjectInspector between Hive2 and Hive3 + if (MetastoreUtil.hive3PresentOnClasspath()) { + try { + return (DateObjectInspector) Class + .forName("org.apache.iceberg.mr.hive.serde.objectinspector.IcebergDateObjectInspectorHive3") + .getMethod("get") + .invoke(null); + } catch (Exception e) { + throw new RuntimeException("Unable to instantiate Hive3 date object inspector", e); + } + } else { + return IcebergDateObjectInspector.get(); + } case DECIMAL: Types.DecimalType type = (Types.DecimalType) primitiveType; return IcebergDecimalObjectInspector.get(type.precision(), type.scale()); @@ -96,6 +112,18 @@ public ObjectInspector primitive(Type.PrimitiveType primitiveType) { break; case TIMESTAMP: boolean adjustToUTC = ((Types.TimestampType) primitiveType).shouldAdjustToUTC(); + // create the correct inspector based on whether we're working with Hive2 or Hive3 dependencies + // we need to do this b/c there is a breaking API change in TimestampObjectInspector between Hive2 and Hive3 + if (MetastoreUtil.hive3PresentOnClasspath()) { + try { + return (TimestampObjectInspector) Class + .forName("org.apache.iceberg.mr.hive.serde.objectinspector.IcebergTimestampObjectInspectorHive3") + .getMethod("get", boolean.class) + .invoke(null, adjustToUTC); + } catch (Exception e) { + throw new RuntimeException("Unable to instantiate Hive3 timestamp object inspector", e); + } + } return IcebergTimestampObjectInspector.get(adjustToUTC); case TIME: diff --git a/mr/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergObjectInspector.java b/mr/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergObjectInspector.java index dc2a95d1933e..ac67deccf0de 100644 --- a/mr/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergObjectInspector.java +++ b/mr/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergObjectInspector.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.iceberg.AssertHelpers; import org.apache.iceberg.Schema; +import org.apache.iceberg.hive.MetastoreUtil; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.types.Types; import org.junit.Assert; @@ -90,7 +91,13 @@ public void testIcebergObjectInspector() { Assert.assertEquals(3, dateField.getFieldID()); Assert.assertEquals("date_field", dateField.getFieldName()); Assert.assertEquals("date comment", dateField.getFieldComment()); - Assert.assertEquals(IcebergDateObjectInspector.get(), dateField.getFieldObjectInspector()); + if (MetastoreUtil.hive3PresentOnClasspath()) { + Assert.assertEquals( + "org.apache.iceberg.mr.hive.serde.objectinspector.IcebergDateObjectInspectorHive3", + dateField.getFieldObjectInspector().getClass().getName()); + } else { + Assert.assertEquals(IcebergDateObjectInspector.get(), dateField.getFieldObjectInspector()); + } // decimal StructField decimalField = soi.getStructFieldRef("decimal_field"); @@ -146,14 +153,24 @@ public void testIcebergObjectInspector() { Assert.assertEquals(11, timestampField.getFieldID()); Assert.assertEquals("timestamp_field", timestampField.getFieldName()); Assert.assertEquals("timestamp comment", timestampField.getFieldComment()); - Assert.assertEquals(IcebergTimestampObjectInspector.get(false), timestampField.getFieldObjectInspector()); + if (MetastoreUtil.hive3PresentOnClasspath()) { + Assert.assertTrue(timestampField.getFieldObjectInspector().getClass().getName() + .startsWith("org.apache.iceberg.mr.hive.serde.objectinspector.IcebergTimestampObjectInspectorHive3")); + } else { + Assert.assertEquals(IcebergTimestampObjectInspector.get(false), timestampField.getFieldObjectInspector()); + } // timestamp with tz StructField timestampTzField = soi.getStructFieldRef("timestamptz_field"); Assert.assertEquals(12, timestampTzField.getFieldID()); Assert.assertEquals("timestamptz_field", timestampTzField.getFieldName()); Assert.assertEquals("timestamptz comment", timestampTzField.getFieldComment()); - Assert.assertEquals(IcebergTimestampObjectInspector.get(true), timestampTzField.getFieldObjectInspector()); + if (MetastoreUtil.hive3PresentOnClasspath()) { + Assert.assertTrue(timestampTzField.getFieldObjectInspector().getClass().getName() + .startsWith("org.apache.iceberg.mr.hive.serde.objectinspector.IcebergTimestampObjectInspectorHive3")); + } else { + Assert.assertEquals(IcebergTimestampObjectInspector.get(true), timestampTzField.getFieldObjectInspector()); + } // UUID StructField uuidField = soi.getStructFieldRef("uuid_field"); diff --git a/settings.gradle b/settings.gradle index 18b07fc58975..53489bc58ef8 100644 --- a/settings.gradle +++ b/settings.gradle @@ -57,7 +57,9 @@ project(':hive-metastore').name = 'iceberg-hive-metastore' if (JavaVersion.current() == JavaVersion.VERSION_1_8) { include 'spark2' include 'spark-runtime' + include 'mr-hive3' project(':spark2').name = 'iceberg-spark2' project(':spark-runtime').name = 'iceberg-spark-runtime' + project(':mr-hive3').name = 'iceberg-mr-hive3' } From 6b35a33b7a31999fdf9627fed8ce1b59d9e9126a Mon Sep 17 00:00:00 2001 From: Marton Bod Date: Tue, 29 Sep 2020 18:03:31 +0200 Subject: [PATCH 2/9] Hive: Rename mr-hive3 to hive3; Use reflection helpers from iceberg-common to bridge Hive2/3 API differences; indentation fixes --- build.gradle | 22 +++----- .../apache/iceberg/hive/HiveClientPool.java | 26 ++++++---- .../apache/iceberg/hive/MetastoreUtil.java | 17 ++++--- .../iceberg/hive/TestHiveMetastore.java | 50 ++++++++----------- .../IcebergDateObjectInspectorHive3.java | 2 +- .../IcebergTimestampObjectInspectorHive3.java | 6 +-- .../TestIcebergDateObjectInspectorHive3.java | 0 ...tIcebergTimestampObjectInspectorHive3.java | 0 .../IcebergObjectInspector.java | 45 ++++++----------- settings.gradle | 4 +- 10 files changed, 77 insertions(+), 95 deletions(-) rename {mr-hive3 => hive3}/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspectorHive3.java (96%) rename {mr-hive3 => hive3}/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampObjectInspectorHive3.java (96%) rename {mr-hive3 => hive3}/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergDateObjectInspectorHive3.java (100%) rename {mr-hive3 => hive3}/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimestampObjectInspectorHive3.java (100%) diff --git a/build.gradle b/build.gradle index 8cd167644378..3069f7c2b8f1 100644 --- a/build.gradle +++ b/build.gradle @@ -473,7 +473,7 @@ project(':iceberg-mr') { } if (jdkVersion == '8') { - project(':iceberg-mr-hive3') { + project(':iceberg-hive3') { // run the tests in iceberg-mr with Hive3 dependencies sourceSets { @@ -489,16 +489,6 @@ if (jdkVersion == '8') { exclude '**/TestIcebergTimestampObjectInspector.class' } - configurations.all { - resolutionStrategy.eachDependency { dep -> - if (dep.requested.group == 'org.apache.hive' && dep.requested.name != 'hive-storage-api') { - dep.useVersion '3.1.2' - } else if (dep.requested.group == 'org.apache.hadoop') { - dep.useVersion '3.1.0' - } - } - } - dependencies { compile project(':iceberg-api') compile project(':iceberg-core') @@ -508,11 +498,11 @@ if (jdkVersion == '8') { compile project(':iceberg-parquet') compile project(':iceberg-mr') - compileOnly("org.apache.hadoop:hadoop-client") { + compileOnly("org.apache.hadoop:hadoop-client:3.1.0") { exclude group: 'org.apache.avro', module: 'avro' } - compileOnly("org.apache.hive:hive-exec::core") { + compileOnly("org.apache.hive:hive-exec:3.1.2:core") { exclude group: 'com.google.code.findbugs', module: 'jsr305' exclude group: 'com.google.guava' exclude group: 'com.google.protobuf', module: 'protobuf-java' @@ -523,8 +513,8 @@ if (jdkVersion == '8') { exclude group: 'org.pentaho' // missing dependency exclude group: 'org.slf4j', module: 'slf4j-log4j12' } - compileOnly("org.apache.hive:hive-metastore") - compileOnly("org.apache.hive:hive-serde") + compileOnly("org.apache.hive:hive-metastore:3.1.2") + compileOnly("org.apache.hive:hive-serde:3.1.2") testCompile project(path: ':iceberg-data', configuration: 'testArtifacts') testCompile project(path: ':iceberg-api', configuration: 'testArtifacts') @@ -565,7 +555,7 @@ project(':iceberg-hive-runtime') { dependencies { compile project(':iceberg-mr') if (jdkVersion == '8') { - compile project(':iceberg-mr-hive3') + compile project(':iceberg-hive3') } } diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveClientPool.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveClientPool.java index bcb3004246a6..1df705bcd481 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveClientPool.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveClientPool.java @@ -23,10 +23,19 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.iceberg.common.DynConstructors; import org.apache.thrift.TException; import org.apache.thrift.transport.TTransportException; public class HiveClientPool extends ClientPool { + + // use appropriate ctor depending on whether we're working with Hive2 or Hive3 dependencies + // we need to do this because there is a breaking API change between Hive2 and Hive3 + private static final DynConstructors.Ctor CLIENT_CTOR = DynConstructors.builder() + .impl(HiveMetaStoreClient.class, HiveConf.class) + .impl(HiveMetaStoreClient.class, Configuration.class) + .build(); + private final HiveConf hiveConf; HiveClientPool(Configuration conf) { @@ -41,15 +50,14 @@ public HiveClientPool(int poolSize, Configuration conf) { @Override protected HiveMetaStoreClient newClient() { try { - // create the metastore client based on whether we're working with Hive2 or Hive3 dependencies - // we need to do this because there is a breaking API change between Hive2 and Hive3 - if (MetastoreUtil.hive3PresentOnClasspath()) { - return (HiveMetaStoreClient) Class - .forName(HiveMetaStoreClient.class.getName()) - .getConstructor(Configuration.class) - .newInstance(hiveConf); - } else { - return new HiveMetaStoreClient(hiveConf); + try { + return CLIENT_CTOR.newInstance(hiveConf); + } catch (RuntimeException e) { + // any MetaException would be wrapped into RuntimeException during reflection, so let's double-check type here + if (e.getCause() instanceof MetaException) { + throw (MetaException) e.getCause(); + } + throw e; } } catch (MetaException e) { throw new RuntimeMetaException(e, "Failed to connect to Hive Metastore"); diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java index a93cbdec4064..fce2689adc39 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java @@ -25,19 +25,24 @@ public class MetastoreUtil { // we are working against Hive3 dependencies private static final String HIVE3_UNIQUE_CLASS = "org.apache.hadoop.hive.serde2.io.DateWritableV2"; - private MetastoreUtil() { + private static Boolean hive3PresentOnClasspath = null; + private MetastoreUtil() { } /** + * Loads a Hive3-specific class to see if Hive3 is found on the classpath. Caches the result into a static variable. * @return true if Hive3 dependencies are found on classpath, false otherwise */ public static boolean hive3PresentOnClasspath() { - try { - Class.forName(HIVE3_UNIQUE_CLASS); - return true; - } catch (ClassNotFoundException e) { - return false; + if (hive3PresentOnClasspath == null) { + try { + Class.forName(HIVE3_UNIQUE_CLASS); + hive3PresentOnClasspath = true; + } catch (ClassNotFoundException e) { + hive3PresentOnClasspath = false; + } } + return hive3PresentOnClasspath; } } diff --git a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java index 1d54ad2d80b3..c247989994c6 100644 --- a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java +++ b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java @@ -24,7 +24,6 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; -import java.lang.reflect.Method; import java.sql.Connection; import java.sql.DriverManager; import java.sql.SQLException; @@ -36,6 +35,8 @@ import org.apache.hadoop.hive.metastore.IHMSHandler; import org.apache.hadoop.hive.metastore.RetryingHMSHandler; import org.apache.hadoop.hive.metastore.TSetIpAddressProcessor; +import org.apache.iceberg.common.DynConstructors; +import org.apache.iceberg.common.DynMethods; import org.apache.thrift.protocol.TBinaryProtocol; import org.apache.thrift.server.TServer; import org.apache.thrift.server.TThreadPoolServer; @@ -48,6 +49,23 @@ public class TestHiveMetastore { + private static final DynMethods.StaticMethod CLEAN_RAW_STORE = DynMethods.builder("cleanupRawStore") + .hiddenImpl(HiveMetaStore.class) + .orNoop() + .buildStatic(); + + // create the metastore handlers based on whether we're working with Hive2 or Hive3 dependencies + // we need to do this because there is a breaking API change between Hive2 and Hive3 + private static final DynConstructors.Ctor HMS_HANDLER_CTOR = DynConstructors.builder() + .impl(HiveMetaStore.HMSHandler.class, String.class, Configuration.class) + .impl(HiveMetaStore.HMSHandler.class, String.class, HiveConf.class) + .build(); + + private static final DynMethods.StaticMethod GET_BASE_HMS_HANDLER = DynMethods.builder("getProxy") + .impl(RetryingHMSHandler.class, Configuration.class, IHMSHandler.class, boolean.class) + .impl(RetryingHMSHandler.class, HiveConf.class, IHMSHandler.class, boolean.class) + .buildStatic(); + private File hiveLocalDir; private HiveConf hiveConf; private ExecutorService executorService; @@ -81,15 +99,7 @@ public void stop() { if (hiveLocalDir != null) { hiveLocalDir.delete(); } - - // remove raw store if exists - try { - Method cleanupRawStore = HiveMetaStore.class.getDeclaredMethod("cleanupRawStore"); - cleanupRawStore.setAccessible(true); - cleanupRawStore.invoke(null); - } catch (Exception e) { - // no op - } + CLEAN_RAW_STORE.invoke(); } public HiveConf hiveConf() { @@ -104,24 +114,8 @@ public String getDatabasePath(String dbName) { private TServer newThriftServer(TServerSocket socket, HiveConf conf) throws Exception { HiveConf serverConf = new HiveConf(conf); serverConf.set(HiveConf.ConfVars.METASTORECONNECTURLKEY.varname, "jdbc:derby:" + getDerbyPath() + ";create=true"); - - // create the metastore handlers based on whether we're working with Hive2 or Hive3 dependencies - // we need to do this because there is a breaking API change between Hive2 and Hive3 - HiveMetaStore.HMSHandler baseHandler; - IHMSHandler handler; - if (MetastoreUtil.hive3PresentOnClasspath()) { - baseHandler = (HiveMetaStore.HMSHandler) Class - .forName(HiveMetaStore.HMSHandler.class.getName()) - .getConstructor(String.class, Configuration.class) - .newInstance("new db based metaserver", serverConf); - handler = (IHMSHandler) Class - .forName(RetryingHMSHandler.class.getName()) - .getDeclaredMethod("getProxy", Configuration.class, IHMSHandler.class, boolean.class) - .invoke(null, serverConf, baseHandler, false); - } else { - baseHandler = new HiveMetaStore.HMSHandler("new db based metaserver", serverConf); - handler = RetryingHMSHandler.getProxy(serverConf, baseHandler, false); - } + HiveMetaStore.HMSHandler baseHandler = HMS_HANDLER_CTOR.newInstance("new db based metaserver", serverConf); + IHMSHandler handler = GET_BASE_HMS_HANDLER.invoke(serverConf, baseHandler, false); TThreadPoolServer.Args args = new TThreadPoolServer.Args(socket) .processor(new TSetIpAddressProcessor<>(handler)) diff --git a/mr-hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspectorHive3.java b/hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspectorHive3.java similarity index 96% rename from mr-hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspectorHive3.java rename to hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspectorHive3.java index cedd2a7f92c7..e37ec079b06e 100644 --- a/mr-hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspectorHive3.java +++ b/hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspectorHive3.java @@ -28,7 +28,7 @@ import org.apache.iceberg.util.DateTimeUtil; public final class IcebergDateObjectInspectorHive3 extends AbstractPrimitiveJavaObjectInspector - implements DateObjectInspector { + implements DateObjectInspector { private static final IcebergDateObjectInspectorHive3 INSTANCE = new IcebergDateObjectInspectorHive3(); diff --git a/mr-hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampObjectInspectorHive3.java b/hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampObjectInspectorHive3.java similarity index 96% rename from mr-hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampObjectInspectorHive3.java rename to hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampObjectInspectorHive3.java index 7332f27416a7..1413df46061c 100644 --- a/mr-hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampObjectInspectorHive3.java +++ b/hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampObjectInspectorHive3.java @@ -29,7 +29,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; public abstract class IcebergTimestampObjectInspectorHive3 extends AbstractPrimitiveJavaObjectInspector - implements TimestampObjectInspector { + implements TimestampObjectInspector { private static final IcebergTimestampObjectInspectorHive3 INSTANCE_WITH_ZONE = new IcebergTimestampObjectInspectorHive3() { @@ -37,7 +37,7 @@ public abstract class IcebergTimestampObjectInspectorHive3 extends AbstractPrimi LocalDateTime toLocalDateTime(Object o) { return ((OffsetDateTime) o).toLocalDateTime(); } - }; + }; private static final IcebergTimestampObjectInspectorHive3 INSTANCE_WITHOUT_ZONE = new IcebergTimestampObjectInspectorHive3() { @@ -45,7 +45,7 @@ LocalDateTime toLocalDateTime(Object o) { LocalDateTime toLocalDateTime(Object o) { return (LocalDateTime) o; } - }; + }; public static IcebergTimestampObjectInspectorHive3 get(boolean adjustToUTC) { return adjustToUTC ? INSTANCE_WITH_ZONE : INSTANCE_WITHOUT_ZONE; diff --git a/mr-hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergDateObjectInspectorHive3.java b/hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergDateObjectInspectorHive3.java similarity index 100% rename from mr-hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergDateObjectInspectorHive3.java rename to hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergDateObjectInspectorHive3.java diff --git a/mr-hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimestampObjectInspectorHive3.java b/hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimestampObjectInspectorHive3.java similarity index 100% rename from mr-hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimestampObjectInspectorHive3.java rename to hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimestampObjectInspectorHive3.java diff --git a/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergObjectInspector.java b/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergObjectInspector.java index e1bc54818e1b..0009e8c38609 100644 --- a/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergObjectInspector.java +++ b/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergObjectInspector.java @@ -23,19 +23,29 @@ import javax.annotation.Nullable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.iceberg.Schema; -import org.apache.iceberg.hive.MetastoreUtil; +import org.apache.iceberg.common.DynMethods; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.TypeUtil; import org.apache.iceberg.types.Types; public final class IcebergObjectInspector extends TypeUtil.SchemaVisitor { + // get the correct inspectors based on whether we're working with Hive2 or Hive3 dependencies + // we need to do this because there is a breaking API change in Date/TimestampObjectInspector between Hive2 and Hive3 + private static final DynMethods.StaticMethod DATE_INSPECTOR = DynMethods.builder("get") + .impl("org.apache.iceberg.mr.hive.serde.objectinspector.IcebergDateObjectInspectorHive3", null) + .impl(IcebergDateObjectInspector.class, null) + .buildStatic(); + + private static final DynMethods.StaticMethod TIMESTAMP_INSPECTOR = DynMethods.builder("get") + .impl("org.apache.iceberg.mr.hive.serde.objectinspector.IcebergTimestampObjectInspectorHive3", boolean.class) + .impl(IcebergTimestampObjectInspector.class, boolean.class) + .buildStatic(); + public static ObjectInspector create(@Nullable Schema schema) { if (schema == null) { return IcebergRecordObjectInspector.empty(); @@ -75,20 +85,7 @@ public ObjectInspector primitive(Type.PrimitiveType primitiveType) { primitiveTypeInfo = TypeInfoFactory.booleanTypeInfo; break; case DATE: - // create the correct inspector based on whether we're working with Hive2 or Hive3 dependencies - // we need to do this because there is a breaking API change in DateObjectInspector between Hive2 and Hive3 - if (MetastoreUtil.hive3PresentOnClasspath()) { - try { - return (DateObjectInspector) Class - .forName("org.apache.iceberg.mr.hive.serde.objectinspector.IcebergDateObjectInspectorHive3") - .getMethod("get") - .invoke(null); - } catch (Exception e) { - throw new RuntimeException("Unable to instantiate Hive3 date object inspector", e); - } - } else { - return IcebergDateObjectInspector.get(); - } + return DATE_INSPECTOR.invoke(); case DECIMAL: Types.DecimalType type = (Types.DecimalType) primitiveType; return IcebergDecimalObjectInspector.get(type.precision(), type.scale()); @@ -112,19 +109,7 @@ public ObjectInspector primitive(Type.PrimitiveType primitiveType) { break; case TIMESTAMP: boolean adjustToUTC = ((Types.TimestampType) primitiveType).shouldAdjustToUTC(); - // create the correct inspector based on whether we're working with Hive2 or Hive3 dependencies - // we need to do this b/c there is a breaking API change in TimestampObjectInspector between Hive2 and Hive3 - if (MetastoreUtil.hive3PresentOnClasspath()) { - try { - return (TimestampObjectInspector) Class - .forName("org.apache.iceberg.mr.hive.serde.objectinspector.IcebergTimestampObjectInspectorHive3") - .getMethod("get", boolean.class) - .invoke(null, adjustToUTC); - } catch (Exception e) { - throw new RuntimeException("Unable to instantiate Hive3 timestamp object inspector", e); - } - } - return IcebergTimestampObjectInspector.get(adjustToUTC); + return TIMESTAMP_INSPECTOR.invoke(adjustToUTC); case TIME: default: diff --git a/settings.gradle b/settings.gradle index 53489bc58ef8..0377d31838e6 100644 --- a/settings.gradle +++ b/settings.gradle @@ -57,9 +57,9 @@ project(':hive-metastore').name = 'iceberg-hive-metastore' if (JavaVersion.current() == JavaVersion.VERSION_1_8) { include 'spark2' include 'spark-runtime' - include 'mr-hive3' + include 'hive3' project(':spark2').name = 'iceberg-spark2' project(':spark-runtime').name = 'iceberg-spark-runtime' - project(':mr-hive3').name = 'iceberg-mr-hive3' + project(':hive3').name = 'iceberg-hive3' } From 4fc53d0c56ebe032681eba5846311d5ea82b2afc Mon Sep 17 00:00:00 2001 From: Marton Bod Date: Tue, 29 Sep 2020 19:34:34 +0200 Subject: [PATCH 3/9] update comment --- .../mr/hive/serde/objectinspector/IcebergObjectInspector.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergObjectInspector.java b/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergObjectInspector.java index 0009e8c38609..003e0afa70f8 100644 --- a/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergObjectInspector.java +++ b/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergObjectInspector.java @@ -34,7 +34,7 @@ public final class IcebergObjectInspector extends TypeUtil.SchemaVisitor { - // get the correct inspectors based on whether we're working with Hive2 or Hive3 dependencies + // get the correct inspectors depending on whether we're working with Hive2 or Hive3 dependencies // we need to do this because there is a breaking API change in Date/TimestampObjectInspector between Hive2 and Hive3 private static final DynMethods.StaticMethod DATE_INSPECTOR = DynMethods.builder("get") .impl("org.apache.iceberg.mr.hive.serde.objectinspector.IcebergDateObjectInspectorHive3", null) From 6a4ea190c1543834aa419a4d5cfc2513676fff9f Mon Sep 17 00:00:00 2001 From: Marton Bod Date: Wed, 30 Sep 2020 17:09:44 +0200 Subject: [PATCH 4/9] Use Instant/epoch days to create date/timestamp objects in tests; Reflection optimizations --- .../apache/iceberg/hive/MetastoreUtil.java | 20 +++++++------- .../TestIcebergDateObjectInspectorHive3.java | 4 +-- ...tIcebergTimestampObjectInspectorHive3.java | 10 ++++--- .../IcebergObjectInspector.java | 26 ++++++++++++------- .../TestIcebergObjectInspector.java | 6 ++--- 5 files changed, 38 insertions(+), 28 deletions(-) diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java index fce2689adc39..a2b4ea59c99e 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java @@ -25,24 +25,24 @@ public class MetastoreUtil { // we are working against Hive3 dependencies private static final String HIVE3_UNIQUE_CLASS = "org.apache.hadoop.hive.serde2.io.DateWritableV2"; - private static Boolean hive3PresentOnClasspath = null; + private static final boolean HIVE3_PRESENT_ON_CLASSPATH = detectHive3(); private MetastoreUtil() { } /** - * Loads a Hive3-specific class to see if Hive3 is found on the classpath. Caches the result into a static variable. * @return true if Hive3 dependencies are found on classpath, false otherwise */ public static boolean hive3PresentOnClasspath() { - if (hive3PresentOnClasspath == null) { - try { - Class.forName(HIVE3_UNIQUE_CLASS); - hive3PresentOnClasspath = true; - } catch (ClassNotFoundException e) { - hive3PresentOnClasspath = false; - } + return HIVE3_PRESENT_ON_CLASSPATH; + } + + private static boolean detectHive3() { + try { + Class.forName(HIVE3_UNIQUE_CLASS); + return true; + } catch (ClassNotFoundException e) { + return false; } - return hive3PresentOnClasspath; } } diff --git a/hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergDateObjectInspectorHive3.java b/hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergDateObjectInspectorHive3.java index cf763b6017ab..7b28f8d661a6 100644 --- a/hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergDateObjectInspectorHive3.java +++ b/hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergDateObjectInspectorHive3.java @@ -48,8 +48,8 @@ public void testIcebergDateObjectInspector() { Assert.assertNull(oi.getPrimitiveJavaObject(null)); Assert.assertNull(oi.getPrimitiveWritableObject(null)); - LocalDate local = LocalDate.of(2020, 1, 1); - Date date = Date.valueOf("2020-01-01"); + LocalDate local = LocalDate.ofEpochDay(5005); + Date date = Date.ofEpochDay(5005); Assert.assertEquals(date, oi.getPrimitiveJavaObject(local)); Assert.assertEquals(new DateWritableV2(date), oi.getPrimitiveWritableObject(local)); diff --git a/hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimestampObjectInspectorHive3.java b/hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimestampObjectInspectorHive3.java index e7f6cca37952..93e5fb7a574a 100644 --- a/hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimestampObjectInspectorHive3.java +++ b/hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimestampObjectInspectorHive3.java @@ -19,8 +19,10 @@ package org.apache.iceberg.mr.hive.serde.objectinspector; +import java.time.Instant; import java.time.LocalDateTime; import java.time.OffsetDateTime; +import java.time.ZoneId; import java.time.ZoneOffset; import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; @@ -50,8 +52,8 @@ public void testIcebergTimestampObjectInspector() { Assert.assertNull(oi.getPrimitiveJavaObject(null)); Assert.assertNull(oi.getPrimitiveWritableObject(null)); - LocalDateTime local = LocalDateTime.of(2020, 1, 1, 0, 0, 1, 500); - Timestamp ts = Timestamp.valueOf("2020-01-01 00:00:01.00000050"); + LocalDateTime local = LocalDateTime.ofInstant(Instant.ofEpochMilli(1601471970000L), ZoneId.of("UTC")); + Timestamp ts = Timestamp.ofEpochMilli(1601471970000L); Assert.assertEquals(ts, oi.getPrimitiveJavaObject(local)); Assert.assertEquals(new TimestampWritableV2(ts), oi.getPrimitiveWritableObject(local)); @@ -81,9 +83,9 @@ public void testIcebergTimestampObjectInspectorWithUTCAdjustment() { Assert.assertNull(oi.getPrimitiveJavaObject(null)); Assert.assertNull(oi.getPrimitiveWritableObject(null)); - LocalDateTime local = LocalDateTime.of(2020, 1, 1, 0, 0, 1, 500); + LocalDateTime local = LocalDateTime.ofInstant(Instant.ofEpochMilli(1601471970000L), ZoneId.of("UTC")); OffsetDateTime offsetDateTime = OffsetDateTime.of(local, ZoneOffset.ofHours(4)); - Timestamp ts = Timestamp.valueOf("2020-01-01 00:00:01.00000050"); + Timestamp ts = Timestamp.ofEpochMilli(1601471970000L); Assert.assertEquals(ts, oi.getPrimitiveJavaObject(offsetDateTime)); Assert.assertEquals(new TimestampWritableV2(ts), oi.getPrimitiveWritableObject(offsetDateTime)); diff --git a/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergObjectInspector.java b/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergObjectInspector.java index 003e0afa70f8..f9b2214e8f85 100644 --- a/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergObjectInspector.java +++ b/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergObjectInspector.java @@ -36,15 +36,23 @@ public final class IcebergObjectInspector extends TypeUtil.SchemaVisitor Date: Wed, 30 Sep 2020 17:49:52 +0200 Subject: [PATCH 5/9] Extract epochDays and epochMillis --- .../TestIcebergDateObjectInspectorHive3.java | 5 +++-- .../TestIcebergTimestampObjectInspectorHive3.java | 10 ++++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergDateObjectInspectorHive3.java b/hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergDateObjectInspectorHive3.java index 7b28f8d661a6..ca48639eeadb 100644 --- a/hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergDateObjectInspectorHive3.java +++ b/hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergDateObjectInspectorHive3.java @@ -48,8 +48,9 @@ public void testIcebergDateObjectInspector() { Assert.assertNull(oi.getPrimitiveJavaObject(null)); Assert.assertNull(oi.getPrimitiveWritableObject(null)); - LocalDate local = LocalDate.ofEpochDay(5005); - Date date = Date.ofEpochDay(5005); + int epochDays = 5005; + LocalDate local = LocalDate.ofEpochDay(epochDays); + Date date = Date.ofEpochDay(epochDays); Assert.assertEquals(date, oi.getPrimitiveJavaObject(local)); Assert.assertEquals(new DateWritableV2(date), oi.getPrimitiveWritableObject(local)); diff --git a/hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimestampObjectInspectorHive3.java b/hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimestampObjectInspectorHive3.java index 93e5fb7a574a..e885689d9a1f 100644 --- a/hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimestampObjectInspectorHive3.java +++ b/hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimestampObjectInspectorHive3.java @@ -52,8 +52,9 @@ public void testIcebergTimestampObjectInspector() { Assert.assertNull(oi.getPrimitiveJavaObject(null)); Assert.assertNull(oi.getPrimitiveWritableObject(null)); - LocalDateTime local = LocalDateTime.ofInstant(Instant.ofEpochMilli(1601471970000L), ZoneId.of("UTC")); - Timestamp ts = Timestamp.ofEpochMilli(1601471970000L); + long epochMilli = 1601471970000L; + LocalDateTime local = LocalDateTime.ofInstant(Instant.ofEpochMilli(epochMilli), ZoneId.of("UTC")); + Timestamp ts = Timestamp.ofEpochMilli(epochMilli); Assert.assertEquals(ts, oi.getPrimitiveJavaObject(local)); Assert.assertEquals(new TimestampWritableV2(ts), oi.getPrimitiveWritableObject(local)); @@ -83,9 +84,10 @@ public void testIcebergTimestampObjectInspectorWithUTCAdjustment() { Assert.assertNull(oi.getPrimitiveJavaObject(null)); Assert.assertNull(oi.getPrimitiveWritableObject(null)); - LocalDateTime local = LocalDateTime.ofInstant(Instant.ofEpochMilli(1601471970000L), ZoneId.of("UTC")); + long epochMilli = 1601471970000L; + LocalDateTime local = LocalDateTime.ofInstant(Instant.ofEpochMilli(epochMilli), ZoneId.of("UTC")); OffsetDateTime offsetDateTime = OffsetDateTime.of(local, ZoneOffset.ofHours(4)); - Timestamp ts = Timestamp.ofEpochMilli(1601471970000L); + Timestamp ts = Timestamp.ofEpochMilli(epochMilli); Assert.assertEquals(ts, oi.getPrimitiveJavaObject(offsetDateTime)); Assert.assertEquals(new TimestampWritableV2(ts), oi.getPrimitiveWritableObject(offsetDateTime)); From 196e917a66f60f8b3e127b8af13604e6a3bb9fa0 Mon Sep 17 00:00:00 2001 From: Marton Bod Date: Thu, 1 Oct 2020 10:45:57 +0200 Subject: [PATCH 6/9] Avoid concurrency issues between iceberg-mr and iceberg-hive3 tests --- build.gradle | 3 +++ 1 file changed, 3 insertions(+) diff --git a/build.gradle b/build.gradle index 3069f7c2b8f1..08fd721bfb52 100644 --- a/build.gradle +++ b/build.gradle @@ -483,6 +483,9 @@ if (jdkVersion == '8') { } } + // but first wait for the iceberg-mr (Hive2) tests to complete to avoid concurrency issues around HiveMetastoreTest + test.dependsOn(':iceberg-mr:test') + // exclude these Hive2-specific tests from iceberg-mr test { exclude '**/TestIcebergDateObjectInspector.class' From 7c055ad2e86d49a07fd5d62dfefd5cdaef8b4c46 Mon Sep 17 00:00:00 2001 From: Marton Bod Date: Thu, 1 Oct 2020 17:28:42 +0200 Subject: [PATCH 7/9] Shutdown HMSHandler directly instead of using reflective call --- .../org/apache/iceberg/hive/TestHiveMetastore.java | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java index c247989994c6..1a7b0067e77b 100644 --- a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java +++ b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java @@ -49,11 +49,6 @@ public class TestHiveMetastore { - private static final DynMethods.StaticMethod CLEAN_RAW_STORE = DynMethods.builder("cleanupRawStore") - .hiddenImpl(HiveMetaStore.class) - .orNoop() - .buildStatic(); - // create the metastore handlers based on whether we're working with Hive2 or Hive3 dependencies // we need to do this because there is a breaking API change between Hive2 and Hive3 private static final DynConstructors.Ctor HMS_HANDLER_CTOR = DynConstructors.builder() @@ -70,6 +65,7 @@ public class TestHiveMetastore { private HiveConf hiveConf; private ExecutorService executorService; private TServer server; + private HiveMetaStore.HMSHandler baseHandler; public void start() { try { @@ -99,7 +95,9 @@ public void stop() { if (hiveLocalDir != null) { hiveLocalDir.delete(); } - CLEAN_RAW_STORE.invoke(); + if (baseHandler != null) { + baseHandler.shutdown(); + } } public HiveConf hiveConf() { @@ -114,7 +112,7 @@ public String getDatabasePath(String dbName) { private TServer newThriftServer(TServerSocket socket, HiveConf conf) throws Exception { HiveConf serverConf = new HiveConf(conf); serverConf.set(HiveConf.ConfVars.METASTORECONNECTURLKEY.varname, "jdbc:derby:" + getDerbyPath() + ";create=true"); - HiveMetaStore.HMSHandler baseHandler = HMS_HANDLER_CTOR.newInstance("new db based metaserver", serverConf); + baseHandler = HMS_HANDLER_CTOR.newInstance("new db based metaserver", serverConf); IHMSHandler handler = GET_BASE_HMS_HANDLER.invoke(serverConf, baseHandler, false); TThreadPoolServer.Args args = new TThreadPoolServer.Args(socket) From 62da1a6fbd527849df0e85db43bc30c5dcc52a76 Mon Sep 17 00:00:00 2001 From: Marton Bod Date: Tue, 6 Oct 2020 21:37:31 +0200 Subject: [PATCH 8/9] Hive: Instantiate metastore once per test class for StorageHandler tests; Use flag to prevent persistence manager closure problem --- build.gradle | 3 -- .../apache/iceberg/hive/MetastoreUtil.java | 22 +++++++++- .../HiveIcebergStorageHandlerBaseTest.java | 43 ++++++++++++++++--- 3 files changed, 56 insertions(+), 12 deletions(-) diff --git a/build.gradle b/build.gradle index 08fd721bfb52..3069f7c2b8f1 100644 --- a/build.gradle +++ b/build.gradle @@ -483,9 +483,6 @@ if (jdkVersion == '8') { } } - // but first wait for the iceberg-mr (Hive2) tests to complete to avoid concurrency issues around HiveMetastoreTest - test.dependsOn(':iceberg-mr:test') - // exclude these Hive2-specific tests from iceberg-mr test { exclude '**/TestIcebergDateObjectInspector.class' diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java index a2b4ea59c99e..77ecee30e245 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java @@ -19,24 +19,42 @@ package org.apache.iceberg.hive; +import org.apache.hadoop.hive.metastore.ObjectStore; +import org.apache.iceberg.common.DynMethods; + public class MetastoreUtil { // this class is unique to Hive3 and cannot be found in Hive2, therefore a good proxy to see if // we are working against Hive3 dependencies private static final String HIVE3_UNIQUE_CLASS = "org.apache.hadoop.hive.serde2.io.DateWritableV2"; - private static final boolean HIVE3_PRESENT_ON_CLASSPATH = detectHive3(); + private static final DynMethods.StaticMethod MULTIPLE_METASTORES_IN_TEST = + DynMethods.builder("setTwoMetastoreTesting") + .impl(ObjectStore.class, boolean.class) + .orNoop() + .buildStatic(); + private MetastoreUtil() { } /** - * @return true if Hive3 dependencies are found on classpath, false otherwise + * Returns true if Hive3 dependencies are found on classpath, false otherwise. */ public static boolean hive3PresentOnClasspath() { return HIVE3_PRESENT_ON_CLASSPATH; } + /** + * In Hive3, ObjectStore closes and recreates its global PersistenceManagerFactory whenever #setConf is called on it + * with a different config than the one it has cached already. Setting this flag to true prevents the closure of the + * previous PersistenceManagerFactory. In Hive2, this flag does not exist, therefore calling this method is a no-op. + * @param multiple whether multiple metastore instances are used. + */ + public static void usingMultipleMetastoresInTest(boolean multiple) { + MULTIPLE_METASTORES_IN_TEST.invoke(multiple); + } + private static boolean detectHive3() { try { Class.forName(HIVE3_UNIQUE_CLASS); diff --git a/mr/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerBaseTest.java b/mr/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerBaseTest.java index d0c8dcf435f7..20be76e65917 100644 --- a/mr/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerBaseTest.java +++ b/mr/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerBaseTest.java @@ -27,18 +27,22 @@ import java.util.Map; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.iceberg.FileFormat; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; import org.apache.iceberg.Table; import org.apache.iceberg.data.Record; +import org.apache.iceberg.hive.MetastoreUtil; import org.apache.iceberg.hive.TestHiveMetastore; import org.apache.iceberg.mr.TestHelper; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.types.Types; import org.junit.After; +import org.junit.AfterClass; import org.junit.Assert; import org.junit.Before; +import org.junit.BeforeClass; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; @@ -49,6 +53,8 @@ @RunWith(StandaloneHiveRunner.class) public abstract class HiveIcebergStorageHandlerBaseTest { + private static final String DEFAULT_DATABASE_NAME = "default"; + @HiveSQL(files = {}, autoStart = false) private HiveShell shell; @@ -79,17 +85,32 @@ public abstract class HiveIcebergStorageHandlerBaseTest { private static final PartitionSpec SPEC = PartitionSpec.unpartitioned(); - // before variables - protected TestHiveMetastore metastore; + protected static TestHiveMetastore metastore; + private TestTables testTables; public abstract TestTables testTables(Configuration conf, TemporaryFolder tmp) throws IOException; - @Before - public void before() throws IOException { + + @BeforeClass + public static void beforeClass() { + // We're setting this because during HiveRunner operation, some internal threads (e.g. notification event poller) + // are setup which use a different JDBC connection URL than the TestHiveMetastore. In Hive3, this config difference + // could lead to the closure of the global PersistenceManagerFactory instance and result in potential intermittent + // communication failures for those clients which would continue to use their now-closed PMFs. + MetastoreUtil.usingMultipleMetastoresInTest(true); metastore = new TestHiveMetastore(); metastore.start(); + } + @AfterClass + public static void afterClass() { + metastore.stop(); + metastore = null; + } + + @Before + public void before() throws IOException { testTables = testTables(metastore.hiveConf(), temp); for (Map.Entry property : testTables.properties().entrySet()) { @@ -106,9 +127,17 @@ public void before() throws IOException { } @After - public void after() { - metastore.stop(); - metastore = null; + public void after() throws Exception { + Hive db = Hive.get(metastore.hiveConf()); + for (String dbName : db.getAllDatabases()) { + for (String tblName : db.getAllTables(dbName)) { + db.dropTable(dbName, tblName); + } + if (!DEFAULT_DATABASE_NAME.equals(dbName)) { + // Drop cascade, functions dropped by cascade + db.dropDatabase(dbName, true, true, true); + } + } } @Test From 1d7dc49998100765e73a97f73856b7987dace4fc Mon Sep 17 00:00:00 2001 From: Marton Bod Date: Wed, 7 Oct 2020 14:51:40 +0200 Subject: [PATCH 9/9] Add metastore uris as system prop; Remove multiple metastores workaround --- .../apache/iceberg/hive/MetastoreUtil.java | 20 +------------------ .../HiveIcebergStorageHandlerBaseTest.java | 12 ++++------- 2 files changed, 5 insertions(+), 27 deletions(-) diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java index 77ecee30e245..ad0ec8071061 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/MetastoreUtil.java @@ -19,21 +19,13 @@ package org.apache.iceberg.hive; -import org.apache.hadoop.hive.metastore.ObjectStore; -import org.apache.iceberg.common.DynMethods; - public class MetastoreUtil { // this class is unique to Hive3 and cannot be found in Hive2, therefore a good proxy to see if // we are working against Hive3 dependencies private static final String HIVE3_UNIQUE_CLASS = "org.apache.hadoop.hive.serde2.io.DateWritableV2"; - private static final boolean HIVE3_PRESENT_ON_CLASSPATH = detectHive3(); - private static final DynMethods.StaticMethod MULTIPLE_METASTORES_IN_TEST = - DynMethods.builder("setTwoMetastoreTesting") - .impl(ObjectStore.class, boolean.class) - .orNoop() - .buildStatic(); + private static final boolean HIVE3_PRESENT_ON_CLASSPATH = detectHive3(); private MetastoreUtil() { } @@ -45,16 +37,6 @@ public static boolean hive3PresentOnClasspath() { return HIVE3_PRESENT_ON_CLASSPATH; } - /** - * In Hive3, ObjectStore closes and recreates its global PersistenceManagerFactory whenever #setConf is called on it - * with a different config than the one it has cached already. Setting this flag to true prevents the closure of the - * previous PersistenceManagerFactory. In Hive2, this flag does not exist, therefore calling this method is a no-op. - * @param multiple whether multiple metastore instances are used. - */ - public static void usingMultipleMetastoresInTest(boolean multiple) { - MULTIPLE_METASTORES_IN_TEST.invoke(multiple); - } - private static boolean detectHive3() { try { Class.forName(HIVE3_UNIQUE_CLASS); diff --git a/mr/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerBaseTest.java b/mr/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerBaseTest.java index 20be76e65917..857e0db50622 100644 --- a/mr/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerBaseTest.java +++ b/mr/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerBaseTest.java @@ -33,7 +33,6 @@ import org.apache.iceberg.Schema; import org.apache.iceberg.Table; import org.apache.iceberg.data.Record; -import org.apache.iceberg.hive.MetastoreUtil; import org.apache.iceberg.hive.TestHiveMetastore; import org.apache.iceberg.mr.TestHelper; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; @@ -94,11 +93,6 @@ public abstract class HiveIcebergStorageHandlerBaseTest { @BeforeClass public static void beforeClass() { - // We're setting this because during HiveRunner operation, some internal threads (e.g. notification event poller) - // are setup which use a different JDBC connection URL than the TestHiveMetastore. In Hive3, this config difference - // could lead to the closure of the global PersistenceManagerFactory instance and result in potential intermittent - // communication failures for those clients which would continue to use their now-closed PMFs. - MetastoreUtil.usingMultipleMetastoresInTest(true); metastore = new TestHiveMetastore(); metastore.start(); } @@ -111,15 +105,17 @@ public static void afterClass() { @Before public void before() throws IOException { + String metastoreUris = metastore.hiveConf().getVar(HiveConf.ConfVars.METASTOREURIS); + // in Hive3, setting this as a system prop ensures that it will be picked up whenever a new HiveConf is created + System.setProperty(HiveConf.ConfVars.METASTOREURIS.varname, metastoreUris); + testTables = testTables(metastore.hiveConf(), temp); for (Map.Entry property : testTables.properties().entrySet()) { shell.setHiveConfValue(property.getKey(), property.getValue()); } - String metastoreUris = metastore.hiveConf().getVar(HiveConf.ConfVars.METASTOREURIS); shell.setHiveConfValue(HiveConf.ConfVars.METASTOREURIS.varname, metastoreUris); - String metastoreWarehouse = metastore.hiveConf().getVar(HiveConf.ConfVars.METASTOREWAREHOUSE); shell.setHiveConfValue(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, metastoreWarehouse);