From 77a3b541179d185d410737edff5ae9f3e16426cb Mon Sep 17 00:00:00 2001 From: Marton Bod Date: Thu, 10 Sep 2020 11:54:40 +0200 Subject: [PATCH 1/3] Create Hive2 and Hive3 iceberg-metastore modules; Upgrade MR and Flink to use Hive 3 --- .gitignore | 2 + build.gradle | 102 ++++++++++++++++-- .../mr/hive/HiveIcebergFilterFactory.java | 2 +- .../mr/hive/HiveIcebergStorageHandler.java | 5 + .../IcebergDateObjectInspector.java | 12 +-- .../IcebergTimestampObjectInspector.java | 12 +-- .../TestIcebergDateObjectInspector.java | 8 +- .../TestIcebergTimestampObjectInspector.java | 8 +- settings.gradle | 2 + versions.props | 8 +- 10 files changed, 127 insertions(+), 34 deletions(-) diff --git a/.gitignore b/.gitignore index b9bfdca32886..eb9efe6e2f40 100644 --- a/.gitignore +++ b/.gitignore @@ -52,3 +52,5 @@ derby.log # Python stuff python/.mypy_cache/ + +hive2-metastore/src diff --git a/build.gradle b/build.gradle index dd50af6ac1c5..06d025b85a10 100644 --- a/build.gradle +++ b/build.gradle @@ -362,6 +362,87 @@ project(':iceberg-hive-metastore') { } } + +project(':iceberg-hive2-metastore') { + + task copyFilesFromHive3(type: Copy) { + copy { + from('../hive-metastore/src/main/java/') + into 'src/main/java/' + } + copy { + from('../hive-metastore/src/main/resources/') + into 'src/main/resources/' + } + copy { + from('../hive-metastore/src/test/java/') + into 'src/test/java/' + } + copy { + from('../hive-metastore/src/test/resources/') + into 'src/test/resources/' + } + } + + dependencies { + + compile project(':iceberg-core') + + compileOnly "org.apache.avro:avro" + + compileOnly("org.apache.hive:hive-metastore:2.3.7") { + exclude group: 'org.apache.avro', module: 'avro' + exclude group: 'org.slf4j', module: 'slf4j-log4j12' + exclude group: 'org.pentaho' // missing dependency + exclude group: 'org.apache.hbase' + exclude group: 'org.apache.logging.log4j' + exclude group: 'co.cask.tephra' + exclude group: 'com.google.code.findbugs', module: 'jsr305' + exclude group: 
'org.eclipse.jetty.aggregate', module: 'jetty-all' + exclude group: 'org.eclipse.jetty.orbit', module: 'javax.servlet' + exclude group: 'org.apache.parquet', module: 'parquet-hadoop-bundle' + exclude group: 'com.tdunning', module: 'json' + exclude group: 'javax.transaction', module: 'transaction-api' + exclude group: 'com.zaxxer', module: 'HikariCP' + } + + testCompile("org.apache.hive:hive-exec:2.3.7") { + exclude group: 'org.apache.avro', module: 'avro' + exclude group: 'org.slf4j', module: 'slf4j-log4j12' + exclude group: 'org.pentaho' // missing dependency + exclude group: 'org.apache.hive', module: 'hive-llap-tez' + exclude group: 'org.apache.logging.log4j' + exclude group: 'com.google.protobuf', module: 'protobuf-java' + exclude group: 'org.apache.calcite' + exclude group: 'org.apache.calcite.avatica' + exclude group: 'com.google.code.findbugs', module: 'jsr305' + } + + testCompile("org.apache.hive:hive-metastore:2.3.7") { + exclude group: 'org.apache.avro', module: 'avro' + exclude group: 'org.slf4j', module: 'slf4j-log4j12' + exclude group: 'org.pentaho' // missing dependency + exclude group: 'org.apache.hbase' + exclude group: 'org.apache.logging.log4j' + exclude group: 'co.cask.tephra' + exclude group: 'com.google.code.findbugs', module: 'jsr305' + exclude group: 'org.eclipse.jetty.aggregate', module: 'jetty-all' + exclude group: 'org.eclipse.jetty.orbit', module: 'javax.servlet' + exclude group: 'org.apache.parquet', module: 'parquet-hadoop-bundle' + exclude group: 'com.tdunning', module: 'json' + exclude group: 'javax.transaction', module: 'transaction-api' + exclude group: 'com.zaxxer', module: 'HikariCP' + } + + compileOnly("org.apache.hadoop:hadoop-client:2.7.3") { + exclude group: 'org.apache.avro', module: 'avro' + exclude group: 'org.slf4j', module: 'slf4j-log4j12' + } + + testCompile project(path: ':iceberg-api', configuration: 'testArtifacts') + } +} + project(':iceberg-mr') { configurations { testCompile { @@ -404,7 +485,7 @@ 
project(':iceberg-mr') { testCompile("org.apache.calcite:calcite-core") testCompile("com.esotericsoftware:kryo-shaded:4.0.2") testCompile("com.fasterxml.jackson.core:jackson-annotations:2.6.5") - testCompile("com.klarna:hiverunner:5.2.1") { + testCompile("com.klarna:hiverunner:6.0.1") { exclude group: 'javax.jms', module: 'jms' exclude group: 'org.apache.hive', module: 'hive-exec' exclude group: 'org.codehaus.jettison', module: 'jettison' @@ -603,8 +684,10 @@ if (jdkVersion == '8') { compile project(':iceberg-orc') compile project(':iceberg-parquet') compile project(':iceberg-arrow') - compile project(':iceberg-hive-metastore') - compile project(':iceberg-spark') + compile project(':iceberg-hive2-metastore') + compile(project(":iceberg-spark")) { + exclude module: "iceberg-hive-metastore" + } compileOnly "org.apache.avro:avro" compileOnly("org.apache.spark:spark-hive_2.11") { @@ -613,12 +696,13 @@ if (jdkVersion == '8') { testCompile project(path: ':iceberg-spark', configuration: 'testArtifacts') - testCompile "org.apache.hadoop:hadoop-hdfs::tests" - testCompile "org.apache.hadoop:hadoop-common::tests" - testCompile("org.apache.hadoop:hadoop-minicluster") { + testCompile "org.apache.hadoop:hadoop-hdfs:2.7.3" + testCompile "org.apache.hadoop:hadoop-common:2.7.3" + testCompile("org.apache.hadoop:hadoop-minicluster:2.7.3") { exclude group: 'org.apache.avro', module: 'avro' } - testCompile project(path: ':iceberg-hive-metastore', configuration: 'testArtifacts') + + testCompile project(path: ':iceberg-hive2-metastore', configuration: 'testArtifacts') testCompile project(path: ':iceberg-api', configuration: 'testArtifacts') } @@ -709,7 +793,7 @@ project(':iceberg-spark3') { compile project(':iceberg-orc') compile project(':iceberg-parquet') compile project(':iceberg-arrow') - compile project(':iceberg-hive-metastore') + compile project(':iceberg-hive2-metastore') compile project(':iceberg-spark') compileOnly "org.apache.avro:avro" @@ -725,7 +809,7 @@ 
project(':iceberg-spark3') { testCompile("org.apache.hadoop:hadoop-minicluster") { exclude group: 'org.apache.avro', module: 'avro' } - testCompile project(path: ':iceberg-hive-metastore', configuration: 'testArtifacts') + testCompile project(path: ':iceberg-hive2-metastore', configuration: 'testArtifacts') testCompile project(path: ':iceberg-api', configuration: 'testArtifacts') } diff --git a/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergFilterFactory.java b/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergFilterFactory.java index 63e823c65815..b54d8183310c 100644 --- a/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergFilterFactory.java +++ b/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergFilterFactory.java @@ -127,7 +127,7 @@ private static Object leafToLiteral(PredicateLeaf leaf) { case FLOAT: return leaf.getLiteral(); case DATE: - return daysFromTimestamp((Timestamp) leaf.getLiteral()); + return daysFromDate((Date) leaf.getLiteral()); case TIMESTAMP: return microsFromTimestamp((Timestamp) LITERAL_FIELD.get(leaf)); case DECIMAL: diff --git a/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java index d6c4c2feab91..f085715b8111 100644 --- a/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java +++ b/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java @@ -81,6 +81,11 @@ public void configureInputJobProperties(TableDesc tableDesc, Map map.put(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(table.schema())); } + @Override + public void configureInputJobCredentials(TableDesc tableDesc, Map secrets) { + + } + @Override public void configureOutputJobProperties(TableDesc tableDesc, Map map) { diff --git a/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspector.java 
b/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspector.java index 122ffc660a5a..510b2052aa0b 100644 --- a/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspector.java +++ b/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspector.java @@ -19,9 +19,9 @@ package org.apache.iceberg.mr.hive.serde.objectinspector; -import java.sql.Date; import java.time.LocalDate; -import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.common.type.Date; +import org.apache.hadoop.hive.serde2.io.DateWritableV2; import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; @@ -42,17 +42,17 @@ private IcebergDateObjectInspector() { @Override public Date getPrimitiveJavaObject(Object o) { - return o == null ? null : Date.valueOf((LocalDate) o); + return o == null ? null : Date.valueOf(o.toString()); } @Override - public DateWritable getPrimitiveWritableObject(Object o) { - return o == null ? null : new DateWritable(DateTimeUtil.daysFromDate((LocalDate) o)); + public DateWritableV2 getPrimitiveWritableObject(Object o) { + return o == null ? null : new DateWritableV2(DateTimeUtil.daysFromDate((LocalDate) o)); } @Override public Object copyObject(Object o) { - return o == null ? null : new Date(((Date) o).getTime()); + return o == null ? 
null : Date.valueOf(o.toString()); } } diff --git a/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampObjectInspector.java b/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampObjectInspector.java index f05eaa5c44b5..e2baa246cbe7 100644 --- a/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampObjectInspector.java +++ b/mr/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampObjectInspector.java @@ -19,10 +19,10 @@ package org.apache.iceberg.mr.hive.serde.objectinspector; -import java.sql.Timestamp; import java.time.LocalDateTime; import java.time.OffsetDateTime; -import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.common.type.Timestamp; +import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; @@ -57,13 +57,13 @@ private IcebergTimestampObjectInspector() { @Override public Timestamp getPrimitiveJavaObject(Object o) { - return o == null ? null : Timestamp.valueOf(toLocalDateTime(o)); + return o == null ? null : Timestamp.valueOf(o.toString()); } @Override - public TimestampWritable getPrimitiveWritableObject(Object o) { + public TimestampWritableV2 getPrimitiveWritableObject(Object o) { Timestamp ts = getPrimitiveJavaObject(o); - return ts == null ? null : new TimestampWritable(ts); + return ts == null ? 
null : new TimestampWritableV2(ts); } @Override @@ -73,7 +73,7 @@ public Object copyObject(Object o) { } Timestamp ts = (Timestamp) o; - Timestamp copy = new Timestamp(ts.getTime()); + Timestamp copy = new Timestamp(ts); copy.setNanos(ts.getNanos()); return copy; } diff --git a/mr/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergDateObjectInspector.java b/mr/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergDateObjectInspector.java index a263a2b503b6..4b76dc2cf9ef 100644 --- a/mr/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergDateObjectInspector.java +++ b/mr/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergDateObjectInspector.java @@ -19,9 +19,9 @@ package org.apache.iceberg.mr.hive.serde.objectinspector; -import java.sql.Date; import java.time.LocalDate; -import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.common.type.Date; +import org.apache.hadoop.hive.serde2.io.DateWritableV2; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; @@ -42,7 +42,7 @@ public void testIcebergDateObjectInspector() { Assert.assertEquals(TypeInfoFactory.dateTypeInfo.getTypeName(), oi.getTypeName()); Assert.assertEquals(Date.class, oi.getJavaPrimitiveClass()); - Assert.assertEquals(DateWritable.class, oi.getPrimitiveWritableClass()); + Assert.assertEquals(DateWritableV2.class, oi.getPrimitiveWritableClass()); Assert.assertNull(oi.copyObject(null)); Assert.assertNull(oi.getPrimitiveJavaObject(null)); @@ -52,7 +52,7 @@ public void testIcebergDateObjectInspector() { Date date = Date.valueOf("2020-01-01"); Assert.assertEquals(date, oi.getPrimitiveJavaObject(local)); - Assert.assertEquals(new DateWritable(date), oi.getPrimitiveWritableObject(local)); + Assert.assertEquals(new 
DateWritableV2(date), oi.getPrimitiveWritableObject(local)); Date copy = (Date) oi.copyObject(date); diff --git a/mr/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimestampObjectInspector.java b/mr/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimestampObjectInspector.java index 7c91e808441e..9eacaf1e8040 100644 --- a/mr/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimestampObjectInspector.java +++ b/mr/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimestampObjectInspector.java @@ -19,9 +19,9 @@ package org.apache.iceberg.mr.hive.serde.objectinspector; -import java.sql.Timestamp; import java.time.LocalDateTime; -import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.common.type.Timestamp; +import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; @@ -42,7 +42,7 @@ public void testIcebergTimestampObjectInspector() { Assert.assertEquals(TypeInfoFactory.timestampTypeInfo.getTypeName(), oi.getTypeName()); Assert.assertEquals(Timestamp.class, oi.getJavaPrimitiveClass()); - Assert.assertEquals(TimestampWritable.class, oi.getPrimitiveWritableClass()); + Assert.assertEquals(TimestampWritableV2.class, oi.getPrimitiveWritableClass()); Assert.assertNull(oi.copyObject(null)); Assert.assertNull(oi.getPrimitiveJavaObject(null)); @@ -52,7 +52,7 @@ public void testIcebergTimestampObjectInspector() { Timestamp ts = Timestamp.valueOf("2020-01-01 00:00:00"); Assert.assertEquals(ts, oi.getPrimitiveJavaObject(local)); - Assert.assertEquals(new TimestampWritable(ts), oi.getPrimitiveWritableObject(local)); + Assert.assertEquals(new TimestampWritableV2(ts), oi.getPrimitiveWritableObject(local)); Timestamp 
copy = (Timestamp) oi.copyObject(ts); diff --git a/settings.gradle b/settings.gradle index b7dc68b0c85b..8ad5d8b45f87 100644 --- a/settings.gradle +++ b/settings.gradle @@ -34,6 +34,7 @@ include 'spark3' include 'spark3-runtime' include 'pig' include 'hive-metastore' +include 'hive2-metastore' project(':api').name = 'iceberg-api' project(':common').name = 'iceberg-common' @@ -51,6 +52,7 @@ project(':spark3').name = 'iceberg-spark3' project(':spark3-runtime').name = 'iceberg-spark3-runtime' project(':pig').name = 'iceberg-pig' project(':hive-metastore').name = 'iceberg-hive-metastore' +project(':hive2-metastore').name = 'iceberg-hive2-metastore' if (JavaVersion.current() == JavaVersion.VERSION_1_8) { include 'spark2' diff --git a/versions.props b/versions.props index 543eb531574b..91f6614c763d 100644 --- a/versions.props +++ b/versions.props @@ -1,9 +1,9 @@ org.slf4j:* = 1.7.25 org.apache.avro:avro = 1.9.2 org.apache.flink:* = 1.11.0 -org.apache.hadoop:* = 2.7.3 -org.apache.hive:hive-metastore = 2.3.7 -org.apache.hive:hive-serde = 2.3.7 +org.apache.hadoop:* = 3.1.0 +org.apache.hive:hive-metastore = 3.1.2 +org.apache.hive:hive-serde = 3.1.2 org.apache.orc:* = 1.6.3 org.apache.parquet:* = 1.11.0 org.apache.spark:spark-hive_2.11 = 2.4.6 @@ -21,4 +21,4 @@ com.github.stephenc.findbugs:findbugs-annotations = 1.3.9-1 # test deps junit:junit = 4.12 org.mockito:mockito-core = 1.10.19 -org.apache.hive:hive-exec = 2.3.7 +org.apache.hive:hive-exec = 3.1.2 From ecedc195805ad487e4a29dabb7eee82e18547d34 Mon Sep 17 00:00:00 2001 From: Marton Bod Date: Thu, 10 Sep 2020 12:20:51 +0200 Subject: [PATCH 2/3] spark should depend on hive2 --- build.gradle | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build.gradle b/build.gradle index 06d025b85a10..084b5b98110f 100644 --- a/build.gradle +++ b/build.gradle @@ -630,7 +630,7 @@ project(':iceberg-spark') { compile project(':iceberg-orc') compile project(':iceberg-parquet') compile project(':iceberg-arrow') - compile 
project(':iceberg-hive-metastore') + compile project(':iceberg-hive2-metastore') compile project(':iceberg-arrow') compileOnly "org.apache.avro:avro" @@ -643,7 +643,7 @@ project(':iceberg-spark') { testCompile("org.apache.hadoop:hadoop-minicluster") { exclude group: 'org.apache.avro', module: 'avro' } - testCompile project(path: ':iceberg-hive-metastore', configuration: 'testArtifacts') + testCompile project(path: ':iceberg-hive2-metastore', configuration: 'testArtifacts') testCompile project(path: ':iceberg-api', configuration: 'testArtifacts') testCompile project(path: ':iceberg-data', configuration: 'testArtifacts') } From 954a4b3319a9debc045033a8fda0e74f7a60848f Mon Sep 17 00:00:00 2001 From: Marton Bod Date: Fri, 11 Sep 2020 14:17:18 +0200 Subject: [PATCH 3/3] Fix hive 3 test metastore teardown issue --- build.gradle | 4 +--- .../org/apache/iceberg/hive/TestHiveMetastore.java | 10 ++++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/build.gradle b/build.gradle index 084b5b98110f..68cd63dfd0ef 100644 --- a/build.gradle +++ b/build.gradle @@ -685,9 +685,7 @@ if (jdkVersion == '8') { compile project(':iceberg-parquet') compile project(':iceberg-arrow') compile project(':iceberg-hive2-metastore') - compile(project(":iceberg-spark")) { - exclude module: "iceberg-hive-metastore" - } + compile project(":iceberg-spark") compileOnly "org.apache.avro:avro" compileOnly("org.apache.spark:spark-hive_2.11") { diff --git a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java index d45d3df53066..13009b64332c 100644 --- a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java +++ b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java @@ -24,6 +24,7 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; +import java.lang.reflect.Method; import java.sql.Connection; import 
java.sql.DriverManager; import java.sql.SQLException; @@ -80,6 +81,15 @@ public void stop() { if (hiveLocalDir != null) { hiveLocalDir.delete(); } + + // remove the raw store if it exists + try { + Method cleanupRawStore = HiveMetaStore.class.getDeclaredMethod("cleanupRawStore"); + cleanupRawStore.setAccessible(true); + cleanupRawStore.invoke(null); + } catch (Exception e) { + // no-op: failures of this reflective cleanup are deliberately ignored + } } public HiveConf hiveConf() {