diff --git a/.github/trigger_files/IO_Iceberg_Integration_Tests.json b/.github/trigger_files/IO_Iceberg_Integration_Tests.json index 3f63c0c9975f..bbdc3a3910ef 100644 --- a/.github/trigger_files/IO_Iceberg_Integration_Tests.json +++ b/.github/trigger_files/IO_Iceberg_Integration_Tests.json @@ -1,4 +1,4 @@ { "comment": "Modify this file in a trivial way to cause this test suite to run", - "modification": 2 + "modification": 3 } diff --git a/.github/trigger_files/beam_PostCommit_Python_Xlang_IO_Direct.json b/.github/trigger_files/beam_PostCommit_Python_Xlang_IO_Direct.json index b26833333238..e3d6056a5de9 100644 --- a/.github/trigger_files/beam_PostCommit_Python_Xlang_IO_Direct.json +++ b/.github/trigger_files/beam_PostCommit_Python_Xlang_IO_Direct.json @@ -1,4 +1,4 @@ { "comment": "Modify this file in a trivial way to cause this test suite to run", - "modification": 2 + "modification": 1 } diff --git a/sdks/java/io/expansion-service/build.gradle b/sdks/java/io/expansion-service/build.gradle index 421719b8f986..a27a66b1f3dc 100644 --- a/sdks/java/io/expansion-service/build.gradle +++ b/sdks/java/io/expansion-service/build.gradle @@ -60,7 +60,7 @@ dependencies { runtimeOnly library.java.bigdataoss_gcs_connector // Needed for HiveCatalog runtimeOnly ("org.apache.iceberg:iceberg-hive-metastore:1.4.2") - runtimeOnly project(path: ":sdks:java:io:iceberg:hive:exec", configuration: "shadow") + runtimeOnly project(path: ":sdks:java:io:iceberg:hive") runtimeOnly library.java.kafka_clients runtimeOnly library.java.slf4j_jdk14 diff --git a/sdks/java/io/iceberg/hive/build.gradle b/sdks/java/io/iceberg/hive/build.gradle index bfa6c75251c4..2d0d2bcc5cde 100644 --- a/sdks/java/io/iceberg/hive/build.gradle +++ b/sdks/java/io/iceberg/hive/build.gradle @@ -21,19 +21,39 @@ plugins { id 'org.apache.beam.module' } applyJavaNature( automaticModuleName: 'org.apache.beam.sdk.io.iceberg.hive', exportJavadoc: false, - shadowClosure: {}, + publish: false, // it's an intermediate jar for 
io-expansion-service ) description = "Apache Beam :: SDKs :: Java :: IO :: Iceberg :: Hive" ext.summary = "Runtime dependencies needed for Hive catalog integration." def hive_version = "3.1.3" +def hbase_version = "2.6.1-hadoop3" +def hadoop_version = "3.4.1" def iceberg_version = "1.4.2" +def avatica_version = "1.25.0" dependencies { // dependencies needed to run with iceberg's hive catalog - runtimeOnly ("org.apache.iceberg:iceberg-hive-metastore:$iceberg_version") - runtimeOnly project(path: ":sdks:java:io:iceberg:hive:exec", configuration: "shadow") + // these dependencies are going to be included in io-expansion-service + implementation ("org.apache.iceberg:iceberg-hive-metastore:$iceberg_version") + permitUnusedDeclared ("org.apache.iceberg:iceberg-hive-metastore:$iceberg_version") + // analyzeClassesDependencies fails with "Cannot accept visitor on URL", likely the plugin does not recognize "core" classifier + // use "core" classifier to depend on un-shaded jar + runtimeOnly ("org.apache.hive:hive-exec:$hive_version:core") { + // old hadoop-yarn-server-resourcemanager contains critical log4j vulnerability + exclude group: "org.apache.hadoop", module: "hadoop-yarn-server-resourcemanager" + // old hbase-client contains critical log4j and hadoop vulnerabilities + exclude group: "org.apache.hbase", module: "hbase-client" + // old calcite leaks old protobuf-java + exclude group: "org.apache.calcite.avatica", module: "avatica" + } + runtimeOnly ("org.apache.hadoop:hadoop-yarn-server-resourcemanager:$hadoop_version") + runtimeOnly ("org.apache.hbase:hbase-client:$hbase_version") + runtimeOnly ("org.apache.calcite.avatica:avatica-core:$avatica_version") + implementation ("org.apache.hive:hive-metastore:$hive_version") + runtimeOnly ("org.apache.iceberg:iceberg-parquet:$iceberg_version") + permitUnusedDeclared ("org.apache.hive:hive-metastore:$hive_version") // ----- below dependencies are for testing and will not appear in the shaded jar ----- // 
Beam IcebergIO dependencies @@ -52,8 +72,6 @@ dependencies { testImplementation library.java.junit // needed to set up test Hive Metastore and run tests - testImplementation ("org.apache.iceberg:iceberg-hive-metastore:$iceberg_version") - testImplementation project(path: ":sdks:java:io:iceberg:hive:exec", configuration: "shadow") testRuntimeOnly ("org.apache.hive.hcatalog:hive-hcatalog-core:$hive_version") { exclude group: "org.apache.hive", module: "hive-exec" exclude group: "org.apache.parquet", module: "parquet-hadoop-bundle" @@ -62,6 +80,11 @@ dependencies { testImplementation "org.apache.parquet:parquet-column:1.12.0" } +configurations.all { + // the fatjar "parquet-hadoop-bundle" conflicts with "parquet-hadoop" used by org.apache.iceberg:iceberg-parquet + exclude group: "org.apache.parquet", module: "parquet-hadoop-bundle" +} + task integrationTest(type: Test) { group = "Verification" def gcpTempLocation = project.findProperty('gcpTempLocation') ?: 'gs://temp-storage-for-end-to-end-tests/iceberg-hive-it' diff --git a/sdks/java/io/iceberg/hive/exec/build.gradle b/sdks/java/io/iceberg/hive/exec/build.gradle deleted file mode 100644 index f266ab2ef4db..000000000000 --- a/sdks/java/io/iceberg/hive/exec/build.gradle +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * License); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -plugins { - id 'org.apache.beam.module' - id 'java' - id 'com.github.johnrengelman.shadow' -} - -dependencies { - implementation("org.apache.hive:hive-exec:3.1.3") - permitUnusedDeclared("org.apache.hive:hive-exec:3.1.3") -} - -configurations { - shadow -} - -artifacts { - shadow(archives(shadowJar) { - builtBy shadowJar - }) -} - -shadowJar { - zip64 true - - def problematicPackages = [ - 'com.google.protobuf', - 'com.google.common', - 'shaded.parquet', - 'org.apache.parquet', - 'org.joda' - ] - - problematicPackages.forEach { - relocate it, getJavaRelocatedPath("iceberg.hive.${it}") - } - - version "3.1.3" - mergeServiceFiles() - - exclude 'LICENSE' - exclude( - 'org/xml/**', - 'javax/**', - 'com/sun/**' - ) -} -description = "Apache Beam :: SDKs :: Java :: IO :: Iceberg :: Hive :: Exec" -ext.summary = "A copy of the hive-exec dependency with some popular libraries relocated." diff --git a/settings.gradle.kts b/settings.gradle.kts index d90bb3fb5b82..a8bee45a05ac 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -357,5 +357,3 @@ include("sdks:java:extensions:combiners") findProject(":sdks:java:extensions:combiners")?.name = "combiners" include("sdks:java:io:iceberg:hive") findProject(":sdks:java:io:iceberg:hive")?.name = "hive" -include("sdks:java:io:iceberg:hive:exec") -findProject(":sdks:java:io:iceberg:hive:exec")?.name = "exec"