Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add Hive 4.0.0 #818

Merged
merged 5 commits into from
Sep 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ All notable changes to this project will be documented in this file.
- vector: Add version `0.40.0` ([#802]).
- airflow: Add version `2.9.3` ([#809]).
- kafka: Add version `3.8.0` ([#813]).
- hive: Add version `4.0.0` ([#818]).

### Removed

Expand All @@ -27,6 +28,7 @@ All notable changes to this project will be documented in this file.
[#809]: https://github.com/stackabletech/docker-images/pull/809
[#811]: https://github.com/stackabletech/docker-images/pull/811
[#813]: https://github.com/stackabletech/docker-images/pull/813
[#818]: https://github.com/stackabletech/docker-images/pull/818

## [24.7.0] - 2024-07-24

Expand Down
4 changes: 2 additions & 2 deletions hadoop/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ COPY hadoop/stackable/patches /stackable/patches
RUN curl --fail -L "https://repo.stackable.tech/repository/packages/hadoop/hadoop-${PRODUCT}-src.tar.gz" | tar -xzC . && \
patches/apply_patches.sh ${PRODUCT} && \
cd hadoop-${PRODUCT}-src && \
mvn clean package -Pdist,native -pl '!hadoop-tools/hadoop-pipes,!hadoop-yarn-project,!hadoop-mapreduce-project,!hadoop-minicluster' -Drequire.fuse=true -DskipTests -Dmaven.javadoc.skip=true && \
mvn --no-transfer-progress clean package -Pdist,native -pl '!hadoop-tools/hadoop-pipes,!hadoop-yarn-project,!hadoop-mapreduce-project,!hadoop-minicluster' -Drequire.fuse=true -DskipTests -Dmaven.javadoc.skip=true && \
cp -r hadoop-dist/target/hadoop-${PRODUCT} /stackable/hadoop-${PRODUCT} && \
# HDFS fuse-dfs is not part of the regular dist output, so we need to copy it in ourselves
cp hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/fuse-dfs/fuse_dfs /stackable/hadoop-${PRODUCT}/bin && \
Expand Down Expand Up @@ -100,7 +100,7 @@ WORKDIR /stackable

RUN curl --fail -L "https://github.com/stackabletech/hdfs-utils/archive/refs/tags/v${HDFS_UTILS}.tar.gz" | tar -xzC . && \
cd hdfs-utils-${HDFS_UTILS} && \
mvn clean package -P hadoop-${PRODUCT} -DskipTests -Dmaven.javadoc.skip=true && \
mvn --no-transfer-progress clean package -P hadoop-${PRODUCT} -DskipTests -Dmaven.javadoc.skip=true && \
mkdir -p /stackable/hadoop-${PRODUCT}/share/hadoop/common/lib && \
cp target/hdfs-utils-$HDFS_UTILS.jar /stackable/hadoop-${PRODUCT}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar && \
rm -rf /stackable/hdfs-utils-main
Expand Down
22 changes: 18 additions & 4 deletions hive/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,25 @@ curl --fail -L "https://repo.stackable.tech/repository/packages/hive/apache-hive
patches/apply_patches.sh ${PRODUCT}

cd /stackable/apache-hive-${PRODUCT}-src/
mvn --batch-mode --no-transfer-progress clean package -DskipTests --projects standalone-metastore
mv standalone-metastore/target/apache-hive-metastore-${PRODUCT}-bin/apache-hive-metastore-${PRODUCT}-bin /stackable

ln -s /stackable/apache-hive-metastore-${PRODUCT}-bin/ /stackable/hive-metastore
cp /stackable/bin/start-metastore /stackable/hive-metastore/bin
if [[ "${PRODUCT}" == "3.1.3" ]] ; then
mvn --batch-mode --no-transfer-progress clean package -DskipTests --projects standalone-metastore
mv standalone-metastore/target/apache-hive-metastore-${PRODUCT}-bin/apache-hive-metastore-${PRODUCT}-bin /stackable
else
# https://issues.apache.org/jira/browse/HIVE-20451 switched the metastore server packaging starting with 4.0.0
cd standalone-metastore
mvn --batch-mode --no-transfer-progress clean package -DskipTests --projects metastore-server

# We only seem to get a .tar.gz archive, so let's extract that to the correct location
cd /stackable
tar --extract -f /stackable/apache-hive-${PRODUCT}-src/standalone-metastore/metastore-server/target/apache-hive-standalone-metastore-server-${PRODUCT}-bin.tar.gz

# TODO: Remove once the fix https://github.com/apache/hive/pull/5419 is merged and released
# schemaTool.sh still points to the MetastoreSchemaTool class location used by Hive < 4.0.0 (upstream apparently forgot to update it), so we rewrite the CLASS value here
sed -i -e 's/CLASS=org.apache.hadoop.hive.metastore.tools.MetastoreSchemaTool/CLASS=org.apache.hadoop.hive.metastore.tools.schematool.MetastoreSchemaTool/' /stackable/apache-hive-metastore-${PRODUCT}-bin/bin/ext/schemaTool.sh
fi

cp /stackable/bin/start-metastore /stackable/apache-hive-metastore-${PRODUCT}-bin/bin
rm -rf /stackable/apache-hive-${PRODUCT}-src

curl --fail -L "https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" -o "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar"
Expand Down
24 changes: 24 additions & 0 deletions hive/stackable/patches/4.0.0/01-postgres-driver.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
diff --git a/standalone-metastore/metastore-server/pom.xml b/standalone-metastore/metastore-server/pom.xml
index a8f680928c..7102f1b5ca 100644
--- a/standalone-metastore/metastore-server/pom.xml
+++ b/standalone-metastore/metastore-server/pom.xml
@@ -334,7 +334,6 @@
<dependency>
<groupId>org.postgresql</groupId>
<artifactId>postgresql</artifactId>
- <optional>true</optional>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
diff --git a/standalone-metastore/pom.xml b/standalone-metastore/pom.xml
index 28ac5ceb65..e3cbd821bd 100644
--- a/standalone-metastore/pom.xml
+++ b/standalone-metastore/pom.xml
@@ -397,7 +397,6 @@
<groupId>org.postgresql</groupId>
<artifactId>postgresql</artifactId>
<version>${postgres.version}</version>
- <scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
16 changes: 16 additions & 0 deletions hive/stackable/patches/4.0.0/02-logging-dependencies.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
diff --git a/standalone-metastore/pom.xml b/standalone-metastore/pom.xml
index 28ac5ceb65..03097e7f40 100644
--- a/standalone-metastore/pom.xml
+++ b/standalone-metastore/pom.xml
@@ -494,6 +494,11 @@
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</dependency>
+ <dependency>
+ <!-- Optional log4j dependency to be able to use the XmlLayout -->
+ <groupId>com.fasterxml.jackson.dataformat</groupId>
+ <artifactId>jackson-dataformat-xml</artifactId>
+ </dependency>
</dependencies>
<build>
<pluginManagement>
12 changes: 12 additions & 0 deletions hive/versions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@
versions = [
{
"product": "4.0.0",
"jmx_exporter": "1.0.1",
# Hive 4 must be built with Java 8 (according to the upstream GitHub README), but it seems to run on Java 11
"java-base": "11",
"java-devel": "1.8.0",
"hadoop": "3.3.6",
# Keep consistent with the dependency from Hadoop: https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.3.6
"aws_java_sdk_bundle": "1.12.367",
"azure_storage": "7.0.1",
"azure_keyvault_core": "1.0.0",
},
{
"product": "3.1.3",
"jmx_exporter": "1.0.1",
Expand Down