From 01b3461d990de30e903320c25a2411c0e533a710 Mon Sep 17 00:00:00 2001
From: Tamas Nemeth
Date: Fri, 26 Jul 2024 13:50:34 +0200
Subject: [PATCH] fix(protobuf/build): Fix protobuf check jar script (#11006)

---
 .../java/acryl-spark-lineage/README.md           | 13 ++++++++-----
 .../java/datahub/spark/DatahubSparkListener.java |  1 -
 .../java/datahub-protobuf/scripts/check_jar.sh   |  4 +++-
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/metadata-integration/java/acryl-spark-lineage/README.md b/metadata-integration/java/acryl-spark-lineage/README.md
index 81108aa7b914d..9caa5a6dec65d 100644
--- a/metadata-integration/java/acryl-spark-lineage/README.md
+++ b/metadata-integration/java/acryl-spark-lineage/README.md
@@ -24,7 +24,7 @@ When running jobs using spark-submit, the agent needs to be configured in the co
 
 ```text
 #Configuring DataHub spark agent jar
-spark.jars.packages io.acryl:acryl-spark-lineage:0.2.15
+spark.jars.packages io.acryl:acryl-spark-lineage:0.2.16
 spark.extraListeners datahub.spark.DatahubSparkListener
 spark.datahub.rest.server http://localhost:8080
 ```
@@ -32,7 +32,7 @@ spark.datahub.rest.server http://localhost:8080
 ## spark-submit command line
 
 ```sh
-spark-submit --packages io.acryl:acryl-spark-lineage:0.2.15 --conf "spark.extraListeners=datahub.spark.DatahubSparkListener" my_spark_job_to_run.py
+spark-submit --packages io.acryl:acryl-spark-lineage:0.2.16 --conf "spark.extraListeners=datahub.spark.DatahubSparkListener" my_spark_job_to_run.py
 ```
 
 ### Configuration Instructions: Amazon EMR
@@ -41,7 +41,7 @@ Set the following spark-defaults configuration properties as it stated
 [here](https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-spark-configure.html)
 
 ```text
-spark.jars.packages io.acryl:acryl-spark-lineage:0.2.15
+spark.jars.packages io.acryl:acryl-spark-lineage:0.2.16
 spark.extraListeners datahub.spark.DatahubSparkListener
 spark.datahub.rest.server https://your_datahub_host/gms
 #If you have authentication set up then you also need to specify the Datahub access token
@@ -56,7 +56,7 @@ When running interactive jobs from a notebook, the listener can be configured wh
 spark = SparkSession.builder
 .master("spark://spark-master:7077")
 .appName("test-application")
-.config("spark.jars.packages", "io.acryl:acryl-spark-lineage:0.2.15")
+.config("spark.jars.packages", "io.acryl:acryl-spark-lineage:0.2.16")
 .config("spark.extraListeners", "datahub.spark.DatahubSparkListener")
 .config("spark.datahub.rest.server", "http://localhost:8080")
 .enableHiveSupport()
@@ -79,7 +79,7 @@ appName("test-application")
 config("spark.master","spark://spark-master:7077")
 .
 
-config("spark.jars.packages","io.acryl:acryl-spark-lineage:0.2.13")
+config("spark.jars.packages","io.acryl:acryl-spark-lineage:0.2.16")
 .
 
 config("spark.extraListeners","datahub.spark.DatahubSparkListener")
@@ -356,6 +356,9 @@ Use Java 8 to build the project. The project uses Gradle as the build tool. To b
 
 ## Changelog
 
+### Version 0.2.16
+- Remove logging DataHub config into logs
+
 ### Version 0.2.15
 - Add Kafka emitter to emit lineage to kafka
 - Add File emitter to emit lineage to file
diff --git a/metadata-integration/java/acryl-spark-lineage/src/main/java/datahub/spark/DatahubSparkListener.java b/metadata-integration/java/acryl-spark-lineage/src/main/java/datahub/spark/DatahubSparkListener.java
index d64e159482c1b..52507a682a1f8 100644
--- a/metadata-integration/java/acryl-spark-lineage/src/main/java/datahub/spark/DatahubSparkListener.java
+++ b/metadata-integration/java/acryl-spark-lineage/src/main/java/datahub/spark/DatahubSparkListener.java
@@ -257,7 +257,6 @@ private synchronized SparkLineageConf loadDatahubConfig(
       this.appContext.setDatabricksTags(databricksTags.orElse(null));
     }
 
-    log.info("Datahub configuration: {}", datahubConf.root().render());
     Optional emitterConfig = initializeEmitter(datahubConf);
     SparkLineageConf sparkLineageConf =
         SparkLineageConf.toSparkLineageConf(datahubConf, appContext, emitterConfig.orElse(null));
diff --git a/metadata-integration/java/datahub-protobuf/scripts/check_jar.sh b/metadata-integration/java/datahub-protobuf/scripts/check_jar.sh
index e3aa181c58801..fe3dd8d18f699 100755
--- a/metadata-integration/java/datahub-protobuf/scripts/check_jar.sh
+++ b/metadata-integration/java/datahub-protobuf/scripts/check_jar.sh
@@ -39,7 +39,9 @@ jar -tvf $jarFile |\
   grep -v "darwin" |\
   grep -v "MetadataChangeProposal.avsc" |\
   grep -v "aix" |\
-  grep -v "com/sun/"
+  grep -v "com/sun/" |\
+  grep -v "VersionInfo.java" |\
+  grep -v "mime.types"
 
 if [ $? -ne 0 ]; then
   echo "✅ No unexpected class paths found in ${jarFile}"
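
The check_jar.sh change above only extends the grep -v exclusion chain; the script's verdict still comes from the exit status of the last grep -v in the pipeline, since grep exits non-zero when it selects no lines, i.e. when every jar entry was filtered out. A minimal sketch of that exit-status pattern, using made-up jar entries rather than the real script or jar contents:

#!/bin/bash
# Sketch only: two fake jar entries are pushed through a grep -v allow-list.
# If nothing survives the filters, the final grep -v exits non-zero and the
# check is treated as a pass, mirroring the `if [ $? -ne 0 ]` test in check_jar.sh.
printf '%s\n' "com/sun/Example.class" "org/example/VersionInfo.java" |
  grep -v "com/sun/" |
  grep -v "VersionInfo.java"

if [ $? -ne 0 ]; then
  echo "✅ No unexpected entries"
else
  echo "❌ Unexpected entries found"
  exit 1
fi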