From eca7c17eb03ac287e0f8b2e0cb221fb08e174fda Mon Sep 17 00:00:00 2001 From: Sebastian Date: Wed, 22 Nov 2023 15:09:31 +0100 Subject: [PATCH 1/7] bump: bump avro to 1.13.1 --- project/Dependencies.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 296f70d3d6..379503d6cb 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -163,7 +163,7 @@ object Dependencies { val AvroParquet = Seq( libraryDependencies ++= Seq( - "org.apache.parquet" % "parquet-avro" % "1.10.1" //Apache2 + "org.apache.parquet" % "parquet-avro" % "1.13.1" //Apache2 ) ) @@ -230,7 +230,7 @@ object Dependencies { libraryDependencies ++= Seq( // https://github.com/googleapis/java-bigquerystorage/tree/master/proto-google-cloud-bigquerystorage-v1 "com.google.api.grpc" % "proto-google-cloud-bigquerystorage-v1" % "1.22.0" % "protobuf-src", // ApacheV2 - "org.apache.avro" % "avro" % "1.9.2" % "provided", + "org.apache.avro" % "avro" % "1.11.3" % "provided", "org.apache.arrow" % "arrow-vector" % "4.0.0" % "provided", "io.grpc" % "grpc-auth" % akka.grpc.gen.BuildInfo.grpcVersion, // ApacheV2 "com.typesafe.akka" %% "akka-http-spray-json" % AkkaHttpVersion, From 2e02b6f6737b6e15c6455a72e8dc5b7d9b360a38 Mon Sep 17 00:00:00 2001 From: Sebastian Date: Wed, 22 Nov 2023 15:16:14 +0100 Subject: [PATCH 2/7] override to fixed version --- project/Dependencies.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 379503d6cb..dda29e815d 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -163,8 +163,10 @@ object Dependencies { val AvroParquet = Seq( libraryDependencies ++= Seq( - "org.apache.parquet" % "parquet-avro" % "1.13.1" //Apache2 - ) + "org.apache.parquet" % "parquet-avro" % "1.13.1", //Apache2 + "org.apache.avro" % "avro" % "1.11.3" + ), + ) val AvroParquetTests = Seq( From 
7d369b4a36619e87ccc1dfa347c5d0e33a7286f8 Mon Sep 17 00:00:00 2001 From: sebastian-alfers Date: Wed, 22 Nov 2023 15:42:47 +0100 Subject: [PATCH 3/7] comment to explain the override --- project/Dependencies.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index dda29e815d..6814f8a193 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -164,7 +164,8 @@ object Dependencies { val AvroParquet = Seq( libraryDependencies ++= Seq( "org.apache.parquet" % "parquet-avro" % "1.13.1", //Apache2 - "org.apache.avro" % "avro" % "1.11.3" + // override the version brought in by parquet-avro to fix CVE-2023-39410 + "org.apache.avro" % "avro" % "1.11.3" //Apache2 ), ) From 54c85e2e412800654280e4da65501cacf769d277 Mon Sep 17 00:00:00 2001 From: sebastian-alfers Date: Wed, 22 Nov 2023 16:20:40 +0100 Subject: [PATCH 4/7] sbt fmt, remove deprecated api --- .../src/test/scala/docs/scaladsl/AbstractAvroParquet.scala | 7 +++---- project/Dependencies.scala | 3 +-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/avroparquet-tests/src/test/scala/docs/scaladsl/AbstractAvroParquet.scala b/avroparquet-tests/src/test/scala/docs/scaladsl/AbstractAvroParquet.scala index b55cf80e54..d3772d2248 100644 --- a/avroparquet-tests/src/test/scala/docs/scaladsl/AbstractAvroParquet.scala +++ b/avroparquet-tests/src/test/scala/docs/scaladsl/AbstractAvroParquet.scala @@ -5,7 +5,6 @@ package docs.scaladsl import java.io.File - import akka.testkit.TestKit import com.sksamuel.avro4s.RecordFormat import org.apache.avro.Schema @@ -14,7 +13,7 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.parquet.avro.{AvroParquetReader, AvroParquetWriter, AvroReadSupport} import org.apache.parquet.hadoop.{ParquetReader, ParquetWriter} -import org.apache.parquet.hadoop.util.HadoopInputFile +import org.apache.parquet.hadoop.util.{HadoopInputFile, HadoopOutputFile} 
import org.scalacheck.Gen import org.scalatest.{BeforeAndAfterAll, Suite} @@ -48,8 +47,8 @@ trait AbstractAvroParquet extends BeforeAndAfterAll { conf.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY, true) def parquetWriter[T <: GenericRecord](file: String, conf: Configuration, schema: Schema): ParquetWriter[T] = - AvroParquetWriter.builder[T](new Path(file)).withConf(conf).withSchema(schema).build() - + AvroParquetWriter.builder[T](HadoopOutputFile.fromPath(new Path(file), conf)).withConf(conf).withSchema(schema).build() + def parquetReader[T <: GenericRecord](file: String, conf: Configuration): ParquetReader[T] = AvroParquetReader.builder[T](HadoopInputFile.fromPath(new Path(file), conf)).withConf(conf).build() diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 6814f8a193..770adcc178 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -166,8 +166,7 @@ object Dependencies { "org.apache.parquet" % "parquet-avro" % "1.13.1", //Apache2 // override the version brought in by parquet-avro to fix CVE-2023-39410 "org.apache.avro" % "avro" % "1.11.3" //Apache2 - ), - + ) ) val AvroParquetTests = Seq( From d81f6c81787491e1a8feee33a2c68727ad0736a1 Mon Sep 17 00:00:00 2001 From: sebastian-alfers Date: Wed, 22 Nov 2023 16:30:58 +0100 Subject: [PATCH 5/7] fix more deprecations --- .../docs/scaladsl/AbstractAvroParquet.scala | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/avroparquet-tests/src/test/scala/docs/scaladsl/AbstractAvroParquet.scala b/avroparquet-tests/src/test/scala/docs/scaladsl/AbstractAvroParquet.scala index d3772d2248..6e65d050bf 100644 --- a/avroparquet-tests/src/test/scala/docs/scaladsl/AbstractAvroParquet.scala +++ b/avroparquet-tests/src/test/scala/docs/scaladsl/AbstractAvroParquet.scala @@ -47,8 +47,12 @@ trait AbstractAvroParquet extends BeforeAndAfterAll { conf.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY, true) def parquetWriter[T <: GenericRecord](file: String, conf: 
Configuration, schema: Schema): ParquetWriter[T] = - AvroParquetWriter.builder[T](HadoopOutputFile.fromPath(new Path(file), conf)).withConf(conf).withSchema(schema).build() - + AvroParquetWriter + .builder[T](HadoopOutputFile.fromPath(new Path(file), conf)) + .withConf(conf) + .withSchema(schema) + .build() + def parquetReader[T <: GenericRecord](file: String, conf: Configuration): ParquetReader[T] = AvroParquetReader.builder[T](HadoopInputFile.fromPath(new Path(file), conf)).withConf(conf).build() @@ -89,8 +93,11 @@ trait AbstractAvroParquet extends BeforeAndAfterAll { val file: String = "./sample/path/test.parquet" val conf: Configuration = new Configuration() conf.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY, true) - val writer: ParquetWriter[Record] = - AvroParquetWriter.builder[Record](new Path(file)).withConf(conf).withSchema(schema).build() + val writer: ParquetWriter[Record] = AvroParquetWriter + .builder[Record](HadoopOutputFile.fromPath(new Path(file), conf)) + .withConf(conf) + .withSchema(schema) + .build() // #prepare-sink if (writer != null) { // forces val usage } @@ -106,7 +113,11 @@ trait AbstractAvroParquet extends BeforeAndAfterAll { val file: String = "./sample/path/test.parquet" val writer: ParquetWriter[GenericRecord] = - AvroParquetWriter.builder[GenericRecord](new Path(file)).withConf(conf).withSchema(schema).build() + AvroParquetWriter + .builder[GenericRecord](HadoopOutputFile.fromPath(new Path(file), conf)) + .withConf(conf) + .withSchema(schema) + .build() // #init-writer // #init-reader val reader: ParquetReader[GenericRecord] = From 4d705b93e00c4a948b80c0ca0aa9a95b5a35dfd3 Mon Sep 17 00:00:00 2001 From: sebastian-alfers Date: Wed, 22 Nov 2023 16:38:23 +0100 Subject: [PATCH 6/7] more deprecation... 
--- .../src/test/java/docs/javadsl/AvroParquetSinkTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/avroparquet-tests/src/test/java/docs/javadsl/AvroParquetSinkTest.java b/avroparquet-tests/src/test/java/docs/javadsl/AvroParquetSinkTest.java index bd2eb8eab5..8874d996a2 100644 --- a/avroparquet-tests/src/test/java/docs/javadsl/AvroParquetSinkTest.java +++ b/avroparquet-tests/src/test/java/docs/javadsl/AvroParquetSinkTest.java @@ -19,6 +19,7 @@ import org.apache.parquet.avro.AvroReadSupport; import org.apache.parquet.hadoop.ParquetFileWriter; import org.apache.parquet.hadoop.ParquetReader; +import org.apache.parquet.hadoop.util.HadoopOutputFile; import org.junit.After; import org.junit.Before; import org.junit.Rule; @@ -75,7 +76,7 @@ public void createNewParquetFile() Configuration conf = new Configuration(); conf.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY, true); ParquetWriter writer = - AvroParquetWriter.builder(new Path(file)) + AvroParquetWriter.builder(HadoopOutputFile.fromPath(new Path(file), conf)) .withConf(conf) .withWriteMode(ParquetFileWriter.Mode.OVERWRITE) .withSchema(schema) From 684e98907b436cc8bb02e8a71d8383a5a8bd9129 Mon Sep 17 00:00:00 2001 From: sebastian-alfers Date: Wed, 22 Nov 2023 16:52:00 +0100 Subject: [PATCH 7/7] more deprecation --- avroparquet-tests/src/test/java/docs/javadsl/Examples.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/avroparquet-tests/src/test/java/docs/javadsl/Examples.java b/avroparquet-tests/src/test/java/docs/javadsl/Examples.java index e4093523e1..3b7cf4775a 100644 --- a/avroparquet-tests/src/test/java/docs/javadsl/Examples.java +++ b/avroparquet-tests/src/test/java/docs/javadsl/Examples.java @@ -22,6 +22,7 @@ import org.apache.avro.Schema; import akka.stream.javadsl.Source; import org.apache.parquet.avro.AvroParquetReader; +import org.apache.parquet.hadoop.util.HadoopOutputFile; // #init-reader public class Examples { @@ -53,7 +54,7 @@ public Examples() throws 
IOException { // #init-flow ParquetWriter writer = - AvroParquetWriter.builder(new Path("./test.parquet")) + AvroParquetWriter.builder(HadoopOutputFile.fromPath(new Path("./test.parquet"), conf)) .withConf(conf) .withSchema(schema) .build();