From 6cbb87d3c5845279234c14731537a8abe420aeac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mariusz=20Kry=C5=84ski?= Date: Mon, 31 May 2021 11:41:53 +0200 Subject: [PATCH] move to contribs --- build.sbt | 3 +++ .../scala/io}/delta/storage/BaseExternalLogStore.scala | 5 +++-- .../main/scala/io}/delta/storage/DynamoDBLogStore.scala | 9 ++++----- .../main/scala/io}/delta/storage/MemoryLogStore.scala | 4 ++-- examples/python/extra/dynamodb_logstore.py | 7 ++++--- 5 files changed, 16 insertions(+), 12 deletions(-) rename {src/main/scala/org/apache/spark/sql => contribs/src/main/scala/io}/delta/storage/BaseExternalLogStore.scala (98%) rename {src/main/scala/org/apache/spark/sql => contribs/src/main/scala/io}/delta/storage/DynamoDBLogStore.scala (97%) rename {src/main/scala/org/apache/spark/sql => contribs/src/main/scala/io}/delta/storage/MemoryLogStore.scala (97%) diff --git a/build.sbt b/build.sbt index 07556e2c96d..b8125e645b5 100644 --- a/build.sbt +++ b/build.sbt @@ -102,6 +102,9 @@ lazy val contribs = (project in file("contribs")) commonSettings, scalaStyleSettings, releaseSettings, + libraryDependencies ++= Seq( + "com.amazonaws" % "aws-java-sdk" % "1.7.4" + ), (mappings in (Compile, packageBin)) := (mappings in (Compile, packageBin)).value ++ listPythonFiles(baseDirectory.value.getParentFile / "python"), diff --git a/src/main/scala/org/apache/spark/sql/delta/storage/BaseExternalLogStore.scala b/contribs/src/main/scala/io/delta/storage/BaseExternalLogStore.scala similarity index 98% rename from src/main/scala/org/apache/spark/sql/delta/storage/BaseExternalLogStore.scala rename to contribs/src/main/scala/io/delta/storage/BaseExternalLogStore.scala index 6222557df24..93dddb5c456 100644 --- a/src/main/scala/org/apache/spark/sql/delta/storage/BaseExternalLogStore.scala +++ b/contribs/src/main/scala/io/delta/storage/BaseExternalLogStore.scala @@ -1,5 +1,5 @@ /* - * Copyright (2020) The Delta Lake Project Authors. + * Copyright (2021) The Delta Lake Project Authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ * limitations under the License. */ -package org.apache.spark.sql.delta.storage +package io.delta.storage import java.io.FileNotFoundException import java.net.URI @@ -28,6 +28,7 @@ import org.apache.hadoop.fs._ import org.apache.spark.SparkConf import org.apache.spark.sql.delta.metering.DeltaLogging import org.apache.spark.sql.delta.util.FileNames +import org.apache.spark.sql.delta.storage.HadoopFileSystemLogStore abstract class BaseExternalLogStore(sparkConf: SparkConf, hadoopConf: Configuration) extends HadoopFileSystemLogStore(sparkConf, hadoopConf) diff --git a/src/main/scala/org/apache/spark/sql/delta/storage/DynamoDBLogStore.scala b/contribs/src/main/scala/io/delta/storage/DynamoDBLogStore.scala similarity index 97% rename from src/main/scala/org/apache/spark/sql/delta/storage/DynamoDBLogStore.scala rename to contribs/src/main/scala/io/delta/storage/DynamoDBLogStore.scala index e1ee14e0800..7573db5bc9e 100644 --- a/src/main/scala/org/apache/spark/sql/delta/storage/DynamoDBLogStore.scala +++ b/contribs/src/main/scala/io/delta/storage/DynamoDBLogStore.scala @@ -1,5 +1,5 @@ /* - * Copyright (2020) The Delta Lake Project Authors. + * Copyright (2021) The Delta Lake Project Authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,9 +14,10 @@ * limitations under the License. */ -package org.apache.spark.sql.delta.storage +package io.delta.storage import scala.collection.JavaConverters._ +import scala.language.implicitConversions import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient import com.amazonaws.auth.BasicAWSCredentials import com.amazonaws.services.dynamodbv2.model.{ @@ -35,8 +36,6 @@ import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.SparkConf import com.amazonaws.regions.Region import com.amazonaws.regions.Regions -import org.apache.spark.sql.delta.storage - /* DynamoDB requirements: @@ -126,7 +125,7 @@ class DynamoDBLogStore( val length = item.get("length").getN.toLong val modificationTime = item.get("modificationTime").getN.toLong val isComplete = item.get("isComplete").getS() == "true" - storage.LogEntryMetadata( + LogEntryMetadata( path = new Path(s"$parentPath/$filename"), tempPath = tempPath, length = length, diff --git a/src/main/scala/org/apache/spark/sql/delta/storage/MemoryLogStore.scala b/contribs/src/main/scala/io/delta/storage/MemoryLogStore.scala similarity index 97% rename from src/main/scala/org/apache/spark/sql/delta/storage/MemoryLogStore.scala rename to contribs/src/main/scala/io/delta/storage/MemoryLogStore.scala index 71cb04525f6..f6ae6148057 100644 --- a/src/main/scala/org/apache/spark/sql/delta/storage/MemoryLogStore.scala +++ b/contribs/src/main/scala/io/delta/storage/MemoryLogStore.scala @@ -1,5 +1,5 @@ /* - * Copyright (2020) The Delta Lake Project Authors. + * Copyright (2021) The Delta Lake Project Authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ * limitations under the License. */ -package org.apache.spark.sql.delta.storage +package io.delta.storage import java.util.concurrent.{ConcurrentHashMap, TimeUnit} diff --git a/examples/python/extra/dynamodb_logstore.py b/examples/python/extra/dynamodb_logstore.py index 1324ec5fb49..ea5cc6a86f9 100644 --- a/examples/python/extra/dynamodb_logstore.py +++ b/examples/python/extra/dynamodb_logstore.py @@ -34,8 +34,8 @@ pass # Create SparkContext -sc = SparkContext() -sqlContext = SQLContext(sc) +# sc = SparkContext() +# sqlContext = SQLContext(sc) # Enable SQL for the current spark session. we need to set the following configs to enable SQL # Commands @@ -47,9 +47,10 @@ .master("local[*]") \ .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \ .config("spark.sql.sources.parallelPartitionDiscovery.parallelism", "8") \ - .config("spark.delta.logStore.class", "org.apache.spark.sql.delta.storage.DynamoDBLogStore") \ + .config("spark.delta.logStore.class", "io.delta.storage.DynamoDBLogStore") \ .getOrCreate() + # Apache Spark 2.4.x has a known issue (SPARK-25003) that requires explicit activation # of the extension and cloning of the session. This will unnecessary in Apache Spark 3.x. if spark.sparkContext.version < "3.":