#48 no storer write fix - hadoopfs default storer is used
 - hdfs test enabled for build, while s3 ignored
 - readme update
dk1844 committed Dec 3, 2020
1 parent c7aba25 commit 601c217
Showing 4 changed files with 31 additions and 7 deletions.
28 changes: 24 additions & 4 deletions README.md
@@ -160,7 +160,7 @@ object ExampleSparkJob {
import spark.implicits._

// implicit FS is needed for enableControlMeasuresTracking, setCheckpoint calls, e.g. standard HDFS here:
-implicit val localHdfs = FileSystem.get(new Configuration)
+implicit val localHdfs = FileSystem.get(spark.sparkContext.hadoopConfiguration)

// Initializing library to hook up to Apache Spark
spark.enableControlMeasuresTracking(sourceInfoFile = "data/input/_INFO")
@@ -188,8 +188,28 @@ in 'data/input/_INFO'. Two checkpoints are created. Any business logic can be in
and saving it to Parquet format.

### Storing Measurements in AWS S3
-Starting with version 3.0.0, persistence support for AWS S3 has been added.
-AWS S3 can be both used for loading the measurement data from as well as saving the measurements back to.
+
+#### AWS S3 via Hadoop FS API
+Since version 3.1.0, persistence support for AWS S3 via the Hadoop FS API is available. The usage is the same as with
+regular HDFS, except that a different file system is provided, e.g.:
+```scala
+import java.net.URI
+import org.apache.hadoop.fs.FileSystem
+import org.apache.spark.sql.SparkSession
+
+val spark = SparkSession
+  .builder()
+  .appName("Example Spark Job")
+  .getOrCreate()
+
+val s3Uri = new URI("s3://my-awesome-bucket")
+implicit val fs = FileSystem.get(s3Uri, spark.sparkContext.hadoopConfiguration)
+
+```
+The rest of the usage is the same as in the example listed above.
+
+#### AWS S3 via AWS SDK for S3
+Starting with version 3.0.0, there is also persistence support for AWS S3 via the AWS SDK for S3.

The following example demonstrates the setup:
```scala
@@ -230,7 +250,7 @@ object S3Example {
}

```
-The rest of the processing logic and programatic approach to the library remains unchanged.
+The rest of the processing logic and programmatic approach to the library remains unchanged.
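The body of `S3Example` is collapsed in this diff view. For orientation only, here is a minimal, hypothetical sketch of what persisting a measurements file through the AWS SDK for S3 looks like mechanically (assuming the AWS SDK for Java v2; the bucket, key, and payload below are made up, and this is not the library's own API):
```scala
import software.amazon.awssdk.core.sync.RequestBody
import software.amazon.awssdk.regions.Region
import software.amazon.awssdk.services.s3.S3Client
import software.amazon.awssdk.services.s3.model.PutObjectRequest

object SdkS3WriteSketch {
  def main(args: Array[String]): Unit = {
    // Plain SDK v2 client; credentials are resolved from the default provider chain.
    val s3 = S3Client.builder().region(Region.EU_WEST_1).build()

    // Upload a (made-up) measurements JSON directly, bypassing the Hadoop FS layer.
    val request = PutObjectRequest.builder()
      .bucket("my-awesome-bucket") // hypothetical bucket
      .key("data/output/_INFO")    // hypothetical key
      .build()
    s3.putObject(request, RequestBody.fromString("""{"checkpoints": []}"""))

    s3.close()
  }
}
```
This is the key contrast with the Hadoop FS API route above: the SDK client talks to S3 directly instead of going through a `FileSystem` implementation.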


## Atum library routines
@@ -44,7 +44,9 @@ class SparkQueryExecutionListener(cf: ControlFrameworkState) extends QueryExecutionListener {
writeInfoFileForQuery(qe)(hadoopStorer.outputFs)

case _ =>
Atum.log.info("No usable storer is set, therefore no data will be written the automatically with DF-save to an _INFO file.")
Atum.log.info("No storer is set, using default HadoopFs-based bound with DF-save to an inferred _INFO file path.")
val defaultFs = FileSystem.get(qe.sparkSession.sparkContext.hadoopConfiguration)
writeInfoFileForQuery(qe)(defaultFs)
}

// Notify listeners
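In practice, this fallback means a job that never registers an explicit storer still gets its _INFO file written when a DataFrame is saved. A minimal sketch of that scenario, using only calls that appear in this commit (paths and the checkpoint name are illustrative, and the `AtumImplicits` import is assumed):
```scala
import org.apache.hadoop.fs.FileSystem
import org.apache.spark.sql.SparkSession
import za.co.absa.atum.AtumImplicits._

object DefaultStorerFallback {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("Default storer fallback").getOrCreate()

    // Implicit FS is needed for the enableControlMeasuresTracking / setCheckpoint calls.
    implicit val fs: FileSystem = FileSystem.get(spark.sparkContext.hadoopConfiguration)

    // Note: no storer is registered anywhere in this job.
    spark.enableControlMeasuresTracking(sourceInfoFile = "data/input/_INFO")

    val df = spark.read.parquet("data/input")
      .setCheckpoint("afterLoad") // illustrative checkpoint name

    // Before this commit, the listener only logged that no usable storer was set;
    // now it falls back to the default Hadoop FileSystem and writes the _INFO file
    // to a path inferred from the save destination.
    df.write.parquet("data/output")
  }
}
```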
2 changes: 1 addition & 1 deletion examples/pom.xml
@@ -75,7 +75,7 @@
<artifactId>scalatest-maven-plugin</artifactId>
<version>${scalatest.maven.version}</version>
<configuration>
<skipTests>true</skipTests>
<skipTests>false</skipTests>
</configuration>
</plugin>
<!-- Uber jar generation -->
@@ -15,10 +15,12 @@

package za.co.absa.atum.examples

+import org.scalatest.Ignore
import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.atum.utils._

-class SampleMeasurementsS3RunnerSpec extends AnyFunSuite
+@Ignore
+class SampleMeasurementsS3RunnerExampleSpec extends AnyFunSuite
with SparkJobRunnerMethods
with SparkLocalMaster {

