diff --git a/dao/src/main/scala/za/co/absa/enceladus/dao/auth/MenasCredentials.scala b/dao/src/main/scala/za/co/absa/enceladus/dao/auth/MenasCredentials.scala
index 78cfef782..beeb69089 100644
--- a/dao/src/main/scala/za/co/absa/enceladus/dao/auth/MenasCredentials.scala
+++ b/dao/src/main/scala/za/co/absa/enceladus/dao/auth/MenasCredentials.scala
@@ -18,7 +18,7 @@ package za.co.absa.enceladus.dao.auth
import com.typesafe.config.ConfigFactory
import org.apache.spark.sql.SparkSession
import sun.security.krb5.internal.ktab.KeyTab
-import za.co.absa.enceladus.utils.fs.FileSystemVersionUtils
+import za.co.absa.enceladus.utils.fs.HdfsUtils
sealed abstract class MenasCredentials {
val username: String
@@ -40,9 +40,9 @@ object MenasPlainCredentials {
* @return An instance of Menas Credentials.
*/
def fromFile(path: String)(implicit spark: SparkSession): MenasPlainCredentials = {
- val fsUtils = new FileSystemVersionUtils(spark.sparkContext.hadoopConfiguration)
+ val fsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)
- val conf = ConfigFactory.parseString(fsUtils.getFileContent(path))
+ val conf = ConfigFactory.parseString(fsUtils.getLocalOrDistributedFileContent(path))
MenasPlainCredentials(conf.getString("username"), conf.getString("password"))
}
}
@@ -55,9 +55,9 @@ object MenasKerberosCredentials {
* @return An instance of Menas Credentials.
*/
def fromFile(path: String)(implicit spark: SparkSession): MenasKerberosCredentials = {
- val fsUtils = new FileSystemVersionUtils(spark.sparkContext.hadoopConfiguration)
+ val fsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)
- val localKeyTabPath = fsUtils.getLocalPathToFile(path)
+ val localKeyTabPath = fsUtils.getLocalPathToFileOrCopyToLocal(path)
val keytab = KeyTab.getInstance(localKeyTabPath)
val username = keytab.getOneName.getName
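
Side note on the renamed helper: a minimal sketch of how `HdfsUtils` is used here (the constructor and method names are taken from this diff; the wrapping object and its method are illustrative only):

```scala
import org.apache.spark.sql.SparkSession
import za.co.absa.enceladus.utils.fs.HdfsUtils

object CredentialsFileSketch {
  def readCredentialsFile(path: String)(implicit spark: SparkSession): String = {
    // HdfsUtils wraps the Hadoop configuration of the active Spark session
    val fsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)
    // resolves both local paths and paths on the distributed file system
    fsUtils.getLocalOrDistributedFileContent(path)
  }
}
```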
diff --git a/dao/src/test/scala/za/co/absa/enceladus/dao/rest/BaseTestSuite.scala b/dao/src/test/scala/za/co/absa/enceladus/dao/rest/BaseTestSuite.scala
index 6b8941503..16b2db6c8 100644
--- a/dao/src/test/scala/za/co/absa/enceladus/dao/rest/BaseTestSuite.scala
+++ b/dao/src/test/scala/za/co/absa/enceladus/dao/rest/BaseTestSuite.scala
@@ -15,10 +15,12 @@
package za.co.absa.enceladus.dao.rest
-import org.scalatest.mockito.MockitoSugar
-import org.scalatest.{BeforeAndAfter, Matchers, WordSpec}
+import org.mockito.scalatest.MockitoSugar
+import org.scalatest.wordspec.AnyWordSpec
+import org.scalatest.BeforeAndAfter
+import org.scalatest.matchers.should.Matchers
-abstract class BaseTestSuite extends WordSpec
+abstract class BaseTestSuite extends AnyWordSpec
with Matchers
with MockitoSugar
with BeforeAndAfter
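
For reference, ScalaTest 3.x splits each style trait into its own package and artifact; a minimal suite in the migrated form (the test body is illustrative) looks like:

```scala
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

class MigratedStyleExample extends AnyWordSpec with Matchers {
  "the migrated style" should {
    "use AnyWordSpec and the relocated Matchers" in {
      Seq(1, 2, 3).sum shouldBe 6
    }
  }
}
```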
diff --git a/dao/src/test/scala/za/co/absa/enceladus/dao/rest/RestDaoFactorySuite.scala b/dao/src/test/scala/za/co/absa/enceladus/dao/rest/RestDaoFactorySuite.scala
index 937c43815..5d546150e 100644
--- a/dao/src/test/scala/za/co/absa/enceladus/dao/rest/RestDaoFactorySuite.scala
+++ b/dao/src/test/scala/za/co/absa/enceladus/dao/rest/RestDaoFactorySuite.scala
@@ -15,11 +15,12 @@
package za.co.absa.enceladus.dao.rest
-import org.scalatest.{Matchers, WordSpec}
+import org.scalatest.matchers.should.Matchers
+import org.scalatest.wordspec.AnyWordSpec
import za.co.absa.enceladus.dao.UnauthorizedException
import za.co.absa.enceladus.dao.auth.{InvalidMenasCredentials, MenasKerberosCredentials, MenasPlainCredentials}
-class RestDaoFactorySuite extends WordSpec with Matchers {
+class RestDaoFactorySuite extends AnyWordSpec with Matchers {
private val menasApiBaseUrls = List("http://localhost:8080/menas/api")
diff --git a/dao/src/test/scala/za/co/absa/enceladus/dao/rest/auth/AuthClientSuite.scala b/dao/src/test/scala/za/co/absa/enceladus/dao/rest/auth/AuthClientSuite.scala
index 4e0a7df54..d6488918f 100644
--- a/dao/src/test/scala/za/co/absa/enceladus/dao/rest/auth/AuthClientSuite.scala
+++ b/dao/src/test/scala/za/co/absa/enceladus/dao/rest/auth/AuthClientSuite.scala
@@ -16,15 +16,17 @@
package za.co.absa.enceladus.dao.rest.auth
import org.mockito.stubbing.OngoingStubbing
-import org.scalatest.mockito.MockitoSugar
-import org.scalatest.{BeforeAndAfter, Matchers, WordSpec}
+import org.scalatest.matchers.should.Matchers
+import org.mockito.scalatest.MockitoSugar
+import org.scalatest.wordspec.AnyWordSpec
+import org.scalatest.BeforeAndAfter
import org.springframework.http.{HttpHeaders, ResponseEntity}
import org.springframework.util.LinkedMultiValueMap
import org.springframework.web.client.RestTemplate
import za.co.absa.enceladus.dao.UnauthorizedException
import za.co.absa.enceladus.dao.rest.{ApiCaller, ApiCallerStub, AuthClient}
-abstract class AuthClientSuite() extends WordSpec
+abstract class AuthClientSuite() extends AnyWordSpec
with Matchers
with MockitoSugar
with BeforeAndAfter {
diff --git a/dao/src/test/scala/za/co/absa/enceladus/dao/rest/auth/MenasPlainCredentialsSuite.scala b/dao/src/test/scala/za/co/absa/enceladus/dao/rest/auth/MenasPlainCredentialsSuite.scala
index 1b920a7e3..e7c8429d9 100644
--- a/dao/src/test/scala/za/co/absa/enceladus/dao/rest/auth/MenasPlainCredentialsSuite.scala
+++ b/dao/src/test/scala/za/co/absa/enceladus/dao/rest/auth/MenasPlainCredentialsSuite.scala
@@ -15,12 +15,12 @@
package za.co.absa.enceladus.dao.rest.auth
-import org.scalatest.WordSpec
+import org.scalatest.wordspec.AnyWordSpec
import za.co.absa.enceladus.dao.auth.MenasPlainCredentials
-import za.co.absa.enceladus.utils.fs.FileSystemVersionUtils
+import za.co.absa.enceladus.utils.fs.LocalFsUtils
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
-class MenasPlainCredentialsSuite extends WordSpec with SparkTestBase {
+class MenasPlainCredentialsSuite extends AnyWordSpec with SparkTestBase {
"MenasPlainCredentials" should {
"be read from *.conf" in {
@@ -42,9 +42,7 @@ class MenasPlainCredentialsSuite extends WordSpec with SparkTestBase {
val homeDir = System.getProperty("user.home")
val expected = s"$homeDir/dir/file"
- val fsUtils = new FileSystemVersionUtils(spark.sparkContext.hadoopConfiguration)
-
- val actual = fsUtils.replaceHome("~/dir/file")
+ val actual = LocalFsUtils.replaceHome("~/dir/file")
assert(actual == expected)
}
}
diff --git a/data-model/pom.xml b/data-model/pom.xml
index ec7b7d507..0a20ba361 100644
--- a/data-model/pom.xml
+++ b/data-model/pom.xml
@@ -53,6 +53,12 @@
            <version>${scalatest.version}</version>
            <scope>compile</scope>
+        <dependency>
+            <groupId>org.scalatest</groupId>
+            <artifactId>scalatest-funsuite_${scala.compat.version}</artifactId>
+            <version>${scalatest.version}</version>
+            <scope>compile</scope>
+        </dependency>
diff --git a/data-model/src/test/scala/za/co/absa/enceladus/model/conformanceRule/ConformanceRuleTest.scala b/data-model/src/test/scala/za/co/absa/enceladus/model/conformanceRule/ConformanceRuleTest.scala
index 7ee290c8f..757dabda4 100644
--- a/data-model/src/test/scala/za/co/absa/enceladus/model/conformanceRule/ConformanceRuleTest.scala
+++ b/data-model/src/test/scala/za/co/absa/enceladus/model/conformanceRule/ConformanceRuleTest.scala
@@ -17,9 +17,10 @@ package za.co.absa.enceladus.model.conformanceRule
import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature}
import com.fasterxml.jackson.module.scala.DefaultScalaModule
-import org.scalatest.{Matchers, WordSpec}
+import org.scalatest.matchers.should.Matchers
+import org.scalatest.wordspec.AnyWordSpec
-class ConformanceRuleTest extends WordSpec with Matchers {
+class ConformanceRuleTest extends AnyWordSpec with Matchers {
private val objectMapper = new ObjectMapper()
.registerModule(DefaultScalaModule)
diff --git a/data-model/src/test/scala/za/co/absa/enceladus/model/menas/audit/AuditableTest.scala b/data-model/src/test/scala/za/co/absa/enceladus/model/menas/audit/AuditableTest.scala
index 44db23caa..5d76ade0c 100644
--- a/data-model/src/test/scala/za/co/absa/enceladus/model/menas/audit/AuditableTest.scala
+++ b/data-model/src/test/scala/za/co/absa/enceladus/model/menas/audit/AuditableTest.scala
@@ -15,12 +15,12 @@
package za.co.absa.enceladus.model.menas.audit
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.model.Dataset
import za.co.absa.enceladus.model.conformanceRule.{DropConformanceRule, LiteralConformanceRule}
import za.co.absa.enceladus.model.conformanceRule.ConformanceRule
-class AuditableTest extends FunSuite {
+class AuditableTest extends AnyFunSuite {
val obj1 = Dataset(name = "Test DS",
version = 0,
hdfsPath = "oldPath",
diff --git a/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample1.scala b/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample1.scala
index 58fe2d223..eb46b94a4 100644
--- a/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample1.scala
+++ b/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample1.scala
@@ -23,6 +23,7 @@ import za.co.absa.enceladus.dao.auth.MenasKerberosCredentials
import za.co.absa.enceladus.dao.rest.RestDaoFactory
import za.co.absa.enceladus.examples.interpreter.rules.custom.UppercaseCustomConformanceRule
import za.co.absa.enceladus.model.Dataset
+import za.co.absa.enceladus.utils.fs.HdfsUtils
import za.co.absa.enceladus.utils.time.TimeZoneNormalizer
object CustomRuleSample1 {
@@ -37,6 +38,8 @@ object CustomRuleSample1 {
.getOrCreate()
TimeZoneNormalizer.normalizeAll(spark) //normalize the timezone of JVM and the spark session
+ implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)
+
def main(args: Array[String]) {
// scalastyle:off magic.number
val menasBaseUrls = List("http://localhost:8080/menas")
@@ -78,7 +81,7 @@ object CustomRuleSample1 {
.setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled)
.setControlFrameworkEnabled(enableCF)
- val outputData: DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData)
+ val outputData: DataFrame = DynamicInterpreter().interpret(conformanceDef, inputData)
outputData.show(false)
//scalastyle:on magicnumber
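
The interpreter is now obtained via `DynamicInterpreter()` with an implicit `HdfsUtils` in scope. A sketch of the resulting call-site pattern, assuming the implicit argument list suggested by the samples above:

```scala
import org.apache.spark.sql.{DataFrame, SparkSession}
import za.co.absa.enceladus.conformance.config.ConformanceConfig
import za.co.absa.enceladus.conformance.interpreter.{DynamicInterpreter, FeatureSwitches}
import za.co.absa.enceladus.dao.MenasDAO
import za.co.absa.enceladus.model.Dataset
import za.co.absa.enceladus.utils.fs.HdfsUtils

object InterpreterCallSketch {
  // DynamicInterpreter is instantiated via apply() and picks up HdfsUtils implicitly
  def conform(conformanceDef: Dataset, inputData: DataFrame)
             (implicit spark: SparkSession,
              dao: MenasDAO,
              progArgs: ConformanceConfig,
              featureSwitches: FeatureSwitches): DataFrame = {
    implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)
    DynamicInterpreter().interpret(conformanceDef, inputData)
  }
}
```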
diff --git a/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample2.scala b/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample2.scala
index de79ace35..5ce905cde 100644
--- a/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample2.scala
+++ b/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample2.scala
@@ -24,6 +24,7 @@ import za.co.absa.enceladus.dao.auth.MenasKerberosCredentials
import za.co.absa.enceladus.dao.rest.{MenasConnectionStringParser, RestDaoFactory}
import za.co.absa.enceladus.examples.interpreter.rules.custom.LPadCustomConformanceRule
import za.co.absa.enceladus.model.Dataset
+import za.co.absa.enceladus.utils.fs.HdfsUtils
import za.co.absa.enceladus.utils.time.TimeZoneNormalizer
object CustomRuleSample2 {
@@ -38,6 +39,8 @@ object CustomRuleSample2 {
.getOrCreate()
TimeZoneNormalizer.normalizeAll(spark) //normalize the timezone of JVM and the spark session
+ implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)
+
def main(args: Array[String]) {
// scalastyle:off magic.number
val conf = ConfigFactory.load()
@@ -81,7 +84,7 @@ object CustomRuleSample2 {
.setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled)
.setControlFrameworkEnabled(enableCF)
- val outputData: DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData)
+ val outputData: DataFrame = DynamicInterpreter().interpret(conformanceDef, inputData)
outputData.show(false)
// scalastyle:on magic.number
diff --git a/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample3.scala b/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample3.scala
index 932fa9fac..59fe41b7a 100644
--- a/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample3.scala
+++ b/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample3.scala
@@ -24,6 +24,7 @@ import za.co.absa.enceladus.dao.auth.MenasKerberosCredentials
import za.co.absa.enceladus.dao.rest.{MenasConnectionStringParser, RestDaoFactory}
import za.co.absa.enceladus.examples.interpreter.rules.custom.{LPadCustomConformanceRule, UppercaseCustomConformanceRule}
import za.co.absa.enceladus.model.Dataset
+import za.co.absa.enceladus.utils.fs.HdfsUtils
import za.co.absa.enceladus.utils.time.TimeZoneNormalizer
object CustomRuleSample3 {
@@ -33,6 +34,7 @@ object CustomRuleSample3 {
.config("spark.sql.codegen.wholeStage", value = false)
.getOrCreate()
TimeZoneNormalizer.normalizeAll(spark) //normalize the timezone of JVM and the spark session
+ implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)
def main(args: Array[String]): Unit = {
val conf = ConfigFactory.load()
@@ -79,7 +81,7 @@ object CustomRuleSample3 {
.setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled)
.setControlFrameworkEnabled(enableCF)
- val outputData: DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData)
+ val outputData: DataFrame = DynamicInterpreter().interpret(conformanceDef, inputData)
outputData.show()
}
diff --git a/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample4.scala b/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample4.scala
index fcae9619e..47e8dd649 100644
--- a/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample4.scala
+++ b/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample4.scala
@@ -26,6 +26,7 @@ import za.co.absa.enceladus.dao.auth.MenasKerberosCredentials
import za.co.absa.enceladus.dao.rest.{MenasConnectionStringParser, RestDaoFactory}
import za.co.absa.enceladus.examples.interpreter.rules.custom.{LPadCustomConformanceRule, UppercaseCustomConformanceRule}
import za.co.absa.enceladus.model.Dataset
+import za.co.absa.enceladus.utils.fs.HdfsUtils
import za.co.absa.enceladus.utils.time.TimeZoneNormalizer
object CustomRuleSample4 {
@@ -138,6 +139,7 @@ object CustomRuleSample4 {
def main(args: Array[String]): Unit = {
val cmd: CmdConfigLocal = getCmdLineArguments(args)
implicit val spark: SparkSession = buildSparkSession()
+ implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)
val conf = ConfigFactory.load()
val menasBaseUrls = MenasConnectionStringParser.parse(conf.getString("menas.rest.uri"))
@@ -186,7 +188,7 @@ object CustomRuleSample4 {
.setCatalystWorkaroundEnabled(true)
.setControlFrameworkEnabled(false)
- val outputData: DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData)
+ val outputData: DataFrame = DynamicInterpreter().interpret(conformanceDef, inputData)
outputData.show()
saveToCsv(outputData, cmd.outPath)
}
diff --git a/examples/src/test/scala/za/co/absa/enceladus/examples/interpreter/rules/custom/UppercaseCustomConformanceRuleSuite.scala b/examples/src/test/scala/za/co/absa/enceladus/examples/interpreter/rules/custom/UppercaseCustomConformanceRuleSuite.scala
index fb0202ad2..5009aa7c8 100644
--- a/examples/src/test/scala/za/co/absa/enceladus/examples/interpreter/rules/custom/UppercaseCustomConformanceRuleSuite.scala
+++ b/examples/src/test/scala/za/co/absa/enceladus/examples/interpreter/rules/custom/UppercaseCustomConformanceRuleSuite.scala
@@ -17,12 +17,13 @@ package za.co.absa.enceladus.examples.interpreter.rules.custom
import org.apache.spark.sql
import org.apache.spark.sql.DataFrame
-import org.scalatest.FunSuite
-import org.scalatest.mockito.MockitoSugar
+import org.scalatest.funsuite.AnyFunSuite
+import org.mockito.scalatest.MockitoSugar
import za.co.absa.enceladus.conformance.config.ConformanceConfig
import za.co.absa.enceladus.conformance.interpreter.{DynamicInterpreter, FeatureSwitches}
import za.co.absa.enceladus.dao.MenasDAO
import za.co.absa.enceladus.model.Dataset
+import za.co.absa.enceladus.utils.fs.HdfsUtils
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
@@ -32,11 +33,12 @@ object TestOutputRow {
def apply(input: TestInputRow, doneUpper: String): TestOutputRow = TestOutputRow(input.id, input.mandatoryString, input.nullableString, doneUpper)
}
-class UppercaseCustomConformanceRuleSuite extends FunSuite with SparkTestBase with MockitoSugar {
+class UppercaseCustomConformanceRuleSuite extends AnyFunSuite with SparkTestBase with MockitoSugar {
import spark.implicits._
implicit val progArgs: ConformanceConfig = ConformanceConfig() // here we may need to specify some parameters (for certain rules)
implicit val dao: MenasDAO = mock[MenasDAO] // you may have to hard-code your own implementation here (if not working with menas)
+ implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)
val experimentalMR = true
val isCatalystWorkaroundEnabled = true
@@ -67,7 +69,7 @@ class UppercaseCustomConformanceRuleSuite extends FunSuite with SparkTestBase wi
.setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled)
.setControlFrameworkEnabled(enableCF)
- val outputData: sql.DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData)
+ val outputData: sql.DataFrame = DynamicInterpreter().interpret(conformanceDef, inputData)
val output: Seq[TestOutputRow] = outputData.as[TestOutputRow].collect().toSeq
val expected: Seq[TestOutputRow] = (input zip Seq("HELLO WORLD", "ONE RING TO RULE THEM ALL", "ALREADY CAPS")).map(x => TestOutputRow(x._1, x._2))
@@ -101,7 +103,7 @@ class UppercaseCustomConformanceRuleSuite extends FunSuite with SparkTestBase wi
.setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled)
.setControlFrameworkEnabled(enableCF)
- val outputData: sql.DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData)
+ val outputData: sql.DataFrame = DynamicInterpreter().interpret(conformanceDef, inputData)
val output: Seq[TestOutputRow] = outputData.as[TestOutputRow].collect().toSeq
val expected: Seq[TestOutputRow] = (input zip Seq("1", "4", "9")).map(x => TestOutputRow(x._1, x._2))
@@ -134,7 +136,7 @@ class UppercaseCustomConformanceRuleSuite extends FunSuite with SparkTestBase wi
.setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled)
.setControlFrameworkEnabled(enableCF)
- val outputData: sql.DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData)
+ val outputData: sql.DataFrame = DynamicInterpreter().interpret(conformanceDef, inputData)
val output: List[TestOutputRow] = outputData.as[TestOutputRow].collect().toList
val expected: List[TestOutputRow] = (input zip Seq("WHAT A BEAUTIFUL PLACE", "ONE RING TO FIND THEM", null)).map(x => TestOutputRow(x._1, x._2)).toList
diff --git a/examples/src/test/scala/za/co/absa/enceladus/examples/interpreter/rules/custom/XPadCustomConformanceRuleSuite.scala b/examples/src/test/scala/za/co/absa/enceladus/examples/interpreter/rules/custom/XPadCustomConformanceRuleSuite.scala
index 0716dd4a4..c3f656b9b 100644
--- a/examples/src/test/scala/za/co/absa/enceladus/examples/interpreter/rules/custom/XPadCustomConformanceRuleSuite.scala
+++ b/examples/src/test/scala/za/co/absa/enceladus/examples/interpreter/rules/custom/XPadCustomConformanceRuleSuite.scala
@@ -18,14 +18,15 @@ package za.co.absa.enceladus.examples.interpreter.rules.custom
import com.typesafe.config.ConfigFactory
import org.apache.spark.sql
import org.apache.spark.sql.DataFrame
-import org.scalatest.FunSuite
-import org.scalatest.mockito.MockitoSugar
+import org.scalatest.funsuite.AnyFunSuite
+import org.mockito.scalatest.MockitoSugar
import za.co.absa.enceladus.conformance.config.ConformanceConfig
import za.co.absa.enceladus.conformance.interpreter.{DynamicInterpreter, FeatureSwitches}
import za.co.absa.enceladus.dao.MenasDAO
import za.co.absa.enceladus.dao.auth.MenasKerberosCredentials
import za.co.absa.enceladus.dao.rest.{MenasConnectionStringParser, RestDaoFactory}
import za.co.absa.enceladus.model.Dataset
+import za.co.absa.enceladus.utils.fs.HdfsUtils
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
case class XPadTestInputRow(intField: Int, stringField: Option[String])
@@ -34,11 +35,12 @@ object XPadTestOutputRow {
def apply(input: XPadTestInputRow, targetField: String): XPadTestOutputRow = XPadTestOutputRow(input.intField, input.stringField, targetField)
}
-class LpadCustomConformanceRuleSuite extends FunSuite with SparkTestBase with MockitoSugar {
+class LpadCustomConformanceRuleSuite extends AnyFunSuite with SparkTestBase with MockitoSugar {
import spark.implicits._
implicit val progArgs: ConformanceConfig = ConformanceConfig() // here we may need to specify some parameters (for certain rules)
implicit val dao: MenasDAO = mock[MenasDAO] // you may have to hard-code your own implementation here (if not working with menas)
+ implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)
val experimentalMR = true
val isCatalystWorkaroundEnabled = true
@@ -69,7 +71,7 @@ class LpadCustomConformanceRuleSuite extends FunSuite with SparkTestBase with Mo
.setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled)
.setControlFrameworkEnabled(enableCF)
- val outputData: sql.DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData)
+ val outputData: sql.DataFrame = DynamicInterpreter().interpret(conformanceDef, inputData)
val output: List[XPadTestOutputRow] = outputData.as[XPadTestOutputRow].collect().toList
val expected: List[XPadTestOutputRow] = (input zip List("~~~Short", "This is long", "~~~~~~~~")).map(x => XPadTestOutputRow(x._1, x._2))
@@ -102,7 +104,7 @@ class LpadCustomConformanceRuleSuite extends FunSuite with SparkTestBase with Mo
.setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled)
.setControlFrameworkEnabled(enableCF)
- val outputData: sql.DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData)
+ val outputData: sql.DataFrame = DynamicInterpreter().interpret(conformanceDef, inputData)
val output: Seq[XPadTestOutputRow] = outputData.as[XPadTestOutputRow].collect().toSeq
val expected: Seq[XPadTestOutputRow] = (input zip Seq("007", "042", "100000")).map(x => XPadTestOutputRow(x._1, x._2))
@@ -135,7 +137,7 @@ class LpadCustomConformanceRuleSuite extends FunSuite with SparkTestBase with Mo
.setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled)
.setControlFrameworkEnabled(enableCF)
- val outputData: sql.DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData)
+ val outputData: sql.DataFrame = DynamicInterpreter().interpret(conformanceDef, inputData)
val output: List[XPadTestOutputRow] = outputData.as[XPadTestOutputRow].collect().toList
val expected: List[XPadTestOutputRow] = (input zip List("12abcdefgh", "1231231$$$", "1231231231")).map(x => XPadTestOutputRow(x._1, x._2))
@@ -168,7 +170,7 @@ class LpadCustomConformanceRuleSuite extends FunSuite with SparkTestBase with Mo
.setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled)
.setControlFrameworkEnabled(enableCF)
- val outputData: sql.DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData)
+ val outputData: sql.DataFrame = DynamicInterpreter().interpret(conformanceDef, inputData)
val output: List[XPadTestOutputRow] = outputData.as[XPadTestOutputRow].collect().toList
val expected: List[XPadTestOutputRow] = (input zip List("A", "AAAAAAAAAAAAAAAAAAAA", "")).map(x => XPadTestOutputRow(x._1, x._2))
@@ -178,7 +180,7 @@ class LpadCustomConformanceRuleSuite extends FunSuite with SparkTestBase with Mo
}
-class RpadCustomConformanceRuleSuite extends FunSuite with SparkTestBase {
+class RpadCustomConformanceRuleSuite extends AnyFunSuite with SparkTestBase {
import spark.implicits._
@@ -187,6 +189,7 @@ class RpadCustomConformanceRuleSuite extends FunSuite with SparkTestBase {
private val meansCredentials = MenasKerberosCredentials("user@EXAMPLE.COM", "src/test/resources/user.keytab.example")
implicit val progArgs: ConformanceConfig = ConformanceConfig() // here we may need to specify some parameters (for certain rules)
implicit val dao: MenasDAO = RestDaoFactory.getInstance(meansCredentials, menasBaseUrls) // you may have to hard-code your own implementation here (if not working with menas)
+ implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)
val experimentalMR = true
val isCatalystWorkaroundEnabled = true
@@ -217,7 +220,7 @@ class RpadCustomConformanceRuleSuite extends FunSuite with SparkTestBase {
.setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled)
.setControlFrameworkEnabled(enableCF)
- val outputData: sql.DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData)
+ val outputData: sql.DataFrame = DynamicInterpreter().interpret(conformanceDef, inputData)
val output: List[XPadTestOutputRow] = outputData.as[XPadTestOutputRow].collect().toList
val expected: List[XPadTestOutputRow] = (input zip List("Short...", "This is long", "........")).map(x => XPadTestOutputRow(x._1, x._2))
@@ -250,7 +253,7 @@ class RpadCustomConformanceRuleSuite extends FunSuite with SparkTestBase {
.setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled)
.setControlFrameworkEnabled(enableCF)
- val outputData: sql.DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData)
+ val outputData: sql.DataFrame = DynamicInterpreter().interpret(conformanceDef, inputData)
val output: Seq[XPadTestOutputRow] = outputData.as[XPadTestOutputRow].collect().toSeq
val expected: Seq[XPadTestOutputRow] = (input zip Seq("100", "420", "100000")).map(x => XPadTestOutputRow(x._1, x._2))
@@ -283,7 +286,7 @@ class RpadCustomConformanceRuleSuite extends FunSuite with SparkTestBase {
.setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled)
.setControlFrameworkEnabled(enableCF)
- val outputData: sql.DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData)
+ val outputData: sql.DataFrame = DynamicInterpreter().interpret(conformanceDef, inputData)
val output: List[XPadTestOutputRow] = outputData.as[XPadTestOutputRow].collect().toList
val expected: List[XPadTestOutputRow] = (input zip List("abcdefgh12", "$$$1231231", "1231231231")).map(x => XPadTestOutputRow(x._1, x._2))
@@ -316,7 +319,7 @@ class RpadCustomConformanceRuleSuite extends FunSuite with SparkTestBase {
.setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled)
.setControlFrameworkEnabled(enableCF)
- val outputData: sql.DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData)
+ val outputData: sql.DataFrame = DynamicInterpreter().interpret(conformanceDef, inputData)
val output: List[XPadTestOutputRow] = outputData.as[XPadTestOutputRow].collect().toList
val expected: List[XPadTestOutputRow] = (input zip List("A", "AAAAAAAAAAAAAAAAAAAA", "")).map(x => XPadTestOutputRow(x._1, x._2))
diff --git a/menas/src/test/scala/za/co/absa/enceladus/menas/auth/jwt/JwtFactoryTest.scala b/menas/src/test/scala/za/co/absa/enceladus/menas/auth/jwt/JwtFactoryTest.scala
index 4c7936455..a6409af4f 100644
--- a/menas/src/test/scala/za/co/absa/enceladus/menas/auth/jwt/JwtFactoryTest.scala
+++ b/menas/src/test/scala/za/co/absa/enceladus/menas/auth/jwt/JwtFactoryTest.scala
@@ -16,9 +16,10 @@
package za.co.absa.enceladus.menas.auth.jwt
import io.jsonwebtoken.security.WeakKeyException
-import org.scalatest.{Matchers, WordSpec}
+import org.scalatest.matchers.should.Matchers
+import org.scalatest.wordspec.AnyWordSpec
-class JwtFactoryTest extends WordSpec with Matchers {
+class JwtFactoryTest extends AnyWordSpec with Matchers {
private val secret = "1234567890qwertyuiopasdfghjklzxcvbnm"
private val jwtFactory = new JwtFactory(secret)
diff --git a/menas/src/test/scala/za/co/absa/enceladus/menas/controllers/SchemaControllerSuite.scala b/menas/src/test/scala/za/co/absa/enceladus/menas/controllers/SchemaControllerSuite.scala
index 53f9596bb..a8b4fb699 100644
--- a/menas/src/test/scala/za/co/absa/enceladus/menas/controllers/SchemaControllerSuite.scala
+++ b/menas/src/test/scala/za/co/absa/enceladus/menas/controllers/SchemaControllerSuite.scala
@@ -17,8 +17,9 @@ package za.co.absa.enceladus.menas.controllers
import org.mockito.Mockito
import org.scalatest.concurrent.Futures
-import org.scalatest.mockito.MockitoSugar
-import org.scalatest.{AsyncFlatSpec, Matchers}
+import org.mockito.scalatest.MockitoSugar
+import org.scalatest.flatspec.AsyncFlatSpec
+import org.scalatest.matchers.should.Matchers
import za.co.absa.enceladus.menas.models.SchemaApiFeatures
import za.co.absa.enceladus.menas.services.SchemaRegistryService
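
The relocated async style works the same way; a minimal sketch (the test content is illustrative) of `AsyncFlatSpec` with the new `Matchers` import:

```scala
import org.scalatest.flatspec.AsyncFlatSpec
import org.scalatest.matchers.should.Matchers

import scala.concurrent.Future

class AsyncStyleExample extends AsyncFlatSpec with Matchers {
  behavior of "the relocated AsyncFlatSpec"

  it should "map a Future into an assertion" in {
    Future.successful(21 * 2).map(_ shouldBe 42)
  }
}
```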
diff --git a/menas/src/test/scala/za/co/absa/enceladus/menas/integration/repositories/BaseRepositoryTest.scala b/menas/src/test/scala/za/co/absa/enceladus/menas/integration/repositories/BaseRepositoryTest.scala
index afaa00b86..773a448f2 100644
--- a/menas/src/test/scala/za/co/absa/enceladus/menas/integration/repositories/BaseRepositoryTest.scala
+++ b/menas/src/test/scala/za/co/absa/enceladus/menas/integration/repositories/BaseRepositoryTest.scala
@@ -18,7 +18,8 @@ package za.co.absa.enceladus.menas.integration.repositories
import java.util.concurrent.TimeUnit
import org.mongodb.scala.MongoDatabase
-import org.scalatest.{BeforeAndAfter, WordSpec}
+import org.scalatest.wordspec.AnyWordSpec
+import org.scalatest.BeforeAndAfter
import org.springframework.beans.factory.annotation.Autowired
import za.co.absa.enceladus.menas.integration.TestContextManagement
import za.co.absa.enceladus.menas.integration.fixtures.FixtureService
@@ -27,7 +28,7 @@ import za.co.absa.enceladus.menas.services.MigrationService
import scala.concurrent.duration.Duration
import scala.concurrent.{Await, Future}
-abstract class BaseRepositoryTest extends WordSpec with TestContextManagement with BeforeAndAfter {
+abstract class BaseRepositoryTest extends AnyWordSpec with TestContextManagement with BeforeAndAfter {
val awaitDuration: Duration = Duration(2000, TimeUnit.MILLISECONDS)
diff --git a/menas/src/test/scala/za/co/absa/enceladus/menas/schema/SchemaConvertersSuite.scala b/menas/src/test/scala/za/co/absa/enceladus/menas/schema/SchemaConvertersSuite.scala
index d87956178..7c6dceb57 100644
--- a/menas/src/test/scala/za/co/absa/enceladus/menas/schema/SchemaConvertersSuite.scala
+++ b/menas/src/test/scala/za/co/absa/enceladus/menas/schema/SchemaConvertersSuite.scala
@@ -19,10 +19,10 @@ import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature}
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import org.apache.spark.sql.types._
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.menas.utils.converters.SparkMenasSchemaConvertor
-class SchemaConvertersSuite extends FunSuite {
+class SchemaConvertersSuite extends AnyFunSuite {
val objectMapper: ObjectMapper = new ObjectMapper()
.registerModule(DefaultScalaModule)
diff --git a/menas/src/test/scala/za/co/absa/enceladus/menas/services/BaseServiceTest.scala b/menas/src/test/scala/za/co/absa/enceladus/menas/services/BaseServiceTest.scala
index 159123322..f5d581a63 100644
--- a/menas/src/test/scala/za/co/absa/enceladus/menas/services/BaseServiceTest.scala
+++ b/menas/src/test/scala/za/co/absa/enceladus/menas/services/BaseServiceTest.scala
@@ -15,15 +15,16 @@
package za.co.absa.enceladus.menas.services
-import org.scalatest.{BeforeAndAfter, FunSuite}
-import org.scalatest.mockito.MockitoSugar
-
-import scala.concurrent.duration.Duration
import java.util.concurrent.TimeUnit
+import org.mockito.scalatest.MockitoSugar
+import org.scalatest.BeforeAndAfter
+import org.scalatest.funsuite.AnyFunSuite
+
+import scala.concurrent.duration.Duration
import scala.concurrent.{Await, Future}
-abstract class BaseServiceTest extends FunSuite with MockitoSugar with BeforeAndAfter {
+abstract class BaseServiceTest extends AnyFunSuite with MockitoSugar with BeforeAndAfter {
val shortTimeout = Duration(100, TimeUnit.MILLISECONDS)
val longTimeout = Duration(1000, TimeUnit.MILLISECONDS)
diff --git a/menas/src/test/scala/za/co/absa/enceladus/menas/services/DatasetServiceTest.scala b/menas/src/test/scala/za/co/absa/enceladus/menas/services/DatasetServiceTest.scala
index 86057fc30..3cc6e84a3 100644
--- a/menas/src/test/scala/za/co/absa/enceladus/menas/services/DatasetServiceTest.scala
+++ b/menas/src/test/scala/za/co/absa/enceladus/menas/services/DatasetServiceTest.scala
@@ -37,7 +37,7 @@ class DatasetServiceTest extends VersionedModelServiceTest[Dataset] {
val writeException = new MongoWriteException(new WriteError(1, "", new BsonDocument()), new ServerAddress())
Mockito.when(modelRepository.isUniqueName("dataset")).thenReturn(Future.successful(true))
- Mockito.when(modelRepository.create(any[Dataset](), eqTo("user"))).thenReturn(Future.failed(writeException))
+ Mockito.when(modelRepository.create(any[Dataset], eqTo("user"))).thenReturn(Future.failed(writeException))
val result = intercept[ValidationException] {
await(service.create(dataset, "user"))
@@ -52,7 +52,7 @@ class DatasetServiceTest extends VersionedModelServiceTest[Dataset] {
Mockito.when(modelRepository.getVersion("dataset", 1)).thenReturn(Future.successful(Some(dataset)))
Mockito.when(modelRepository.getLatestVersionValue("dataset")).thenReturn(Future.successful(Some(1)))
Mockito.when(modelRepository.isUniqueName("dataset")).thenReturn(Future.successful(true))
- Mockito.when(modelRepository.update(eqTo("user"), any[Dataset]())).thenReturn(Future.failed(writeException))
+ Mockito.when(modelRepository.update(eqTo("user"), any[Dataset])).thenReturn(Future.failed(writeException))
val result = intercept[ValidationException] {
await(service.update("user", dataset))
diff --git a/menas/src/test/scala/za/co/absa/enceladus/menas/services/RunServiceTest.scala b/menas/src/test/scala/za/co/absa/enceladus/menas/services/RunServiceTest.scala
index f832ffc60..29be50476 100644
--- a/menas/src/test/scala/za/co/absa/enceladus/menas/services/RunServiceTest.scala
+++ b/menas/src/test/scala/za/co/absa/enceladus/menas/services/RunServiceTest.scala
@@ -17,7 +17,7 @@ package za.co.absa.enceladus.menas.services
import com.mongodb.{MongoWriteException, ServerAddress, WriteError}
import org.mockito.ArgumentMatchers.any
-import org.mockito.Mockito
+import org.mockito.scalatest.MockitoSugar
import org.mongodb.scala.Completed
import org.mongodb.scala.bson.BsonDocument
import za.co.absa.enceladus.menas.exceptions.ValidationException
@@ -28,7 +28,7 @@ import za.co.absa.enceladus.model.test.factories.RunFactory
import scala.concurrent.Future
-class RunServiceTest extends BaseServiceTest {
+class RunServiceTest extends BaseServiceTest with MockitoSugar {
//mocks
private val runRepository = mock[RunMongoRepository]
@@ -41,7 +41,7 @@ class RunServiceTest extends BaseServiceTest {
test("validate Run with non-unique ID") {
val run = RunFactory.getDummyRun(uniqueId = Option(uniqueId))
- Mockito.when(runRepository.existsId(uniqueId)).thenReturn(Future.successful(true))
+ when(runRepository.existsId(uniqueId)).thenReturn(Future.successful(true))
val validation = await(runService.validate(run))
@@ -60,7 +60,7 @@ class RunServiceTest extends BaseServiceTest {
test("validate valid Run") {
val run = RunFactory.getDummyRun(uniqueId = Option(uniqueId))
- Mockito.when(runRepository.existsId(uniqueId)).thenReturn(Future.successful(false))
+ when(runRepository.existsId(uniqueId)).thenReturn(Future.successful(false))
val validation = await(runService.validate(run))
@@ -72,11 +72,11 @@ class RunServiceTest extends BaseServiceTest {
val run2 = run1.copy(runId = 2)
val writeException = new MongoWriteException(new WriteError(1, "", new BsonDocument()), new ServerAddress())
- Mockito.when(runRepository.getLatestRun("dataset", 1)).thenReturn(
+ when(runRepository.getLatestRun("dataset", 1)).thenReturn(
Future.successful(None),
Future.successful(Some(run1)))
- Mockito.when(runRepository.existsId(any[String]())).thenReturn(Future.successful(false))
- Mockito.when(runRepository.create(any[Run]())).thenReturn(
+ when(runRepository.existsId(any[String])).thenReturn(Future.successful(false))
+ when(runRepository.create(any[Run])).thenReturn(
Future.failed(writeException),
Future.successful(Completed()))
@@ -89,12 +89,12 @@ class RunServiceTest extends BaseServiceTest {
val run2 = run1.copy(runId = 2)
val writeException = new MongoWriteException(new WriteError(1, "", new BsonDocument()), new ServerAddress())
- Mockito.when(runRepository.getLatestRun("dataset", 1)).thenReturn(
+ when(runRepository.getLatestRun("dataset", 1)).thenReturn(
Future.successful(None),
Future.successful(Some(run1)),
Future.successful(Some(run2)))
- Mockito.when(runRepository.existsId(any[String]())).thenReturn(Future.successful(false))
- Mockito.when(runRepository.create(any[Run]())).thenReturn(
+ when(runRepository.existsId(any[String])).thenReturn(Future.successful(false))
+ when(runRepository.create(any[Run])).thenReturn(
Future.failed(writeException),
Future.failed(writeException),
Future.successful(Completed()))
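
With `org.mockito.scalatest.MockitoSugar` mixed in, `when` no longer needs the `Mockito.` prefix and `any[T]` drops its empty parameter list. A self-contained illustration (the repository trait is a placeholder, not Enceladus code):

```scala
import org.mockito.ArgumentMatchers.any
import org.mockito.scalatest.MockitoSugar
import org.scalatest.funsuite.AnyFunSuite

// illustrative collaborator, not part of the Enceladus code base
trait IdRepository {
  def existsId(id: String): Boolean
}

class MockitoSugarStyleExample extends AnyFunSuite with MockitoSugar {
  test("stubbing with the mixed-in when and parameterless any[T]") {
    val repo = mock[IdRepository]
    // `when` comes from the MockitoSugar mixin; `any[T]` no longer needs empty parens
    when(repo.existsId(any[String])).thenReturn(false)
    assert(!repo.existsId("some-id"))
  }
}
```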
diff --git a/menas/src/test/scala/za/co/absa/enceladus/menas/utils/SchemaTypeSuite.scala b/menas/src/test/scala/za/co/absa/enceladus/menas/utils/SchemaTypeSuite.scala
index c709fc1bd..805d5a8c1 100644
--- a/menas/src/test/scala/za/co/absa/enceladus/menas/utils/SchemaTypeSuite.scala
+++ b/menas/src/test/scala/za/co/absa/enceladus/menas/utils/SchemaTypeSuite.scala
@@ -15,10 +15,11 @@
package za.co.absa.enceladus.menas.utils
-import org.scalatest.{FlatSpec, Matchers}
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
import za.co.absa.enceladus.menas.models.rest.exceptions.SchemaFormatException
-class SchemaTypeSuite extends FlatSpec with Matchers {
+class SchemaTypeSuite extends AnyFlatSpec with Matchers {
"SchemaType.fromSchemaName" should "correctly derive SchemaType.Value from string" in {
SchemaType.fromSchemaName("struct") shouldBe SchemaType.Struct
diff --git a/menas/src/test/scala/za/co/absa/enceladus/menas/utils/converters/SparkMenasSchemaConvertorSuite.scala b/menas/src/test/scala/za/co/absa/enceladus/menas/utils/converters/SparkMenasSchemaConvertorSuite.scala
index f3553658b..e38458eda 100644
--- a/menas/src/test/scala/za/co/absa/enceladus/menas/utils/converters/SparkMenasSchemaConvertorSuite.scala
+++ b/menas/src/test/scala/za/co/absa/enceladus/menas/utils/converters/SparkMenasSchemaConvertorSuite.scala
@@ -15,7 +15,7 @@
package za.co.absa.enceladus.menas.utils.converters
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import org.apache.spark.sql.types._
import za.co.absa.enceladus.model._
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
@@ -25,7 +25,7 @@ import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule
import com.fasterxml.jackson.databind.SerializationFeature
import za.co.absa.enceladus.menas.models.rest.exceptions.SchemaParsingException
-class SparkMenasSchemaConvertorSuite extends FunSuite with SparkTestBase {
+class SparkMenasSchemaConvertorSuite extends AnyFunSuite with SparkTestBase {
private val objectMapper = new ObjectMapper()
.registerModule(DefaultScalaModule)
.registerModule(new JavaTimeModule())
diff --git a/menas/src/test/scala/za/co/absa/enceladus/menas/utils/parsers/SchemaParserSuite.scala b/menas/src/test/scala/za/co/absa/enceladus/menas/utils/parsers/SchemaParserSuite.scala
index b7f223770..854f21894 100644
--- a/menas/src/test/scala/za/co/absa/enceladus/menas/utils/parsers/SchemaParserSuite.scala
+++ b/menas/src/test/scala/za/co/absa/enceladus/menas/utils/parsers/SchemaParserSuite.scala
@@ -18,17 +18,18 @@ package za.co.absa.enceladus.menas.utils.parsers
import org.apache.avro.SchemaParseException
import org.apache.commons.io.IOUtils
import org.apache.spark.sql.types.{DataType, DataTypes, StructField, StructType}
-import org.mockito.ArgumentMatchers.any
import org.mockito.Mockito
-import org.scalatest.mockito.MockitoSugar
-import org.scalatest.{Inside, Matchers, WordSpec}
+import org.scalatest.matchers.should.Matchers
+import org.mockito.scalatest.MockitoSugar
+import org.scalatest.Inside
+import org.scalatest.wordspec.AnyWordSpec
import za.co.absa.cobrix.cobol.parser.exceptions.SyntaxErrorException
import za.co.absa.enceladus.menas.TestResourcePath
import za.co.absa.enceladus.menas.models.rest.exceptions.SchemaParsingException
import za.co.absa.enceladus.menas.utils.SchemaType
import za.co.absa.enceladus.menas.utils.converters.SparkMenasSchemaConvertor
-class SchemaParserSuite extends WordSpec with Matchers with MockitoSugar with Inside {
+class SchemaParserSuite extends AnyWordSpec with Matchers with MockitoSugar with Inside {
val mockSchemaConvertor: SparkMenasSchemaConvertor = mock[SparkMenasSchemaConvertor]
val someStructType: StructType = StructType(Seq(StructField(name = "field1", dataType = DataTypes.IntegerType)))
diff --git a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/BaseMigrationSuite.scala b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/BaseMigrationSuite.scala
index e49b74820..3d72d483d 100644
--- a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/BaseMigrationSuite.scala
+++ b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/BaseMigrationSuite.scala
@@ -15,11 +15,11 @@
package za.co.absa.enceladus.migrations.framework
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.migrations.framework.fixture.MigrationTestData._
import za.co.absa.enceladus.migrations.framework.fixture.MigrationTestDoubles._
-class BaseMigrationSuite extends FunSuite {
+class BaseMigrationSuite extends AnyFunSuite {
test("Test collection names are determined properly for a given db version") {
val mig = new Migrator(DocumentDbStub, MigrationExample0 :: MigrationExample1 :: Nil)
diff --git a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/MigrationUseCaseSuite.scala b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/MigrationUseCaseSuite.scala
index 4ed821ada..6a75b6fd5 100644
--- a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/MigrationUseCaseSuite.scala
+++ b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/MigrationUseCaseSuite.scala
@@ -15,10 +15,10 @@
package za.co.absa.enceladus.migrations.framework
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.migrations.framework.fixture.UseCaseTestData
-class MigrationUseCaseSuite extends FunSuite {
+class MigrationUseCaseSuite extends AnyFunSuite {
test("Test a database initialization") {
val testData = new UseCaseTestData
diff --git a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/ObjectIdToolsSuite.scala b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/ObjectIdToolsSuite.scala
index 03632ff4c..3bd98cefa 100644
--- a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/ObjectIdToolsSuite.scala
+++ b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/ObjectIdToolsSuite.scala
@@ -15,9 +15,9 @@
package za.co.absa.enceladus.migrations.framework
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
-class ObjectIdToolsSuite extends FunSuite {
+class ObjectIdToolsSuite extends AnyFunSuite {
test("Test ObjectId extractor ") {
val doc1 = """{ "_id" : { "$oid" : "5b98eea5a43a28a6154a2453" }, "name" : "Test" }"""
diff --git a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/continuous/EntityMapSuite.scala b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/continuous/EntityMapSuite.scala
index f077ea5e7..6dcf65598 100644
--- a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/continuous/EntityMapSuite.scala
+++ b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/continuous/EntityMapSuite.scala
@@ -15,11 +15,11 @@
package za.co.absa.enceladus.migrations.framework.continuous
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.migrations.continuous.EntityVersionMap
import za.co.absa.enceladus.migrations.framework.continuous.fixture.EntityVersionMapMock
-class EntityMapSuite extends FunSuite {
+class EntityMapSuite extends AnyFunSuite {
test("Test entity version map returns correct mapping when it is available") {
val enp: EntityVersionMap = new EntityVersionMapMock
diff --git a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/continuous/integration/ContinuousMigrationIntegrationSuite.scala b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/continuous/integration/ContinuousMigrationIntegrationSuite.scala
index e7d1a5a98..80f9330af 100644
--- a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/continuous/integration/ContinuousMigrationIntegrationSuite.scala
+++ b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/continuous/integration/ContinuousMigrationIntegrationSuite.scala
@@ -15,11 +15,11 @@
package za.co.absa.enceladus.migrations.framework.continuous.integration
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.migrations.continuous.migrate01.ContinuousMigrator
import za.co.absa.enceladus.migrations.framework.continuous.integration.fixture.ExampleDatabaseFixture
-class ContinuousMigrationIntegrationSuite extends FunSuite with ExampleDatabaseFixture {
+class ContinuousMigrationIntegrationSuite extends AnyFunSuite with ExampleDatabaseFixture {
test("Test schema migrates properly and conflicts are resolved") {
val mig = new ContinuousMigrator(db, db)
diff --git a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/continuous/integration/EntityMapIntegrationSuite.scala b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/continuous/integration/EntityMapIntegrationSuite.scala
index 321e7a696..511cd9e27 100644
--- a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/continuous/integration/EntityMapIntegrationSuite.scala
+++ b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/continuous/integration/EntityMapIntegrationSuite.scala
@@ -15,11 +15,11 @@
package za.co.absa.enceladus.migrations.framework.continuous.integration
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.migrations.continuous.{EntityVersionMap, EntityVersionMapMongo}
import za.co.absa.enceladus.migrations.framework.integration.fixture.MongoDbFixture
-class EntityMapIntegrationSuite extends FunSuite with MongoDbFixture {
+class EntityMapIntegrationSuite extends AnyFunSuite with MongoDbFixture {
test("Test entity version map returns correct mapping when it is available") {
val enp: EntityVersionMap = new EntityVersionMapMongo(dbRaw)
diff --git a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/integration/MigrationsIntegrationSuite.scala b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/integration/MigrationsIntegrationSuite.scala
index 5382fe37a..8a2a2f8e2 100644
--- a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/integration/MigrationsIntegrationSuite.scala
+++ b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/integration/MigrationsIntegrationSuite.scala
@@ -15,14 +15,14 @@
package za.co.absa.enceladus.migrations.framework.integration
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.migrations.framework.Configuration.DatabaseVersionCollectionName
import za.co.absa.enceladus.migrations.framework.Migrator
import za.co.absa.enceladus.migrations.framework.dao.ScalaMongoImplicits
import za.co.absa.enceladus.migrations.framework.integration.fixture.MigrationsFixture
import za.co.absa.enceladus.migrations.framework.integration.data.IntegrationTestData
-class MigrationsIntegrationSuite extends FunSuite with MigrationsFixture {
+class MigrationsIntegrationSuite extends AnyFunSuite with MigrationsFixture {
val testData = new IntegrationTestData
import testData._
diff --git a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/integration/MongoDbIntegrationSuite.scala b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/integration/MongoDbIntegrationSuite.scala
index 1320e1956..15addb22a 100644
--- a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/integration/MongoDbIntegrationSuite.scala
+++ b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/integration/MongoDbIntegrationSuite.scala
@@ -16,11 +16,11 @@
package za.co.absa.enceladus.migrations.framework.integration
import org.mongodb.scala.bson.collection.immutable.Document
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.migrations.framework.integration.fixture.MongoDbFixture
import za.co.absa.enceladus.migrations.framework.migration.{ASC, DESC, IndexField}
-class MongoDbIntegrationSuite extends FunSuite with MongoDbFixture {
+class MongoDbIntegrationSuite extends AnyFunSuite with MongoDbFixture {
import za.co.absa.enceladus.migrations.framework.dao.ScalaMongoImplicits._
test("Test add/drop collections") {
diff --git a/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/buildin/kafka/ControlInfoSerSuite.scala b/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/buildin/kafka/ControlInfoSerSuite.scala
index 723123462..133509e12 100644
--- a/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/buildin/kafka/ControlInfoSerSuite.scala
+++ b/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/buildin/kafka/ControlInfoSerSuite.scala
@@ -16,11 +16,11 @@
package za.co.absa.enceladus.plugins.buildin.kafka
import org.apache.commons.io.IOUtils
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.plugins.buildin.factories.DceControlInfoFactory
import za.co.absa.enceladus.plugins.builtin.controlinfo.ControlInfoAvroSerializer
-class ControlInfoSerSuite extends FunSuite {
+class ControlInfoSerSuite extends AnyFunSuite {
test ("Control info key serialize to Avro") {
val dceControlInfo = DceControlInfoFactory.getDummyDceControlInfo()
val avroControlInfoKey = ControlInfoAvroSerializer.convertInfoKey(dceControlInfo)
diff --git a/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/buildin/kafka/KafkaPluginSuite.scala b/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/buildin/kafka/KafkaPluginSuite.scala
index b673c736f..1f2e0308f 100644
--- a/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/buildin/kafka/KafkaPluginSuite.scala
+++ b/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/buildin/kafka/KafkaPluginSuite.scala
@@ -16,7 +16,7 @@
package za.co.absa.enceladus.plugins.buildin.kafka
import com.typesafe.config.ConfigFactory
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.plugins.buildin.factories.DceControlInfoFactory
import za.co.absa.enceladus.plugins.buildin.kafka.dummy.DummyControlInfoProducer
import za.co.absa.enceladus.plugins.builtin.common.mq.kafka.{KafkaConnectionParams, KafkaSecurityParams}
@@ -24,7 +24,7 @@ import za.co.absa.enceladus.plugins.builtin.controlinfo.mq.ControlInfoSenderPlug
import scala.collection.JavaConverters._
-class KafkaPluginSuite extends FunSuite {
+class KafkaPluginSuite extends AnyFunSuite {
test("Test Kafka info plugin sends control measurements") {
val producer = new DummyControlInfoProducer
val dceControlInfo = DceControlInfoFactory.getDummyDceControlInfo()
diff --git a/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/builtin/errorsender/mq/KafkaErrorSenderPluginSuite.scala b/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/builtin/errorsender/mq/KafkaErrorSenderPluginSuite.scala
index de95d10e4..d2d29d335 100644
--- a/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/builtin/errorsender/mq/KafkaErrorSenderPluginSuite.scala
+++ b/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/builtin/errorsender/mq/KafkaErrorSenderPluginSuite.scala
@@ -22,7 +22,9 @@ import com.github.tomakehurst.wiremock.client.WireMock._
import com.github.tomakehurst.wiremock.core.WireMockConfiguration
import com.typesafe.config.{ConfigFactory, ConfigValueFactory}
import org.apache.spark.sql.DataFrame
-import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers}
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+import org.scalatest.BeforeAndAfterAll
import za.co.absa.abris.avro.read.confluent.SchemaManager
import za.co.absa.enceladus.plugins.builtin.common.mq.kafka.KafkaConnectionParams
import za.co.absa.enceladus.plugins.builtin.errorsender.DceError
@@ -33,7 +35,7 @@ import za.co.absa.enceladus.utils.modules.SourcePhase
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
-class KafkaErrorSenderPluginSuite extends FlatSpec with SparkTestBase with Matchers with BeforeAndAfterAll {
+class KafkaErrorSenderPluginSuite extends AnyFlatSpec with SparkTestBase with Matchers with BeforeAndAfterAll {
private val port = 6081
private val wireMockServer = new WireMockServer(WireMockConfiguration.wireMockConfig().port(port))
diff --git a/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/builtin/errorsender/params/ErrorSenderPluginParamsSuite.scala b/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/builtin/errorsender/params/ErrorSenderPluginParamsSuite.scala
index 07c03eb6f..002638888 100644
--- a/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/builtin/errorsender/params/ErrorSenderPluginParamsSuite.scala
+++ b/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/builtin/errorsender/params/ErrorSenderPluginParamsSuite.scala
@@ -17,10 +17,11 @@ package za.co.absa.enceladus.plugins.builtin.errorsender.params
import java.time.Instant
-import org.scalatest.{FlatSpec, Matchers}
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
import za.co.absa.enceladus.utils.modules.SourcePhase
-class ErrorSenderPluginParamsSuite extends FlatSpec with Matchers {
+class ErrorSenderPluginParamsSuite extends AnyFlatSpec with Matchers {
private val params = ErrorSenderPluginParams(
datasetName = "datasetName1",
diff --git a/pom.xml b/pom.xml
index a65850f27..0b9cb1333 100644
--- a/pom.xml
+++ b/pom.xml
@@ -144,7 +144,7 @@
1.6
3.1.1
- 0.2.6
+ 3.0.0
2.13.65
2.7.3
3.5.4
@@ -163,7 +163,8 @@
4.11
0-10
4.17.10
- 2.10.0
+ 1.15.0
+ 3.5.2
2.22.2
3.6.4
2.7.0
@@ -174,7 +175,7 @@
0.9.0
2.11.12
2.0.0
- 3.0.5
+ 3.2.2
4.0.0-RC2
2.4
0.2.1
@@ -334,10 +335,22 @@
            <version>4.11</version>
            <scope>test</scope>
+        <dependency>
+            <groupId>org.mockito</groupId>
+            <artifactId>mockito-scala_${scala.compat.version}</artifactId>
+            <version>${mockito.scala.version}</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.mockito</groupId>
+            <artifactId>mockito-scala-scalatest_${scala.compat.version}</artifactId>
+            <version>${mockito.scala.version}</version>
+            <scope>test</scope>
+        </dependency>
            <groupId>org.mockito</groupId>
            <artifactId>mockito-core</artifactId>
-            <version>${mockito.version}</version>
+            <version>${mockito.core.version}</version>
            <scope>test</scope>
diff --git a/spark-jobs/src/main/resources/reference.conf b/spark-jobs/src/main/resources/reference.conf
index 461e29b11..48efb6d48 100644
--- a/spark-jobs/src/main/resources/reference.conf
+++ b/spark-jobs/src/main/resources/reference.conf
@@ -95,3 +95,10 @@ timezone="UTC"
# Optional security settings
#kafka.security.protocol="SASL_SSL"
#kafka.sasl.mechanism="GSSAPI"
+
+# S3 specific settings:
+s3.region = "eu-west-1" # default region, overridable
+
+# s3.kmsKeyId is recommended to be set externally only:
+# s3.kmsKeyId = "arn:aws:kms:eu-west-1:XXXX:key/YYYY"
+
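
A hedged sketch of reading the new keys with Typesafe Config (only the key names `s3.region` and `s3.kmsKeyId` come from this diff; the helper itself is illustrative):

```scala
import com.typesafe.config.{Config, ConfigFactory}

object S3SettingsSketch {
  // reference.conf supplies s3.region; s3.kmsKeyId is expected to be provided externally
  def readS3Settings(conf: Config = ConfigFactory.load()): (String, Option[String]) = {
    val region = conf.getString("s3.region")
    val kmsKeyId =
      if (conf.hasPath("s3.kmsKeyId")) Some(conf.getString("s3.kmsKeyId")) else None
    (region, kmsKeyId)
  }
}
```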
diff --git a/spark-jobs/src/main/scala/za/co/absa/enceladus/S3DefaultCredentialsProvider.scala b/spark-jobs/src/main/scala/za/co/absa/enceladus/S3DefaultCredentialsProvider.scala
new file mode 100644
index 000000000..3b209d32e
--- /dev/null
+++ b/spark-jobs/src/main/scala/za/co/absa/enceladus/S3DefaultCredentialsProvider.scala
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2018 ABSA Group Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package za.co.absa.enceladus
+
+import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider
+
+trait S3DefaultCredentialsProvider {
+
+ implicit val defaultCredentialsProvider: DefaultCredentialsProvider = DefaultCredentialsProvider.create()
+
+}
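
The trait just exposes the AWS SDK v2 default credentials chain implicitly; a possible mix-in usage (the consuming object and its method are illustrative):

```scala
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider
import za.co.absa.enceladus.S3DefaultCredentialsProvider

object S3CredentialsUsageSketch extends S3DefaultCredentialsProvider {
  // any call that expects an implicit AwsCredentialsProvider can pick up the mixed-in value
  def describeProvider(implicit provider: AwsCredentialsProvider): String =
    provider.getClass.getSimpleName

  def main(args: Array[String]): Unit =
    println(describeProvider) // resolves to the DefaultCredentialsProvider from the trait
}
```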
diff --git a/spark-jobs/src/main/scala/za/co/absa/enceladus/common/CommonJobExecution.scala b/spark-jobs/src/main/scala/za/co/absa/enceladus/common/CommonJobExecution.scala
index 8e720935d..b11a1c143 100644
--- a/spark-jobs/src/main/scala/za/co/absa/enceladus/common/CommonJobExecution.scala
+++ b/spark-jobs/src/main/scala/za/co/absa/enceladus/common/CommonJobExecution.scala
@@ -22,10 +22,13 @@ import com.typesafe.config.{Config, ConfigFactory}
import org.apache.spark.SPARK_VERSION
import org.apache.spark.sql.SparkSession
import org.slf4j.{Logger, LoggerFactory}
+import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider
+import software.amazon.awssdk.regions.Region
import za.co.absa.atum.AtumImplicits
-import za.co.absa.atum.core.Atum
+import za.co.absa.atum.core.{Atum, ControlType}
+import za.co.absa.atum.persistence.S3KmsSettings
import za.co.absa.enceladus.common.Constants.{InfoDateColumn, InfoVersionColumn}
-import za.co.absa.enceladus.common.config.{JobConfigParser, PathConfig}
+import za.co.absa.enceladus.common.config.{JobConfigParser, PathConfig, S3Config}
import za.co.absa.enceladus.common.plugin.PostProcessingService
import za.co.absa.enceladus.common.plugin.menas.{MenasPlugin, MenasRunUrl}
import za.co.absa.enceladus.common.version.SparkVersionGuard
@@ -34,7 +37,7 @@ import za.co.absa.enceladus.dao.rest.MenasConnectionStringParser
import za.co.absa.enceladus.model.Dataset
import za.co.absa.enceladus.plugins.builtin.errorsender.params.ErrorSenderPluginParams
import za.co.absa.enceladus.utils.config.{ConfigReader, SecureConfig}
-import za.co.absa.enceladus.utils.fs.FileSystemVersionUtils
+import za.co.absa.enceladus.utils.fs.{DistributedFsUtils, S3FsUtils}
import za.co.absa.enceladus.utils.general.ProjectMetadataTools
import za.co.absa.enceladus.utils.modules.SourcePhase
import za.co.absa.enceladus.utils.modules.SourcePhase.Standardization
@@ -42,13 +45,14 @@ import za.co.absa.enceladus.utils.performance.PerformanceMeasurer
import za.co.absa.enceladus.utils.time.TimeZoneNormalizer
import scala.util.control.NonFatal
-import scala.util.{Failure, Random, Success, Try}
+import scala.util.{Failure, Success, Try}
trait CommonJobExecution {
protected case class PreparationResult(dataset: Dataset,
reportVersion: Int,
pathCfg: PathConfig,
+ s3Config: S3Config,
performance: PerformanceMeasurer)
TimeZoneNormalizer.normalizeJVMTimeZone()
@@ -78,7 +82,7 @@ trait CommonJobExecution {
protected def prepareJob[T]()
(implicit dao: MenasDAO,
cmd: JobConfigParser[T],
- fsUtils: FileSystemVersionUtils,
+ fsUtils: DistributedFsUtils,
spark: SparkSession): PreparationResult = {
val confReader: ConfigReader = new ConfigReader(conf)
confReader.logEffectiveConfigProps(Constants.ConfigKeysToRedact)
@@ -87,8 +91,9 @@ trait CommonJobExecution {
val dataset = dao.getDataset(cmd.datasetName, cmd.datasetVersion)
val reportVersion = getReportVersion(cmd, dataset)
val pathCfg: PathConfig = getPathConfig(cmd, dataset, reportVersion)
+ val s3Config: S3Config = getS3Config
- validateOutputPath(fsUtils, pathCfg)
+ validateOutputPath(s3Config, pathCfg)
// Enable Spline
import za.co.absa.spline.harvester.SparkLineageInitializer._
@@ -97,23 +102,21 @@ trait CommonJobExecution {
// Enable non-default persistence storage level if provided in the command line
cmd.persistStorageLevel.foreach(Atum.setCachingStorageLevel)
- PreparationResult(dataset, reportVersion, pathCfg, new PerformanceMeasurer(spark.sparkContext.appName))
+ PreparationResult(dataset, reportVersion, pathCfg, s3Config, new PerformanceMeasurer(spark.sparkContext.appName))
}
- protected def validateOutputPath(fsUtils: FileSystemVersionUtils, pathConfig: PathConfig): Unit
+ protected def validateOutputPath(s3Config: S3Config, pathConfig: PathConfig)(implicit fsUtils: DistributedFsUtils): Unit
- protected def validateIfPathAlreadyExists(fsUtils: FileSystemVersionUtils, path: String): Unit = {
- // TODO fix for s3 [ref issue #1416]
-
-// if (fsUtils.hdfsExists(path)) {
-// throw new IllegalStateException(
-// s"Path $path already exists. Increment the run version, or delete $path"
-// )
-// }
+ protected def validateIfPathAlreadyExists(s3Config: S3Config, path: String)(implicit fsUtils: DistributedFsUtils): Unit = {
+ if (fsUtils.exists(path)) {
+ throw new IllegalStateException(
+ s"Path $path already exists. Increment the run version, or delete $path"
+ )
+ }
}
protected def runPostProcessing[T](sourcePhase: SourcePhase, preparationResult: PreparationResult, jobCmdConfig: JobConfigParser[T])
- (implicit spark: SparkSession, fileSystemVersionUtils: FileSystemVersionUtils): Unit = {
+ (implicit spark: SparkSession, fileSystemVersionUtils: DistributedFsUtils): Unit = {
val outputPath = sourcePhase match {
case Standardization => preparationResult.pathCfg.standardizationPath
case _ => preparationResult.pathCfg.publishPath
@@ -130,8 +133,8 @@ trait CommonJobExecution {
}.mkString(",")
}
- val sourceSystem = "source1" //Atum.getControlMeasure.metadata.sourceApplication // TODO fix for s3 [ref issue #1416]
- val uniqueRunId = Some(s"runId-${Math.abs(Random.nextLong())}") //Atum.getControlMeasure.runUniqueId // TODO fix for s3 [ref issue #1416]
+ val sourceSystem = Atum.getControlMeasure.metadata.sourceApplication
+ val uniqueRunId = Atum.getControlMeasure.runUniqueId
val params = ErrorSenderPluginParams(jobCmdConfig.datasetName,
jobCmdConfig.datasetVersion, jobCmdConfig.reportDate, preparationResult.reportVersion, outputPath,
@@ -166,6 +169,18 @@ trait CommonJobExecution {
)
}
+ protected def getS3Config: S3Config = {
+ val keyId = conf.getString("s3.kmsKeyId")
+ val region = Region.of(conf.getString("s3.region"))
+
+ S3Config(region, keyId)
+ }
+
+ protected def getS3FsUtil(implicit credentialsProvider: AwsCredentialsProvider): S3FsUtils = {
+ val s3Config = getS3Config
+ S3FsUtils(s3Config.region, S3KmsSettings(s3Config.kmsKeyId))
+ }
+
private def buildPublishPath[T](cmd: JobConfigParser[T], ds: Dataset, reportVersion: Int): String = {
val infoDateCol: String = InfoDateColumn
val infoVersionCol: String = InfoVersionColumn
@@ -218,12 +233,13 @@ trait CommonJobExecution {
}
protected def handleEmptyOutput(job: SourcePhase)(implicit spark: SparkSession): Unit = {
- import za.co.absa.atum.core.Constants._
val areCountMeasurementsAllZero = Atum.getControlMeasure.checkpoints
.flatMap(checkpoint =>
checkpoint.controls.filter(control =>
- control.controlName.equalsIgnoreCase(controlTypeRecordCount)))
+ ControlType.isControlMeasureTypeEqual(control.controlType, ControlType.Count.value)
+ )
+ )
.forall(m => Try(m.controlValue.toString.toDouble).toOption.contains(0D))
if (areCountMeasurementsAllZero) {
@@ -235,7 +251,7 @@ trait CommonJobExecution {
}
}
- private def getReportVersion[T](jobConfig: JobConfigParser[T], dataset: Dataset)(implicit fsUtils: FileSystemVersionUtils): Int = {
+ private def getReportVersion[T](jobConfig: JobConfigParser[T], dataset: Dataset)(implicit fsUtils: DistributedFsUtils): Int = {
jobConfig.reportVersion match {
case Some(version) => version
case None =>
diff --git a/spark-jobs/src/main/scala/za/co/absa/enceladus/common/Constants.scala b/spark-jobs/src/main/scala/za/co/absa/enceladus/common/Constants.scala
index da0e4902b..303f470cc 100644
--- a/spark-jobs/src/main/scala/za/co/absa/enceladus/common/Constants.scala
+++ b/spark-jobs/src/main/scala/za/co/absa/enceladus/common/Constants.scala
@@ -31,6 +31,7 @@ object Constants {
"spark.yarn.dist.files",
"spline.mongodb.url",
"sun.boot.class.path",
- "sun.java.command"
+ "sun.java.command",
+ "s3.kmsKeyId"
)
}
diff --git a/spark-jobs/src/main/scala/za/co/absa/enceladus/common/ControlInfoValidation.scala b/spark-jobs/src/main/scala/za/co/absa/enceladus/common/ControlInfoValidation.scala
index 86ca68885..a8f6bb64d 100644
--- a/spark-jobs/src/main/scala/za/co/absa/enceladus/common/ControlInfoValidation.scala
+++ b/spark-jobs/src/main/scala/za/co/absa/enceladus/common/ControlInfoValidation.scala
@@ -15,7 +15,7 @@
package za.co.absa.enceladus.common
-import za.co.absa.atum.core.Atum
+import za.co.absa.atum.core.{Atum, ControlType}
import za.co.absa.atum.model.Checkpoint
import za.co.absa.enceladus.utils.implicits.OptionImplicits._
import za.co.absa.enceladus.utils.validation.ValidationException
@@ -69,14 +69,14 @@ object ControlInfoValidation {
checkpoint <- checkpoints
.find(c => c.name.equalsIgnoreCase(checkpointName) || c.workflowName.equalsIgnoreCase(checkpointName))
.toTry(new Exception(s"Missing $checkpointName checkpoint"))
- measurement <- checkpoint.controls.find(m => m.controlType.equalsIgnoreCase(controlTypeRecordCount))
- .toTry(new Exception(s"$checkpointName checkpoint does not have a $controlTypeRecordCount control"))
+ measurement <- checkpoint.controls.find(m => ControlType.isControlMeasureTypeEqual(m.controlType, ControlType.Count.value))
+ .toTry(new Exception(s"$checkpointName checkpoint does not have a ${ControlType.Count.value} control"))
res <- Try {
val rowCount = measurement.controlValue.toString.toLong
if (rowCount >= 0) rowCount else throw new Exception(s"Negative value")
}.recoverWith {
case t: Throwable =>
- Failure(new Exception(s"Wrong $checkpointName $controlTypeRecordCount value: ${t.getMessage}"))
+ Failure(new Exception(s"Wrong $checkpointName ${ControlType.Count.value} value: ${t.getMessage}"))
}
} yield res
diff --git a/spark-jobs/src/main/scala/za/co/absa/enceladus/common/config/S3Config.scala b/spark-jobs/src/main/scala/za/co/absa/enceladus/common/config/S3Config.scala
new file mode 100644
index 000000000..9be6ea7f5
--- /dev/null
+++ b/spark-jobs/src/main/scala/za/co/absa/enceladus/common/config/S3Config.scala
@@ -0,0 +1,20 @@
+/*
+ * Copyright 2018 ABSA Group Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package za.co.absa.enceladus.common.config
+
+import software.amazon.awssdk.regions.Region
+
+case class S3Config(region: Region, kmsKeyId: String)
diff --git a/spark-jobs/src/main/scala/za/co/absa/enceladus/common/plugin/menas/MenasPlugin.scala b/spark-jobs/src/main/scala/za/co/absa/enceladus/common/plugin/menas/MenasPlugin.scala
index b28219e03..418ed13d0 100644
--- a/spark-jobs/src/main/scala/za/co/absa/enceladus/common/plugin/menas/MenasPlugin.scala
+++ b/spark-jobs/src/main/scala/za/co/absa/enceladus/common/plugin/menas/MenasPlugin.scala
@@ -52,7 +52,7 @@ object MenasPlugin {
isJobStageOnly,
generateNewRun)
listener = Option(eventListener)
- //PluginManager.loadPlugin(eventListener) // TODO fix for s3 [ref issue #1416]
+ PluginManager.loadPlugin(eventListener)
}
def runUniqueId: Option[String] = listener.flatMap(_.runUniqueId)
diff --git a/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/ConformanceExecution.scala b/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/ConformanceExecution.scala
index d0177fdc8..5325eccae 100644
--- a/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/ConformanceExecution.scala
+++ b/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/ConformanceExecution.scala
@@ -20,11 +20,14 @@ import java.io.{PrintWriter, StringWriter}
import org.apache.spark.sql.functions.{lit, to_date}
import org.apache.spark.sql.{DataFrame, SparkSession}
import za.co.absa.atum.AtumImplicits
-import za.co.absa.atum.AtumImplicits._
+import za.co.absa.atum.AtumImplicits.{DataSetWrapper, SparkSessionWrapper}
import za.co.absa.atum.core.Atum
+import za.co.absa.atum.persistence.S3KmsSettings
+import za.co.absa.atum.utils.S3Utils.StringS3LocationExt
+import za.co.absa.enceladus.S3DefaultCredentialsProvider
import za.co.absa.enceladus.common.Constants.{InfoDateColumn, InfoDateColumnString, InfoVersionColumn, ReportDateFormat}
import za.co.absa.enceladus.common.RecordIdGeneration._
-import za.co.absa.enceladus.common.config.{JobConfigParser, PathConfig}
+import za.co.absa.enceladus.common.config.{JobConfigParser, PathConfig, S3Config}
import za.co.absa.enceladus.common.plugin.menas.MenasPlugin
import za.co.absa.enceladus.common.{CommonJobExecution, Constants, RecordIdGeneration}
import za.co.absa.enceladus.conformance.config.{ConformanceConfig, ConformanceConfigParser}
@@ -34,7 +37,7 @@ import za.co.absa.enceladus.dao.MenasDAO
import za.co.absa.enceladus.dao.auth.MenasCredentials
import za.co.absa.enceladus.model.Dataset
import za.co.absa.enceladus.standardization_conformance.config.StandardizationConformanceConfig
-import za.co.absa.enceladus.utils.fs.FileSystemVersionUtils
+import za.co.absa.enceladus.utils.fs.DistributedFsUtils
import za.co.absa.enceladus.utils.implicits.DataFrameImplicits.DataFrameEnhancements
import za.co.absa.enceladus.utils.modules.SourcePhase
import za.co.absa.enceladus.utils.performance.PerformanceMetricTools
@@ -43,37 +46,36 @@ import za.co.absa.enceladus.utils.schema.SchemaUtils
import scala.util.control.NonFatal
import scala.util.{Failure, Success, Try}
-trait ConformanceExecution extends CommonJobExecution {
+trait ConformanceExecution extends CommonJobExecution with S3DefaultCredentialsProvider {
private val conformanceReader = new ConformancePropertiesProvider
private val sourceId = SourcePhase.Conformance
protected def prepareConformance[T](preparationResult: PreparationResult)
(implicit dao: MenasDAO,
cmd: ConformanceConfigParser[T],
- fsUtils: FileSystemVersionUtils,
+ fsUtils: DistributedFsUtils,
spark: SparkSession): Unit = {
- //val stdDirSize = fsUtils.getDirectorySize(preparationResult.pathCfg.standardizationPath)
- //preparationResult.performance.startMeasurement(stdDirSize) // TODO fix for s3 [ref issue #1416]
+
+ val stdDirSize = fsUtils.getDirectorySize(preparationResult.pathCfg.standardizationPath)
+ preparationResult.performance.startMeasurement(stdDirSize)
log.info(s"standardization path: ${preparationResult.pathCfg.standardizationPath}")
log.info(s"publish path: ${preparationResult.pathCfg.publishPath}")
- // Enable Control Framework
- import za.co.absa.atum.AtumImplicits.SparkSessionWrapper
-
// reinitialize Control Framework in case of combined job
if(cmd.isInstanceOf[StandardizationConformanceConfig]) {
spark.disableControlMeasuresTracking()
}
- // InputPath is standardizationPath in the combined job
- // TODO fix for s3 [ref issue #1416]
-// spark.enableControlMeasuresTracking(s"${preparationResult.pathCfg.standardizationPath}/_INFO")
-// .setControlMeasuresWorkflow(sourceId.toString)
+ val dataS3Location = preparationResult.pathCfg.standardizationPath.toS3Location(preparationResult.s3Config.region)
+ val infoS3Location = dataS3Location.copy(path = s"${dataS3Location.path}/_INFO")
+
+ // Enable Control Framework
+ spark.enableControlMeasuresTrackingForS3(sourceS3Location = Some(infoS3Location), destinationS3Config = None)
+ .setControlMeasuresWorkflow(sourceId.toString)
// Enable control framework performance optimization for pipeline-like jobs
- // TODO fix for s3 [ref issue #1416]
- //Atum.setAllowUnpersistOldDatasets(true)
+ Atum.setAllowUnpersistOldDatasets(true)
// Enable Menas plugin for Control Framework
MenasPlugin.enableMenas(
@@ -92,8 +94,8 @@ trait ConformanceExecution extends CommonJobExecution {
}
}
- override def validateOutputPath(fsUtils: FileSystemVersionUtils, pathConfig: PathConfig): Unit = {
- validateIfPathAlreadyExists(fsUtils, pathConfig.publishPath)
+ override def validateOutputPath(s3Config: S3Config, pathConfig: PathConfig)(implicit fsUtils: DistributedFsUtils): Unit = {
+ validateIfPathAlreadyExists(s3Config, pathConfig.publishPath)
}
protected def readConformanceInputData(pathCfg: PathConfig)(implicit spark: SparkSession): DataFrame = {
@@ -101,22 +103,23 @@ trait ConformanceExecution extends CommonJobExecution {
}
protected def conform[T](inputData: DataFrame, preparationResult: PreparationResult)
- (implicit spark: SparkSession, cmd: ConformanceConfigParser[T], dao: MenasDAO): DataFrame = {
+ (implicit spark: SparkSession, cmd: ConformanceConfigParser[T], dao: MenasDAO,
+ fsUtils: DistributedFsUtils): DataFrame = {
val recordIdGenerationStrategy = getRecordIdGenerationStrategyFromConfig(conf)
implicit val featureSwitcher: FeatureSwitches = conformanceReader.readFeatureSwitches()
Try {
- // handleControlInfoValidation() // TODO fix for s3 [ref issue #1416]
- DynamicInterpreter.interpret(preparationResult.dataset, inputData)
+ handleControlInfoValidation()
+ DynamicInterpreter().interpret(preparationResult.dataset, inputData)
} match {
case Failure(e: ValidationException) =>
- // AtumImplicits.SparkSessionWrapper(spark).setControlMeasurementError(sourceId.toString, e.getMessage, e.techDetails) // TODO fix for s3 [ref issue #1416]
+ AtumImplicits.SparkSessionWrapper(spark).setControlMeasurementError(sourceId.toString, e.getMessage, e.techDetails)
throw e
case Failure(NonFatal(e)) =>
val sw = new StringWriter
e.printStackTrace(new PrintWriter(sw))
- // AtumImplicits.SparkSessionWrapper(spark).setControlMeasurementError(sourceId.toString, e.getMessage, sw.toString) // TODO fix for s3 [ref issue #1416]
+ AtumImplicits.SparkSessionWrapper(spark).setControlMeasurementError(sourceId.toString, e.getMessage, sw.toString)
throw e
case Success(conformedDF) =>
if (SchemaUtils.fieldExists(Constants.EnceladusRecordId, conformedDF.schema)) {
@@ -133,56 +136,51 @@ trait ConformanceExecution extends CommonJobExecution {
menasCredentials: MenasCredentials)
(implicit spark: SparkSession,
cmd: ConformanceConfigParser[T],
- fsUtils: FileSystemVersionUtils): Unit = {
+ fsUtils: DistributedFsUtils): Unit = {
val cmdLineArgs: String = args.mkString(" ")
- // TODO fix for s3 [ref issue #1416]
-// PerformanceMetricTools.addJobInfoToAtumMetadata(
-// "conform",
-// preparationResult.pathCfg.standardizationPath,
-// preparationResult.pathCfg.publishPath,
-// menasCredentials.username, cmdLineArgs
-// )
+ PerformanceMetricTools.addJobInfoToAtumMetadata(
+ "conform",
+ preparationResult.pathCfg.standardizationPath,
+ preparationResult.pathCfg.publishPath,
+ menasCredentials.username, cmdLineArgs
+ )
val withPartCols = result
.withColumnIfDoesNotExist(InfoDateColumn, to_date(lit(cmd.reportDate), ReportDateFormat))
.withColumnIfDoesNotExist(InfoDateColumnString, lit(cmd.reportDate))
.withColumnIfDoesNotExist(InfoVersionColumn, lit(preparationResult.reportVersion))
- // TODO fix for s3 [ref issue #1416]
- val recordCount = -1
-// val recordCount = result.lastCheckpointRowCount match {
-// case None => withPartCols.count
-// case Some(p) => p
-// }
+ val recordCount: Long = result.lastCheckpointRowCount match {
+ case None => withPartCols.count
+ case Some(p) => p
+ }
if (recordCount == 0) {
handleEmptyOutput(SourcePhase.Conformance)
}
- // ensure the whole path but version exists
- //fsUtils.createAllButLastSubDir(preparationResult.pathCfg.publishPath) // TODO fix for s3 [ref issue #1416]
-
withPartCols.write.parquet(preparationResult.pathCfg.publishPath)
- // TODO fix for s3 [ref issue #1416]
- //val publishDirSize = fsUtils.getDirectorySize(preparationResult.pathCfg.publishPath)
- // preparationResult.performance.finishMeasurement(publishDirSize, recordCount)
-// PerformanceMetricTools.addPerformanceMetricsToAtumMetadata(
-// spark,
-// "conform",
-// preparationResult.pathCfg.standardizationPath,
-// preparationResult.pathCfg.publishPath,
-// menasCredentials.username, cmdLineArgs
-// )
-
- // TODO fix for s3 [ref issue #1416]
- //withPartCols.writeInfoFile(preparationResult.pathCfg.publishPath)
- //writePerformanceMetrics(preparationResult.performance, cmd)
-
- // TODO fix for s3 [ref issue #1416]
-// if (conformanceReader.isAutocleanStdFolderEnabled()) {
-// fsUtils.deleteDirectoryRecursively(preparationResult.pathCfg.standardizationPath)
-// }
+ val publishDirSize = fsUtils.getDirectorySize(preparationResult.pathCfg.publishPath)
+ preparationResult.performance.finishMeasurement(publishDirSize, recordCount)
+ PerformanceMetricTools.addPerformanceMetricsToAtumMetadata(
+ spark,
+ "conform",
+ preparationResult.pathCfg.standardizationPath,
+ preparationResult.pathCfg.publishPath,
+ menasCredentials.username, cmdLineArgs
+ )
+
+ val infoFilePath = s"${preparationResult.pathCfg.publishPath}/_INFO"
+ val infoFileLocation = infoFilePath.toS3Location(preparationResult.s3Config.region)
+ log.info(s"infoFilePath = $infoFilePath, infoFileLocation = $infoFileLocation")
+
+ withPartCols.writeInfoFileOnS3(infoFileLocation, S3KmsSettings(preparationResult.s3Config.kmsKeyId))
+ writePerformanceMetrics(preparationResult.performance, cmd)
+
+ if (conformanceReader.isAutocleanStdFolderEnabled()) {
+ fsUtils.deleteDirectoryRecursively(preparationResult.pathCfg.standardizationPath)
+ }
log.info(s"$sourceId finished successfully")
}
}
diff --git a/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/DynamicConformanceJob.scala b/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/DynamicConformanceJob.scala
index 15d2a7dc5..b48092382 100644
--- a/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/DynamicConformanceJob.scala
+++ b/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/DynamicConformanceJob.scala
@@ -19,7 +19,7 @@ import org.apache.spark.sql.SparkSession
import za.co.absa.enceladus.conformance.config.ConformanceConfig
import za.co.absa.enceladus.dao.MenasDAO
import za.co.absa.enceladus.dao.rest.RestDaoFactory
-import za.co.absa.enceladus.utils.fs.FileSystemVersionUtils
+import za.co.absa.enceladus.utils.fs.DistributedFsUtils
import za.co.absa.enceladus.utils.modules.SourcePhase
object DynamicConformanceJob extends ConformanceExecution {
@@ -32,7 +32,7 @@ object DynamicConformanceJob extends ConformanceExecution {
implicit val cmd: ConformanceConfig = ConformanceConfig.getFromArguments(args)
implicit val spark: SparkSession = obtainSparkSession(jobName) // initialize spark
- implicit val fsUtils: FileSystemVersionUtils = new FileSystemVersionUtils(spark.sparkContext.hadoopConfiguration)
+ implicit val fsUtils: DistributedFsUtils = getS3FsUtil
val menasCredentials = cmd.menasCredentialsFactory.getInstance()
implicit val dao: MenasDAO = RestDaoFactory.getInstance(menasCredentials, menasBaseUrls)
diff --git a/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/HyperConformance.scala b/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/HyperConformance.scala
index dd19e503d..3ae28f8f7 100644
--- a/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/HyperConformance.scala
+++ b/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/HyperConformance.scala
@@ -32,6 +32,7 @@ import za.co.absa.enceladus.dao.MenasDAO
import za.co.absa.enceladus.dao.auth.{MenasCredentialsFactory, MenasKerberosCredentialsFactory, MenasPlainCredentialsFactory}
import za.co.absa.enceladus.dao.rest.{MenasConnectionStringParser, RestDaoFactory}
import za.co.absa.enceladus.model.Dataset
+import za.co.absa.enceladus.utils.fs.HdfsUtils
import za.co.absa.hyperdrive.ingestor.api.transformer.{StreamTransformer, StreamTransformerFactory}
class HyperConformance (implicit cmd: ConformanceConfig,
@@ -64,7 +65,10 @@ class HyperConformance (implicit cmd: ConformanceConfig,
val infoDateColumn = infoDateFactory.getInfoDateColumn(rawDf)
- val conformedDf = DynamicInterpreter.interpret(conformance, rawDf)
+ // Using the HDFS implementation until HyperConformance is S3-ready
+ implicit val fsUtils: HdfsUtils = new HdfsUtils(sparkSession.sparkContext.hadoopConfiguration)
+
+ val conformedDf = DynamicInterpreter().interpret(conformance, rawDf)
.withColumnIfDoesNotExist(InfoDateColumn, coalesce(infoDateColumn, current_date()))
.withColumnIfDoesNotExist(InfoDateColumnString, coalesce(date_format(infoDateColumn,"yyyy-MM-dd"), lit("")))
.withColumnIfDoesNotExist(InfoVersionColumn, lit(reportVersion))
diff --git a/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/interpreter/DynamicInterpreter.scala b/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/interpreter/DynamicInterpreter.scala
index deace3822..00d9baae2 100644
--- a/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/interpreter/DynamicInterpreter.scala
+++ b/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/interpreter/DynamicInterpreter.scala
@@ -31,12 +31,12 @@ import za.co.absa.enceladus.model.conformanceRule.{ConformanceRule, _}
import za.co.absa.enceladus.model.{Dataset => ConfDataset}
import za.co.absa.enceladus.utils.error.ErrorMessage
import za.co.absa.enceladus.utils.explode.ExplosionContext
-import za.co.absa.enceladus.utils.fs.FileSystemVersionUtils
+import za.co.absa.enceladus.utils.fs.DistributedFsUtils
import za.co.absa.enceladus.utils.general.Algorithms
import za.co.absa.enceladus.utils.schema.SchemaUtils
import za.co.absa.enceladus.utils.udf.UDFLibrary
-object DynamicInterpreter {
+case class DynamicInterpreter(implicit fsUtils: DistributedFsUtils) {
private val log = LoggerFactory.getLogger(this.getClass)
/**
@@ -55,11 +55,11 @@ object DynamicInterpreter {
implicit val interpreterContext: InterpreterContext = InterpreterContext(inputDf.schema, conformance,
featureSwitches, jobShortName, spark, dao, InterpreterContextArgs.fromConformanceConfig(progArgs))
- // applyCheckpoint(inputDf, "Start") // TODO fix for s3 [ref issue #1416]
+ applyCheckpoint(inputDf, "Start")
val conformedDf = applyConformanceRules(ensureErrorColumnExists(inputDf))
- // applyCheckpoint(conformedDf, "End") // TODO fix for s3 [ref issue #1416]
+ applyCheckpoint(conformedDf, "End")
logExecutionPlan(conformedDf)
conformedDf
@@ -264,7 +264,6 @@ object DynamicInterpreter {
*/
private def getMappingTableSizeMb(rule: MappingConformanceRule)
(implicit ictx: InterpreterContext): Int = {
- val fsUtils = new FileSystemVersionUtils(ictx.spark.sparkContext.hadoopConfiguration)
val mappingTableDef = ictx.dao.getMappingTable(rule.mappingTable, rule.mappingTableVersion)
val mappingTablePath = PartitioningUtils.getPartitionedPathName(mappingTableDef.hdfsPath,
diff --git a/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization/StandardizationExecution.scala b/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization/StandardizationExecution.scala
index beb9db4e4..0d0a57d57 100644
--- a/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization/StandardizationExecution.scala
+++ b/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization/StandardizationExecution.scala
@@ -22,8 +22,11 @@ import org.apache.spark.sql.types.{StructField, StructType}
import org.apache.spark.sql.{Column, DataFrame, SparkSession}
import za.co.absa.atum.AtumImplicits
import za.co.absa.atum.core.Atum
+import za.co.absa.atum.persistence.S3KmsSettings
+import za.co.absa.enceladus.S3DefaultCredentialsProvider
import za.co.absa.enceladus.common.RecordIdGeneration.getRecordIdGenerationStrategyFromConfig
-import za.co.absa.enceladus.common.config.{JobConfigParser, PathConfig}
+import za.co.absa.atum.utils.S3Utils.StringS3LocationExt
+import za.co.absa.enceladus.common.config.{JobConfigParser, PathConfig, S3Config}
import za.co.absa.enceladus.common.plugin.menas.MenasPlugin
import za.co.absa.enceladus.common.{CommonJobExecution, Constants}
import za.co.absa.enceladus.dao.MenasDAO
@@ -32,7 +35,7 @@ import za.co.absa.enceladus.model.Dataset
import za.co.absa.enceladus.standardization.config.{StandardizationConfig, StandardizationConfigParser}
import za.co.absa.enceladus.standardization.interpreter.StandardizationInterpreter
import za.co.absa.enceladus.standardization.interpreter.stages.PlainSchemaGenerator
-import za.co.absa.enceladus.utils.fs.FileSystemVersionUtils
+import za.co.absa.enceladus.utils.fs.{DistributedFsUtils, HdfsUtils}
import za.co.absa.enceladus.utils.modules.SourcePhase
import za.co.absa.enceladus.utils.performance.PerformanceMetricTools
import za.co.absa.enceladus.utils.schema.{MetadataKeys, SchemaUtils, SparkUtils}
@@ -41,7 +44,7 @@ import za.co.absa.enceladus.utils.validation.ValidationException
import scala.util.control.NonFatal
-trait StandardizationExecution extends CommonJobExecution {
+trait StandardizationExecution extends CommonJobExecution with S3DefaultCredentialsProvider {
private val sourceId = SourcePhase.Standardization
protected def prepareStandardization[T](args: Array[String],
@@ -49,23 +52,30 @@ trait StandardizationExecution extends CommonJobExecution {
preparationResult: PreparationResult)
(implicit dao: MenasDAO,
cmd: StandardizationConfigParser[T],
- fsUtils: FileSystemVersionUtils,
+ fsUtils: DistributedFsUtils,
spark: SparkSession): StructType = {
- // val stdDirSize = fsUtils.getDirectorySize(preparationResult.pathCfg.rawPath)
- // preparationResult.performance.startMeasurement(stdDirSize) // TODO fix for s3 [ref issue #1416]
+ val stdDirSize = fsUtils.getDirectorySize(preparationResult.pathCfg.rawPath)
+ preparationResult.performance.startMeasurement(stdDirSize)
// Enable Control Framework
-
- // TODO fix for s3 [ref issue #1416]
import za.co.absa.atum.AtumImplicits.SparkSessionWrapper
-// spark.enableControlMeasuresTracking(s"${preparationResult.pathCfg.rawPath}/_INFO")
-// .setControlMeasuresWorkflow(sourceId.toString)
+
+ val inputDataS3Location = preparationResult.pathCfg.rawPath.toS3Location(preparationResult.s3Config.region)
+ val inputInfoS3Location = inputDataS3Location.copy(path = s"${inputDataS3Location.path}/_INFO")
+
+ val outputDataS3Location = preparationResult.pathCfg.standardizationPath.toS3Location(preparationResult.s3Config.region)
+ val outputInfoS3Location = outputDataS3Location.copy(path = s"${outputDataS3Location.path}/_INFO")
+ val kmsSettings = S3KmsSettings(preparationResult.s3Config.kmsKeyId)
+
+ spark.enableControlMeasuresTrackingForS3(sourceS3Location = Some(inputInfoS3Location),
+ destinationS3Config = Some(outputInfoS3Location, kmsSettings))
+ .setControlMeasuresWorkflow(sourceId.toString)
log.info(s"raw path: ${preparationResult.pathCfg.rawPath}")
log.info(s"standardization path: ${preparationResult.pathCfg.standardizationPath}")
// Enable control framework performance optimization for pipeline-like jobs
- //Atum.setAllowUnpersistOldDatasets(true) // TODO fix for s3 [ref issue #1416]
+ Atum.setAllowUnpersistOldDatasets(true)
// Enable Menas plugin for Control Framework
MenasPlugin.enableMenas(
@@ -76,19 +86,16 @@ trait StandardizationExecution extends CommonJobExecution {
preparationResult.reportVersion)
// Add report date and version (aka Enceladus info date and version) to Atum's metadata
- // TODO fix for s3 [ref issue #1416]
- //Atum.setAdditionalInfo(Constants.InfoDateColumn -> cmd.reportDate)
- //Atum.setAdditionalInfo(Constants.InfoVersionColumn -> preparationResult.reportVersion.toString)
+ Atum.setAdditionalInfo(Constants.InfoDateColumn -> cmd.reportDate)
+ Atum.setAdditionalInfo(Constants.InfoVersionColumn -> preparationResult.reportVersion.toString)
- // TODO fix for s3 [ref issue #1416]
// Add the raw format of the input file(s) to Atum's metadata
- //Atum.setAdditionalInfo("raw_format" -> cmd.rawFormat)
+ Atum.setAdditionalInfo("raw_format" -> cmd.rawFormat)
- // TODO fix for s3 [ref issue #1416]
-// PerformanceMetricTools.addJobInfoToAtumMetadata("std",
-// preparationResult.pathCfg.rawPath,
-// preparationResult.pathCfg.standardizationPath,
-// menasCredentials.username, args.mkString(" "))
+ PerformanceMetricTools.addJobInfoToAtumMetadata("std",
+ preparationResult.pathCfg.rawPath,
+ preparationResult.pathCfg.standardizationPath,
+ menasCredentials.username, args.mkString(" "))
dao.getSchema(preparationResult.dataset.schemaName, preparationResult.dataset.schemaVersion)
}
@@ -101,8 +108,8 @@ trait StandardizationExecution extends CommonJobExecution {
}
}
- override def validateOutputPath(fsUtils: FileSystemVersionUtils, pathConfig: PathConfig): Unit = {
- validateIfPathAlreadyExists(fsUtils: FileSystemVersionUtils, pathConfig.standardizationPath)
+ override def validateOutputPath(s3Config: S3Config, pathConfig: PathConfig)(implicit fsUtils: DistributedFsUtils): Unit = {
+ validateIfPathAlreadyExists(s3Config, pathConfig.standardizationPath)
}
protected def readStandardizationInputData[T](schema: StructType,
@@ -110,7 +117,7 @@ trait StandardizationExecution extends CommonJobExecution {
path: String,
dataset: Dataset)
(implicit spark: SparkSession,
- fsUtils: FileSystemVersionUtils,
+ fsUtils: DistributedFsUtils,
dao: MenasDAO): DataFrame = {
val numberOfColumns = schema.fields.length
val standardizationReader = new StandardizationPropertiesProvider()
@@ -130,6 +137,8 @@ trait StandardizationExecution extends CommonJobExecution {
private def getColumnNameOfCorruptRecord[R](schema: StructType, cmd: StandardizationConfigParser[R])
(implicit spark: SparkSession): Option[String] = {
// SparkUtils.setUniqueColumnNameOfCorruptRecord is called even if result is not used to avoid conflict
+
+ import AtumImplicits.DataSetWrapper
val columnNameOfCorruptRecord = SparkUtils.setUniqueColumnNameOfCorruptRecord(spark, schema)
if (cmd.rawFormat.equalsIgnoreCase("fixed-width") || cmd.failOnInputNotPerSchema) {
None
@@ -144,20 +153,18 @@ trait StandardizationExecution extends CommonJobExecution {
val recordIdGenerationStrategy = getRecordIdGenerationStrategyFromConfig(conf)
try {
- //handleControlInfoValidation() // TODO fix for s3 [ref issue #1416]
+ handleControlInfoValidation()
StandardizationInterpreter.standardize(inputData, schema, cmd.rawFormat,
cmd.failOnInputNotPerSchema, recordIdGenerationStrategy)
} catch {
case e@ValidationException(msg, errors) =>
val errorDescription = s"$msg\nDetails: ${errors.mkString("\n")}"
- // TODO fix for s3 [ref issue #1416]
- //AtumImplicits.SparkSessionWrapper(spark).setControlMeasurementError("Schema Validation", errorDescription, "")
+ AtumImplicits.SparkSessionWrapper(spark).setControlMeasurementError("Schema Validation", errorDescription, "")
throw e
case NonFatal(e) if !e.isInstanceOf[ValidationException] =>
val sw = new StringWriter
e.printStackTrace(new PrintWriter(sw))
- // TODO fix for s3 [ref issue #1416]
- //AtumImplicits.SparkSessionWrapper(spark).setControlMeasurementError(sourceId.toString, e.getMessage, sw.toString)
+ AtumImplicits.SparkSessionWrapper(spark).setControlMeasurementError(sourceId.toString, e.getMessage, sw.toString)
throw e
}
}
@@ -169,21 +176,19 @@ trait StandardizationExecution extends CommonJobExecution {
cmd: StandardizationConfigParser[T],
menasCredentials: MenasCredentials)
(implicit spark: SparkSession,
- fsUtils: FileSystemVersionUtils): DataFrame = {
+ fsUtils: DistributedFsUtils): DataFrame = {
import za.co.absa.atum.AtumImplicits._
val fieldRenames = SchemaUtils.getRenamesInSchema(schema)
fieldRenames.foreach {
case (destinationName, sourceName) => standardizedDF.registerColumnRename(sourceName, destinationName)
}
- // standardizedDF.setCheckpoint(s"$sourceId - End", persistInDatabase = false) // TODO fix for s3 [ref issue #1416]
+ standardizedDF.setCheckpoint(s"$sourceId - End", persistInDatabase = false)
- // TODO fix for s3 [ref issue #1416]
-// val recordCount = standardizedDF.lastCheckpointRowCount match {
-// case None => standardizedDF.count
-// case Some(p) => p
-// }
- val recordCount = standardizedDF.count()
+ val recordCount = standardizedDF.lastCheckpointRowCount match {
+ case None => standardizedDF.count
+ case Some(p) => p
+ }
if (recordCount == 0) {
handleEmptyOutput(sourceId)
@@ -191,27 +196,31 @@ trait StandardizationExecution extends CommonJobExecution {
log.info(s"Writing into standardized path ${preparationResult.pathCfg.standardizationPath}")
standardizedDF.write.parquet(preparationResult.pathCfg.standardizationPath)
+
// Store performance metrics
// (record count, directory sizes, elapsed time, etc. to _INFO file metadata and performance file)
- // TODO fix for s3 [ref issue #1416]
- // val stdDirSize = fsUtils.getDirectorySize(preparationResult.pathCfg.standardizationPath)
- // preparationResult.performance.finishMeasurement(stdDirSize, recordCount)
-// PerformanceMetricTools.addPerformanceMetricsToAtumMetadata(
-// spark,
-// "std",
-// preparationResult.pathCfg.rawPath,
-// preparationResult.pathCfg.standardizationPath,
-// menasCredentials.username,
-// args.mkString(" ")
-// )
-
- // TODO fix for s3 [ref issue #1416]
- //cmd.rowTag.foreach(rowTag => Atum.setAdditionalInfo("xml_row_tag" -> rowTag))
- //cmd.csvDelimiter.foreach(delimiter => Atum.setAdditionalInfo("csv_delimiter" -> delimiter))
-
- // standardizedDF.writeInfoFile(preparationResult.pathCfg.standardizationPath) // TODO fix for s3 [ref issue #1416]
- //writePerformanceMetrics(preparationResult.performance, cmd)
+ val stdDirSize = fsUtils.getDirectorySize(preparationResult.pathCfg.standardizationPath)
+ preparationResult.performance.finishMeasurement(stdDirSize, recordCount)
+
+ PerformanceMetricTools.addPerformanceMetricsToAtumMetadata(
+ spark,
+ "std",
+ preparationResult.pathCfg.rawPath,
+ preparationResult.pathCfg.standardizationPath,
+ menasCredentials.username,
+ args.mkString(" ")
+ )
+
+ cmd.rowTag.foreach(rowTag => Atum.setAdditionalInfo("xml_row_tag" -> rowTag))
+ cmd.csvDelimiter.foreach(delimiter => Atum.setAdditionalInfo("csv_delimiter" -> delimiter))
+
+ val infoFilePath = s"${preparationResult.pathCfg.standardizationPath}/_INFO"
+ val infoFileLocation = infoFilePath.toS3Location(preparationResult.s3Config.region)
+ log.info(s"infoFilePath = $infoFilePath, infoFileLocation = $infoFileLocation")
+
+ standardizedDF.writeInfoFileOnS3(infoFileLocation, S3KmsSettings(preparationResult.s3Config.kmsKeyId))
+ writePerformanceMetrics(preparationResult.performance, cmd)
log.info(s"$sourceId finished successfully")
standardizedDF
}
@@ -219,34 +228,41 @@ trait StandardizationExecution extends CommonJobExecution {
//scalastyle:off parameter.number
private def ensureSplittable(df: DataFrame, path: String, schema: StructType)
- (implicit spark: SparkSession, fsUtils: FileSystemVersionUtils) = {
- // TODO fix for s3 [ref issue #1416]
-// if (fsUtils.isNonSplittable(path)) {
-// convertToSplittable(df, schema)
-// } else {
+ (implicit spark: SparkSession, fsUtils: DistributedFsUtils): DataFrame = {
+ if (fsUtils.isNonSplittable(path)) {
+ convertToSplittable(df, schema)
+ } else {
df
-// }
+ }
}
private def convertToSplittable(df: DataFrame, schema: StructType)
- (implicit spark: SparkSession, fsUtils: FileSystemVersionUtils) = {
+ (implicit spark: SparkSession, fsUtils: DistributedFsUtils): DataFrame = {
log.warn("Dataset is stored in a non-splittable format. This can have a severe performance impact.")
- val tempParquetDir = s"/tmp/nonsplittable-to-parquet-${UUID.randomUUID()}"
- log.warn(s"Converting to Parquet in temporary dir: $tempParquetDir")
-
- // Handle renaming of source columns in case there are columns
- // that will break because of issues in column names like spaces
- df.select(schema.fields.map { field: StructField =>
- renameSourceColumn(df, field)
- }: _*).write.parquet(tempParquetDir)
-
- fsUtils.deleteOnExit(tempParquetDir)
- // Reload from temp parquet and reverse column renaming above
- val dfTmp = spark.read.parquet(tempParquetDir)
- dfTmp.select(schema.fields.map { field: StructField =>
- reverseRenameSourceColumn(dfTmp, field)
- }: _*)
+ fsUtils match {
+ case utils: HdfsUtils =>
+ val tempParquetDir = s"/tmp/nonsplittable-to-parquet-${UUID.randomUUID()}"
+ log.warn(s"Converting to Parquet in temporary dir: $tempParquetDir")
+
+ // Handle renaming of source columns in case there are columns
+ // that will break because of issues in column names like spaces
+ df.select(schema.fields.map { field: StructField =>
+ renameSourceColumn(df, field)
+ }: _*).write.parquet(tempParquetDir)
+
+ utils.deleteOnExit(tempParquetDir)
+ // Reload from temp parquet and reverse column renaming above
+ val dfTmp = spark.read.parquet(tempParquetDir)
+ dfTmp.select(schema.fields.map { field: StructField =>
+ reverseRenameSourceColumn(dfTmp, field)
+ }: _*)
+
+ case utils =>
+ log.warn(s"Splittability conversion only available for HDFS, leaving as is for ${utils.getClass.getName}")
+ df
+ }
+
}
private def renameSourceColumn(df: DataFrame, field: StructField): Column = {
diff --git a/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization/StandardizationJob.scala b/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization/StandardizationJob.scala
index b9ff4d9ca..c123a5a1b 100644
--- a/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization/StandardizationJob.scala
+++ b/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization/StandardizationJob.scala
@@ -19,7 +19,7 @@ import org.apache.spark.sql.SparkSession
import za.co.absa.enceladus.dao.MenasDAO
import za.co.absa.enceladus.dao.rest.RestDaoFactory
import za.co.absa.enceladus.standardization.config.StandardizationConfig
-import za.co.absa.enceladus.utils.fs.FileSystemVersionUtils
+import za.co.absa.enceladus.utils.fs.DistributedFsUtils
import za.co.absa.enceladus.utils.modules.SourcePhase
import za.co.absa.enceladus.utils.udf.UDFLibrary
@@ -31,7 +31,8 @@ object StandardizationJob extends StandardizationExecution {
implicit val cmd: StandardizationConfig = StandardizationConfig.getFromArguments(args)
implicit val spark: SparkSession = obtainSparkSession(jobName)
- implicit val fsUtils: FileSystemVersionUtils = new FileSystemVersionUtils(spark.sparkContext.hadoopConfiguration)
+ implicit val fsUtils: DistributedFsUtils = getS3FsUtil
+
implicit val udfLib: UDFLibrary = new UDFLibrary
val menasCredentials = cmd.menasCredentialsFactory.getInstance()
implicit val dao: MenasDAO = RestDaoFactory.getInstance(menasCredentials, menasBaseUrls)
diff --git a/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization_conformance/StandardizationAndConformanceExecution.scala b/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization_conformance/StandardizationAndConformanceExecution.scala
index 1a43e1396..1347fc6c2 100644
--- a/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization_conformance/StandardizationAndConformanceExecution.scala
+++ b/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization_conformance/StandardizationAndConformanceExecution.scala
@@ -16,12 +16,12 @@
package za.co.absa.enceladus.standardization_conformance
import za.co.absa.enceladus.common.CommonJobExecution
-import za.co.absa.enceladus.common.config.{JobConfigParser, PathConfig}
+import za.co.absa.enceladus.common.config.{JobConfigParser, PathConfig, S3Config}
import za.co.absa.enceladus.conformance.ConformanceExecution
import za.co.absa.enceladus.model.Dataset
import za.co.absa.enceladus.standardization.StandardizationExecution
import za.co.absa.enceladus.standardization_conformance.config.StandardizationConformanceConfig
-import za.co.absa.enceladus.utils.fs.FileSystemVersionUtils
+import za.co.absa.enceladus.utils.fs.DistributedFsUtils
trait StandardizationAndConformanceExecution extends StandardizationExecution
with ConformanceExecution
@@ -36,8 +36,8 @@ trait StandardizationAndConformanceExecution extends StandardizationExecution
publishPath = publishPathOverride.getOrElse(defaultConfig.publishPath))
}
- override def validateOutputPath(fsUtils: FileSystemVersionUtils, pathConfig: PathConfig): Unit = {
- validateIfPathAlreadyExists(fsUtils, pathConfig.standardizationPath)
- validateIfPathAlreadyExists(fsUtils, pathConfig.publishPath)
+ override def validateOutputPath(s3Config: S3Config, pathConfig: PathConfig)(implicit fsUtils: DistributedFsUtils): Unit = {
+ validateIfPathAlreadyExists(s3Config, pathConfig.standardizationPath)
+ validateIfPathAlreadyExists(s3Config, pathConfig.publishPath)
}
}
diff --git a/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization_conformance/StandardizationAndConformanceJob.scala b/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization_conformance/StandardizationAndConformanceJob.scala
index b0509ba26..6bb023d1c 100644
--- a/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization_conformance/StandardizationAndConformanceJob.scala
+++ b/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization_conformance/StandardizationAndConformanceJob.scala
@@ -19,7 +19,7 @@ import org.apache.spark.sql.SparkSession
import za.co.absa.enceladus.dao.MenasDAO
import za.co.absa.enceladus.dao.rest.RestDaoFactory
import za.co.absa.enceladus.standardization_conformance.config.StandardizationConformanceConfig
-import za.co.absa.enceladus.utils.fs.FileSystemVersionUtils
+import za.co.absa.enceladus.utils.fs.DistributedFsUtils
import za.co.absa.enceladus.utils.modules.SourcePhase
import za.co.absa.enceladus.utils.udf.UDFLibrary
@@ -31,7 +31,7 @@ object StandardizationAndConformanceJob extends StandardizationAndConformanceExe
implicit val cmd: StandardizationConformanceConfig = StandardizationConformanceConfig.getFromArguments(args)
implicit val spark: SparkSession = obtainSparkSession(jobName)
- implicit val fsUtils: FileSystemVersionUtils = new FileSystemVersionUtils(spark.sparkContext.hadoopConfiguration)
+ implicit val fsUtils: DistributedFsUtils = getS3FsUtil
implicit val udfLib: UDFLibrary = new UDFLibrary
val menasCredentials = cmd.menasCredentialsFactory.getInstance()
implicit val dao: MenasDAO = RestDaoFactory.getInstance(menasCredentials, menasBaseUrls)
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/common/ControlInfoValidationSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/common/ControlInfoValidationSuite.scala
index 938ed2503..ed3c91b90 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/common/ControlInfoValidationSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/common/ControlInfoValidationSuite.scala
@@ -15,39 +15,41 @@
package za.co.absa.enceladus.common
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
+import za.co.absa.atum.core.ControlType
import za.co.absa.atum.model.{Checkpoint, Measurement}
+
import scala.util.Success
-class ControlInfoValidationSuite extends FunSuite {
+class ControlInfoValidationSuite extends AnyFunSuite {
import za.co.absa.atum.core.Constants._
private val checkpoints1 = List(
Checkpoint("raw", None, None, "", "", "", 0, List(
- Measurement("", controlTypeAbsAggregatedTotal, "", 0),
- Measurement("", controlTypeRecordCount, "", 11)
+ Measurement("", ControlType.AbsAggregatedTotal.value, "", 0),
+ Measurement("", ControlType.Count.value, "", 11)
)
),
Checkpoint("source", None, None, "", "", "", 1, List(
- Measurement("", controlTypeRecordCount, "", 3)
+ Measurement("", ControlType.Count.value, "", 3)
)
)
)
private val checkpoints2 = List(
Checkpoint("source", None, None, "", "", "", 1, List(
- Measurement("", controlTypeDistinctCount, "", 1)
+ Measurement("", ControlType.DistinctCount.value, "", 1)
)
)
)
private val checkpoints3 = List(
Checkpoint("raw", None, None, "", "", "", 0, List(
- Measurement("", controlTypeRecordCount, "", -3)
+ Measurement("", ControlType.Count.value, "", -3)
)
),
Checkpoint("source", None, None, "", "", "", 1, List(
- Measurement("", controlTypeRecordCount, "", "")
+ Measurement("", ControlType.Count.value, "", "")
)
)
)
@@ -65,7 +67,7 @@ class ControlInfoValidationSuite extends FunSuite {
val sourceResult = ControlInfoValidation.getCountFromGivenCheckpoint("source", checkpoints2)
val rawError = "Missing raw checkpoint"
- val sourceError = s"source checkpoint does not have a $controlTypeRecordCount control"
+ val sourceError = s"source checkpoint does not have a ${ControlType.Count.value} control"
assert(rawResult.failed.get.getMessage == rawError)
assert(sourceResult.failed.get.getMessage == sourceError)
@@ -75,8 +77,8 @@ class ControlInfoValidationSuite extends FunSuite {
val rawResult = ControlInfoValidation.getCountFromGivenCheckpoint("raw", checkpoints3)
val sourceResult = ControlInfoValidation.getCountFromGivenCheckpoint("source", checkpoints3)
- val rawError = s"Wrong raw $controlTypeRecordCount value: Negative value"
- val sourceError = s"""Wrong source $controlTypeRecordCount value: For input string: \"\""""
+ val rawError = s"Wrong raw ${ControlType.Count.value} value: Negative value"
+ val sourceError = s"""Wrong source ${ControlType.Count.value} value: For input string: \"\""""
assert(rawResult.failed.get.getMessage == rawError)
assert(sourceResult.failed.get.getMessage == sourceError)
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/common/RecordIdGenerationSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/common/RecordIdGenerationSuite.scala
index 8d23390f7..49791c522 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/common/RecordIdGenerationSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/common/RecordIdGenerationSuite.scala
@@ -18,13 +18,14 @@ package za.co.absa.enceladus.common
import java.util.UUID
import com.typesafe.config.{Config, ConfigException, ConfigFactory, ConfigValueFactory}
-import org.scalatest.{FlatSpec, Matchers}
import za.co.absa.enceladus.common.RecordIdGenerationSuite.{SomeData, SomeDataWithId}
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
import RecordIdGeneration._
import IdType._
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
-class RecordIdGenerationSuite extends FlatSpec with Matchers with SparkTestBase {
+class RecordIdGenerationSuite extends AnyFlatSpec with Matchers with SparkTestBase {
import spark.implicits._
val data1 = Seq(
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/common/plugin/ControlMetricsPluginSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/common/plugin/ControlMetricsPluginSuite.scala
index 3f6bcd749..d3567a544 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/common/plugin/ControlMetricsPluginSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/common/plugin/ControlMetricsPluginSuite.scala
@@ -16,13 +16,13 @@
package za.co.absa.enceladus.common.plugin
import com.typesafe.config.ConfigFactory
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.common.plugin.dummy.{DummyControlMetricsPlugin1, DummyControlMetricsPlugin2}
import za.co.absa.enceladus.plugins.api.control.ControlMetricsPlugin
import scala.collection.JavaConverters._
-class ControlMetricsPluginSuite extends FunSuite {
+class ControlMetricsPluginSuite extends AnyFunSuite {
test("Test the control plugin loader loads nothing if no class is specified") {
val conf = ConfigFactory.parseMap(Map[String, String]().asJava)
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/common/plugin/PostProcessorPluginSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/common/plugin/PostProcessorPluginSuite.scala
index ef2eeace8..b3d856126 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/common/plugin/PostProcessorPluginSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/common/plugin/PostProcessorPluginSuite.scala
@@ -16,13 +16,13 @@
package za.co.absa.enceladus.common.plugin
import com.typesafe.config.ConfigFactory
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.common.plugin.dummy.{DummyPostProcessor1, DummyPostProcessor2}
import za.co.absa.enceladus.plugins.api.postprocessor.PostProcessor
import scala.collection.JavaConverters._
-class PostProcessorPluginSuite extends FunSuite {
+class PostProcessorPluginSuite extends AnyFunSuite {
test("Test the postprocessor loader loads nothing if no class is specified") {
val conf = ConfigFactory.parseMap(Map[String, String]().asJava)
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/common/version/SparkVersionGuardSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/common/version/SparkVersionGuardSuite.scala
index 2d649a598..9678192d5 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/common/version/SparkVersionGuardSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/common/version/SparkVersionGuardSuite.scala
@@ -15,16 +15,17 @@
package za.co.absa.enceladus.common.version
-import org.mockito.ArgumentMatchers._
import org.mockito.Mockito
-import org.scalatest.mockito.MockitoSugar
-import org.scalatest.{Assertion, FlatSpec, Matchers}
+import org.mockito.scalatest.MockitoSugar
+import org.scalatest.Assertion
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
import org.slf4j.Logger
import za.co.absa.commons.version.Version._
import scala.reflect.ClassTag
-class SparkVersionGuardSuite extends FlatSpec with Matchers with MockitoSugar {
+class SparkVersionGuardSuite extends AnyFlatSpec with Matchers with MockitoSugar {
private def ensureThrowsWithMessageIncluding[T <: Throwable](messageSubstringToAppear: String)(fun: => scala.Any)
(implicit ev: ClassTag[T]): Assertion = {
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/config/ConformanceParserSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/config/ConformanceParserSuite.scala
index f2d42c5e5..04ef0ce3c 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/config/ConformanceParserSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/config/ConformanceParserSuite.scala
@@ -17,13 +17,13 @@ package za.co.absa.enceladus.conformance.config
import java.time.ZonedDateTime
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.conformance.ConformanceExecution
import za.co.absa.enceladus.dao.auth.{MenasKerberosCredentials, MenasPlainCredentials}
import za.co.absa.enceladus.model.Dataset
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
-class ConformanceParserSuite extends FunSuite with SparkTestBase {
+class ConformanceParserSuite extends AnyFunSuite with SparkTestBase {
private val year = "2018"
private val month = "12"
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/datasource/DatasourceSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/datasource/DatasourceSuite.scala
index 81c436577..c9e7d77fd 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/datasource/DatasourceSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/datasource/DatasourceSuite.scala
@@ -15,11 +15,11 @@
package za.co.absa.enceladus.conformance.datasource
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.conformance.samples.EmployeeConformance
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
-class DatasourceSuite extends FunSuite with SparkTestBase {
+class DatasourceSuite extends AnyFunSuite with SparkTestBase {
test("Data Source loads all data needed for test sample") {
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/ArrayConformanceSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/ArrayConformanceSuite.scala
index dcb6e46a6..f93d56426 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/ArrayConformanceSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/ArrayConformanceSuite.scala
@@ -17,17 +17,20 @@ package za.co.absa.enceladus.conformance.interpreter
import org.apache.spark.sql.functions._
import org.mockito.Mockito.{mock, when => mockWhen}
-import org.scalatest.{BeforeAndAfterAll, FunSuite}
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.BeforeAndAfterAll
import za.co.absa.enceladus.conformance.config.ConformanceConfig
import za.co.absa.enceladus.conformance.datasource.DataSource
import za.co.absa.enceladus.dao.MenasDAO
import za.co.absa.enceladus.conformance.samples._
+import za.co.absa.enceladus.utils.fs.HdfsUtils
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
-class ArrayConformanceSuite extends FunSuite with SparkTestBase with BeforeAndAfterAll {
+class ArrayConformanceSuite extends AnyFunSuite with SparkTestBase with BeforeAndAfterAll {
import spark.implicits._
// spark.enableControlFrameworkTracking()
+ implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)
implicit var dao: MenasDAO = _
implicit var progArgs: ConformanceConfig = _
@@ -58,7 +61,7 @@ class ArrayConformanceSuite extends FunSuite with SparkTestBase with BeforeAndAf
.setControlFrameworkEnabled(enableCF)
.setBroadcastStrategyMode(Never)
- val conformedDf = DynamicInterpreter.interpret(ArraySamples.conformanceDef,
+ val conformedDf = DynamicInterpreter().interpret(ArraySamples.conformanceDef,
df)
val expected = ArraySamples.conformedData.toArray.sortBy(_.order).toList
val conformed = conformedDf.as[ConformedOuter].collect().sortBy(_.order).toList
@@ -83,7 +86,7 @@ class ArrayConformanceSuite extends FunSuite with SparkTestBase with BeforeAndAf
.setControlFrameworkEnabled(enableCF)
.setBroadcastStrategyMode(Never)
- val conformedDf = DynamicInterpreter.interpret(NullArraySamples.mappingOnlyConformanceDef,
+ val conformedDf = DynamicInterpreter().interpret(NullArraySamples.mappingOnlyConformanceDef,
df)
val expected = NullArraySamples.conformedData.toArray.sortBy(_.order).toList
@@ -113,7 +116,7 @@ class ArrayConformanceSuite extends FunSuite with SparkTestBase with BeforeAndAf
.setControlFrameworkEnabled(enableCF)
.setBroadcastStrategyMode(Never)
- val conformedDf = DynamicInterpreter.interpret(EmtpyArraySamples.mappingOnlyConformanceDef,
+ val conformedDf = DynamicInterpreter().interpret(EmtpyArraySamples.mappingOnlyConformanceDef,
df)
val expected = EmtpyArraySamples.conformedData.toArray.sortBy(_.order).toList
val conformed = conformedDf.as[OuterErr].collect().sortBy(_.order).toList
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/ChorusMockSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/ChorusMockSuite.scala
index 2d4873474..9d5c94eca 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/ChorusMockSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/ChorusMockSuite.scala
@@ -16,12 +16,13 @@
package za.co.absa.enceladus.conformance.interpreter
import org.mockito.Mockito.{mock, when => mockWhen}
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.conformance.config.ConformanceConfig
import za.co.absa.enceladus.conformance.datasource.DataSource
import za.co.absa.enceladus.dao.MenasDAO
import za.co.absa.enceladus.model.conformanceRule.MappingConformanceRule
import za.co.absa.enceladus.model.{MappingTable, Dataset => ConfDataset}
+import za.co.absa.enceladus.utils.fs.HdfsUtils
import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase}
case class MyMappingTable(id: Int, mappedAttr: MyMappingTableInner)
@@ -29,7 +30,7 @@ case class MyMappingTableInner(description: String, name: String)
case class MyData(id: Int, toJoin: Int)
case class MyDataConfd(id: Int, toJoin: Int, confMapping: MyMappingTableInner)
-class ChorusMockSuite extends FunSuite with SparkTestBase with LoggerTestBase {
+class ChorusMockSuite extends AnyFunSuite with SparkTestBase with LoggerTestBase {
def testChorusMockData(useExperimentalMappingRule: Boolean): Unit = {
val d = Seq(
@@ -71,7 +72,9 @@ class ChorusMockSuite extends FunSuite with SparkTestBase with LoggerTestBase {
.setControlFrameworkEnabled(enableCF)
.setBroadcastStrategyMode(Never)
- val confd = DynamicInterpreter.interpret(conformanceDef, inputDf).repartition(2)
+ implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)
+
+ val confd = DynamicInterpreter().interpret(conformanceDef, inputDf).repartition(2)
logDataFrameContent(confd)
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/InterpreterSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/InterpreterSuite.scala
index 47e09060b..fdc49e9b4 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/InterpreterSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/InterpreterSuite.scala
@@ -16,7 +16,7 @@
package za.co.absa.enceladus.conformance.interpreter
import org.mockito.Mockito.{mock, when => mockWhen}
-import org.scalatest.{BeforeAndAfterAll, FunSuite}
+import org.scalatest.BeforeAndAfterAll
import za.co.absa.atum.model.ControlMeasure
import za.co.absa.enceladus.conformance.config.ConformanceConfig
import za.co.absa.enceladus.conformance.datasource.DataSource
@@ -25,9 +25,10 @@ import za.co.absa.enceladus.conformance.samples._
import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase}
import org.json4s._
import org.json4s.jackson._
-import za.co.absa.enceladus.utils.fs.FileReader
+import org.scalatest.funsuite.AnyFunSuite
+import za.co.absa.enceladus.utils.fs.{FileReader, HdfsUtils}
-class InterpreterSuite extends FunSuite with SparkTestBase with BeforeAndAfterAll with LoggerTestBase {
+class InterpreterSuite extends AnyFunSuite with SparkTestBase with BeforeAndAfterAll with LoggerTestBase {
override def beforeAll(): Unit = {
super.beforeAll
@@ -55,6 +56,7 @@ class InterpreterSuite extends FunSuite with SparkTestBase with BeforeAndAfterAl
val isCatalystWorkaroundEnabled = true
import spark.implicits._
+ implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)
val mappingTablePattern = "{0}/{1}/{2}"
val dfs = DataSource.getDataFrame(EmployeeConformance.employeeDS.hdfsPath, "2017-11-01", mappingTablePattern)
@@ -70,7 +72,7 @@ class InterpreterSuite extends FunSuite with SparkTestBase with BeforeAndAfterAl
.setControlFrameworkEnabled(enableCF)
.setBroadcastStrategyMode(Never)
- val conformed = DynamicInterpreter.interpret(EmployeeConformance.employeeDS, dfs)
+ val conformed = DynamicInterpreter().interpret(EmployeeConformance.employeeDS, dfs)
val data = conformed.as[ConformedEmployee].collect.sortBy(_.employee_id).toList
val expected = EmployeeConformance.conformedEmployees.sortBy(_.employee_id).toList
@@ -112,6 +114,7 @@ class InterpreterSuite extends FunSuite with SparkTestBase with BeforeAndAfterAl
val isCatalystWorkaroundEnabled = true
import spark.implicits._
+ implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)
val mappingTablePattern = "{0}/{1}/{2}"
val dfs = DataSource.getDataFrame(TradeConformance.tradeDS.hdfsPath, "2017-11-01", mappingTablePattern)
@@ -127,7 +130,7 @@ class InterpreterSuite extends FunSuite with SparkTestBase with BeforeAndAfterAl
.setControlFrameworkEnabled(enableCF)
.setBroadcastStrategyMode(Never)
- val conformed = DynamicInterpreter.interpret(TradeConformance.tradeDS, dfs).cache
+ val conformed = DynamicInterpreter().interpret(TradeConformance.tradeDS, dfs).cache
val data = conformed.repartition(1).orderBy($"id").toJSON.collect.mkString("\n")
@@ -168,23 +171,19 @@ class InterpreterSuite extends FunSuite with SparkTestBase with BeforeAndAfterAl
})
}
- // TODO fix for s3 [ref issue #1416]
- ignore("End to end dynamic conformance test") {
+ test("End to end dynamic conformance test") {
testEndToEndDynamicConformance(useExperimentalMappingRule = false)
}
- // TODO fix for s3 [ref issue #1416]
- ignore("End to end dynamic conformance test (experimental optimized mapping rule)") {
+ test("End to end dynamic conformance test (experimental optimized mapping rule)") {
testEndToEndDynamicConformance(useExperimentalMappingRule = true)
}
- // TODO fix for s3 [ref issue #1416]
- ignore("End to end array dynamic conformance test") {
+ test("End to end array dynamic conformance test") {
testEndToEndArrayConformance(useExperimentalMappingRule = false)
}
- // TODO fix for s3 [ref issue #1416]
- ignore("End to end array dynamic conformance test (experimental optimized mapping rule)") {
+ test("End to end array dynamic conformance test (experimental optimized mapping rule)") {
testEndToEndArrayConformance(useExperimentalMappingRule = true)
}
}
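Besides the test-style migration, the interpreter call sites change shape throughout this diff: DynamicInterpreter is now constructed (DynamicInterpreter()) rather than called on the companion object, and each suite brings an implicit HdfsUtils into scope before interpreting. The pattern as it appears in the suites that follow (dataset and inputDf stand in for the suite-specific fixtures):

    implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)

    val conformed = DynamicInterpreter().interpret(dataset, inputDf)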
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/LiteralJoinMappingRuleTest.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/LiteralJoinMappingRuleTest.scala
index fa78f5a87..5853d33e9 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/LiteralJoinMappingRuleTest.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/LiteralJoinMappingRuleTest.scala
@@ -16,15 +16,16 @@
package za.co.absa.enceladus.conformance.interpreter
import org.mockito.Mockito.{mock, when => mockWhen}
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.conformance.config.ConformanceConfig
import za.co.absa.enceladus.conformance.datasource.DataSource
import za.co.absa.enceladus.dao.MenasDAO
import za.co.absa.enceladus.model.conformanceRule.{DropConformanceRule, LiteralConformanceRule, MappingConformanceRule}
import za.co.absa.enceladus.model.{MappingTable, Dataset => ConfDataset}
+import za.co.absa.enceladus.utils.fs.HdfsUtils
import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase}
-class LiteralJoinMappingRuleTest extends FunSuite with SparkTestBase with LoggerTestBase {
+class LiteralJoinMappingRuleTest extends AnyFunSuite with SparkTestBase with LoggerTestBase {
def testMappingRuleWithLiteral(useExperimentalMappingRule: Boolean): Unit = {
@@ -55,9 +56,9 @@ class LiteralJoinMappingRuleTest extends FunSuite with SparkTestBase with Logger
conformance = List(
LiteralConformanceRule(order = 1, outputColumn = "country", controlCheckpoint = true, value = "CZ"),
MappingConformanceRule(order = 2, controlCheckpoint = true, mappingTable = "countryMT", mappingTableVersion = 0,
- attributeMappings = Map("countryCode" -> "country"), targetAttribute = "countryName",
+ attributeMappings = Map("countryCode" -> "country"), targetAttribute = "countryName",
outputColumn = "conformedCountry", isNullSafe = true),
- DropConformanceRule(order = 3, controlCheckpoint = false, outputColumn = "country")
+ DropConformanceRule(order = 3, controlCheckpoint = false, outputColumn = "country")
)
)
@@ -67,7 +68,9 @@ class LiteralJoinMappingRuleTest extends FunSuite with SparkTestBase with Logger
.setControlFrameworkEnabled(enableCF)
.setBroadcastStrategyMode(Never)
- val confd = DynamicInterpreter.interpret(conformanceDef, inputDf).repartition(2)
+ implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)
+
+ val confd = DynamicInterpreter().interpret(conformanceDef, inputDf).repartition(2)
confd.write.mode("overwrite").parquet("_testOutput")
val readAgain = spark.read.parquet("_testOutput")
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/NestedStructSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/NestedStructSuite.scala
index b09154200..b044de35c 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/NestedStructSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/NestedStructSuite.scala
@@ -15,8 +15,9 @@
package za.co.absa.enceladus.conformance.interpreter
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.conformance.interpreter.fixtures.NestedStructsFixture
+import za.co.absa.enceladus.utils.fs.HdfsUtils
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
/**
@@ -24,7 +25,9 @@ import za.co.absa.enceladus.utils.testUtils.SparkTestBase
*
* Without applying a workaround any test in this suite makes Spark freeze.
*/
-class NestedStructSuite extends FunSuite with SparkTestBase with NestedStructsFixture {
+class NestedStructSuite extends AnyFunSuite with SparkTestBase with NestedStructsFixture {
+
+ implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)
test("Test Dynamic Conformance does not hang on many mixed conformance rules") {
implicit val featureSwitches: FeatureSwitches = FeatureSwitches()
@@ -32,7 +35,7 @@ class NestedStructSuite extends FunSuite with SparkTestBase with NestedStructsFi
.setCatalystWorkaroundEnabled(true)
.setControlFrameworkEnabled(false)
- val conformed = DynamicInterpreter.interpret(nestedStructsDS, standardizedDf)
+ val conformed = DynamicInterpreter().interpret(nestedStructsDS, standardizedDf)
assert(conformed.count() == 20)
}
@@ -43,7 +46,7 @@ class NestedStructSuite extends FunSuite with SparkTestBase with NestedStructsFi
.setCatalystWorkaroundEnabled(true)
.setControlFrameworkEnabled(false)
- val conformed = DynamicInterpreter.interpret(nestedStructsUpperDS, standardizedDf)
+ val conformed = DynamicInterpreter().interpret(nestedStructsUpperDS, standardizedDf)
assert(conformed.count() == 20)
}
@@ -54,7 +57,7 @@ class NestedStructSuite extends FunSuite with SparkTestBase with NestedStructsFi
.setCatalystWorkaroundEnabled(true)
.setControlFrameworkEnabled(false)
- val conformed = DynamicInterpreter.interpret( nestedStructsNegationDS, standardizedDf)
+ val conformed = DynamicInterpreter().interpret(nestedStructsNegationDS, standardizedDf)
assert(conformed.count() == 20)
}
@@ -65,7 +68,7 @@ class NestedStructSuite extends FunSuite with SparkTestBase with NestedStructsFi
.setCatalystWorkaroundEnabled(true)
.setControlFrameworkEnabled(false)
- val conformed = DynamicInterpreter.interpret(nestedStructsCastingDS, standardizedDf)
+ val conformed = DynamicInterpreter().interpret(nestedStructsCastingDS, standardizedDf)
assert(conformed.count() == 20)
}
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/fixtures/StreamingFixture.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/fixtures/StreamingFixture.scala
index fc426919e..48b18919b 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/fixtures/StreamingFixture.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/fixtures/StreamingFixture.scala
@@ -19,9 +19,8 @@ import org.apache.commons.configuration2.Configuration
import org.apache.spark.sql.catalyst.encoders.RowEncoder
import org.apache.spark.sql.execution.streaming.MemoryStream
import org.apache.spark.sql.{DataFrame, Row}
-import org.mockito.Mockito.when
-import org.scalatest.FunSuite
-import org.scalatest.mockito.MockitoSugar
+import org.scalatest.funsuite.AnyFunSuite
+import org.mockito.scalatest.MockitoSugar
import za.co.absa.enceladus.conformance.HyperConformance
import za.co.absa.enceladus.conformance.HyperConformanceAttributes._
import za.co.absa.enceladus.conformance.config.ConformanceConfig
@@ -31,7 +30,7 @@ import za.co.absa.enceladus.dao.MenasDAO
import za.co.absa.enceladus.model.Dataset
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
-trait StreamingFixture extends FunSuite with SparkTestBase with MockitoSugar {
+trait StreamingFixture extends AnyFunSuite with SparkTestBase with MockitoSugar {
implicit val menasBaseUrls: List[String] = List.empty
implicit val cmd: ConformanceConfig = ConformanceConfig(reportVersion = Some(1))
@@ -51,6 +50,8 @@ trait StreamingFixture extends FunSuite with SparkTestBase with MockitoSugar {
when(configStub.containsKey(menasUriKey)).thenReturn(true)
when(configStub.getString(menasUriKey)).thenReturn("https://mymenas.org")
when(configStub.containsKey(menasAuthKeytabKey)).thenReturn(true)
+ when(configStub.containsKey(menasCredentialsFileKey)).thenReturn(false)
+ when(configStub.getString(menasAuthKeytabKey)).thenReturn("key1")
val memoryStream = new MemoryStream[Row](1, spark.sqlContext)(RowEncoder(input.schema))
val hyperConformance = HyperConformance(configStub).asInstanceOf[HyperConformance]
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/CastingRuleSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/CastingRuleSuite.scala
index dbc56aba3..085fd7dbf 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/CastingRuleSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/CastingRuleSuite.scala
@@ -17,16 +17,17 @@ package za.co.absa.enceladus.conformance.interpreter.rules
import org.apache.spark.sql.types._
import org.mockito.Mockito.{mock, when => mockWhen}
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import org.slf4j.event.Level.ERROR
import za.co.absa.enceladus.conformance.config.ConformanceConfig
import za.co.absa.enceladus.conformance.interpreter.{DynamicInterpreter, FeatureSwitches, RuleValidators}
import za.co.absa.enceladus.conformance.samples.CastingRuleSamples
import za.co.absa.enceladus.dao.MenasDAO
+import za.co.absa.enceladus.utils.fs.HdfsUtils
import za.co.absa.enceladus.utils.general.JsonUtils
import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase}
-class CastingRuleSuite extends FunSuite with SparkTestBase with LoggerTestBase {
+class CastingRuleSuite extends AnyFunSuite with SparkTestBase with LoggerTestBase {
private val ruleName = "Casting rule"
private val columnName = "dummy"
@@ -51,7 +52,9 @@ class CastingRuleSuite extends FunSuite with SparkTestBase with LoggerTestBase {
.setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled)
.setControlFrameworkEnabled(enableCF)
- val conformed = DynamicInterpreter.interpret(CastingRuleSamples.ordersDS, inputDf).cache
+ implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)
+
+ val conformed = DynamicInterpreter().interpret(CastingRuleSamples.ordersDS, inputDf).cache
val conformedJSON = JsonUtils.prettySparkJSON(conformed.orderBy($"id").toJSON.collect)
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/CoalesceRuleSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/CoalesceRuleSuite.scala
index db66524a3..e44c8d1b6 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/CoalesceRuleSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/CoalesceRuleSuite.scala
@@ -16,7 +16,7 @@
package za.co.absa.enceladus.conformance.interpreter.rules
import org.apache.spark.sql.DataFrame
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
import CoalesceRuleSuite._
import za.co.absa.enceladus.conformance.samples.DeepArraySamples
@@ -83,7 +83,7 @@ object CoalesceRuleSuite {
)
}
-class CoalesceRuleSuite extends FunSuite with SparkTestBase with TestRuleBehaviors {
+class CoalesceRuleSuite extends AnyFunSuite with SparkTestBase with TestRuleBehaviors {
test("Coalesce conformance rule on root level fields") {
val inputDf: DataFrame = spark.createDataFrame(shopItems)
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/ConcatenationRuleSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/ConcatenationRuleSuite.scala
index 79d89be60..eac2ed4f0 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/ConcatenationRuleSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/ConcatenationRuleSuite.scala
@@ -16,13 +16,13 @@
package za.co.absa.enceladus.conformance.interpreter.rules
import org.apache.spark.sql.DataFrame
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.conformance.samples.DeepArraySamples
import za.co.absa.enceladus.model.Dataset
import za.co.absa.enceladus.model.conformanceRule.{ConcatenationConformanceRule, UppercaseConformanceRule}
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
-class ConcatenationRuleSuite extends FunSuite with SparkTestBase with TestRuleBehaviors {
+class ConcatenationRuleSuite extends AnyFunSuite with SparkTestBase with TestRuleBehaviors {
private val concatRule = ConcatenationConformanceRule(order = 1, outputColumn = "CombinedName",
controlCheckpoint = false, Seq("name", "city", "address"))
private val concatArrayRule = ConcatenationConformanceRule(order = 2, outputColumn = "rooms.CombinedLabel",
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/DropRuleSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/DropRuleSuite.scala
index 081827522..c60fe6a16 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/DropRuleSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/DropRuleSuite.scala
@@ -16,13 +16,13 @@
package za.co.absa.enceladus.conformance.interpreter.rules
import org.apache.spark.sql.DataFrame
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.conformance.samples.DeepArraySamples
import za.co.absa.enceladus.model.Dataset
import za.co.absa.enceladus.model.conformanceRule.DropConformanceRule
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
-class DropRuleSuite extends FunSuite with SparkTestBase with TestRuleBehaviors {
+class DropRuleSuite extends AnyFunSuite with SparkTestBase with TestRuleBehaviors {
// scalastyle:off line.size.limit
private val dropRule = DropConformanceRule(order = 1, controlCheckpoint = false, outputColumn = "name" )
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/FillNullsRuleSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/FillNullsRuleSuite.scala
index f9dee622b..00c354eb8 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/FillNullsRuleSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/FillNullsRuleSuite.scala
@@ -16,13 +16,13 @@
package za.co.absa.enceladus.conformance.interpreter.rules
import org.apache.spark.sql.DataFrame
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.conformance.samples.DeepArraySamples
import za.co.absa.enceladus.model.Dataset
import za.co.absa.enceladus.model.conformanceRule.FillNullsConformanceRule
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
-class FillNullsRuleSuite extends FunSuite with SparkTestBase with TestRuleBehaviors {
+class FillNullsRuleSuite extends AnyFunSuite with SparkTestBase with TestRuleBehaviors {
// scalastyle:off line.size.limit
private val fillNullsRule = FillNullsConformanceRule(
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/LiteralRuleSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/LiteralRuleSuite.scala
index 4ac42525e..b47ade6ff 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/LiteralRuleSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/LiteralRuleSuite.scala
@@ -16,13 +16,13 @@
package za.co.absa.enceladus.conformance.interpreter.rules
import org.apache.spark.sql.DataFrame
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.conformance.samples.DeepArraySamples
import za.co.absa.enceladus.model.Dataset
import za.co.absa.enceladus.model.conformanceRule.LiteralConformanceRule
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
-class LiteralRuleSuite extends FunSuite with SparkTestBase with TestRuleBehaviors {
+class LiteralRuleSuite extends AnyFunSuite with SparkTestBase with TestRuleBehaviors {
// scalastyle:off line.size.limit
private val literalRule = LiteralConformanceRule(order = 1, outputColumn = "System", controlCheckpoint = false, value = "FA")
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/MappingRuleBroadcastSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/MappingRuleBroadcastSuite.scala
index b6fd23928..479cb7f32 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/MappingRuleBroadcastSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/MappingRuleBroadcastSuite.scala
@@ -17,21 +17,25 @@ package za.co.absa.enceladus.conformance.interpreter.rules
import org.apache.commons.io.IOUtils
import org.apache.spark.sql.functions._
-import org.scalatest.{BeforeAndAfterAll, FunSuite}
+import org.scalatest.BeforeAndAfterAll
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.conformance.interpreter.DynamicInterpreter
import za.co.absa.enceladus.conformance.interpreter.rules.testcasefactories.NestedTestCaseFactory._
import za.co.absa.enceladus.conformance.interpreter.rules.testcasefactories.SimpleTestCaseFactory._
import za.co.absa.enceladus.conformance.interpreter.rules.testcasefactories.{NestedTestCaseFactory, SimpleTestCaseFactory}
import za.co.absa.enceladus.utils.error.ErrorMessage
+import za.co.absa.enceladus.utils.fs.HdfsUtils
import za.co.absa.enceladus.utils.general.JsonUtils
import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase}
-class MappingRuleBroadcastSuite extends FunSuite with SparkTestBase with LoggerTestBase with BeforeAndAfterAll {
+class MappingRuleBroadcastSuite extends AnyFunSuite with SparkTestBase with LoggerTestBase with BeforeAndAfterAll {
import spark.implicits._
private val simpleTestCaseFactory = new SimpleTestCaseFactory()
private val nestedTestCaseFactory = new NestedTestCaseFactory()
+ implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)
+
override def beforeAll(): Unit = {
super.beforeAll()
simpleTestCaseFactory.createMappingTables()
@@ -51,7 +55,7 @@ class MappingRuleBroadcastSuite extends FunSuite with SparkTestBase with LoggerT
implicit val (inputDf, dataset, dao, progArgs, featureSwitches) =
simpleTestCaseFactory.getTestCase(true, true, simpleMappingRule)
- val dfOut = DynamicInterpreter.interpret(dataset, inputDf)
+ val dfOut = DynamicInterpreter().interpret(dataset, inputDf)
.select($"id", $"int_num", $"long_num", $"str_val", $"errCol", $"conformedIntNum")
.cache
@@ -69,7 +73,7 @@ class MappingRuleBroadcastSuite extends FunSuite with SparkTestBase with LoggerT
implicit val (inputDf, dataset, dao, progArgs, featureSwitches) =
simpleTestCaseFactory.getTestCase(true, true, simpleMappingRuleWithDefaultValue)
- val dfOut = DynamicInterpreter.interpret(dataset, inputDf)
+ val dfOut = DynamicInterpreter().interpret(dataset, inputDf)
.select($"id", $"int_num", $"long_num", $"str_val", $"errCol", $"conformedIntNum")
.cache
@@ -87,7 +91,7 @@ class MappingRuleBroadcastSuite extends FunSuite with SparkTestBase with LoggerT
implicit val (inputDf, dataset, dao, progArgs, featureSwitches) =
nestedTestCaseFactory.getTestCase(true, true, nestedMappingRule1)
- val dfOut = DynamicInterpreter.interpret(dataset, inputDf)
+ val dfOut = DynamicInterpreter().interpret(dataset, inputDf)
.select($"id", $"key1", $"key2", $"struct1", $"struct2", $"array1", $"array2", $"errCol", $"conformedNum1")
.cache
@@ -105,7 +109,7 @@ class MappingRuleBroadcastSuite extends FunSuite with SparkTestBase with LoggerT
implicit val (inputDf, dataset, dao, progArgs, featureSwitches) =
nestedTestCaseFactory.getTestCase(true, true, nestedMappingRule2)
- val dfOut = DynamicInterpreter.interpret(dataset, inputDf)
+ val dfOut = DynamicInterpreter().interpret(dataset, inputDf)
.select($"id", $"key1", $"key2", $"struct1", $"struct2", $"array1", $"array2", $"errCol", $"conformedNum2")
.cache
@@ -123,7 +127,7 @@ class MappingRuleBroadcastSuite extends FunSuite with SparkTestBase with LoggerT
implicit val (inputDf, dataset, dao, progArgs, featureSwitches) =
nestedTestCaseFactory.getTestCase(true, true, nestedMappingRule3)
- val dfOut = DynamicInterpreter.interpret(dataset, inputDf)
+ val dfOut = DynamicInterpreter().interpret(dataset, inputDf)
.select($"id", $"key1", $"key2", $"struct1", $"struct2", $"array1", $"array2", $"conformedNum3", $"errCol")
.cache
@@ -141,7 +145,7 @@ class MappingRuleBroadcastSuite extends FunSuite with SparkTestBase with LoggerT
implicit val (inputDf, dataset, dao, progArgs, featureSwitches) =
nestedTestCaseFactory.getTestCase(true, true, arrayMappingRule1)
- val dfOut = DynamicInterpreter.interpret(dataset, inputDf)
+ val dfOut = DynamicInterpreter().interpret(dataset, inputDf)
.select($"id", $"key1", $"key2", $"struct1", $"struct2", $"array2", $"errCol", $"array1")
.cache
@@ -159,7 +163,7 @@ class MappingRuleBroadcastSuite extends FunSuite with SparkTestBase with LoggerT
implicit val (inputDf, dataset, dao, progArgs, featureSwitches) =
nestedTestCaseFactory.getTestCase(true, true, arrayMappingRule2)
- val dfOut = DynamicInterpreter.interpret(dataset, inputDf)
+ val dfOut = DynamicInterpreter().interpret(dataset, inputDf)
.select($"id", $"key1", $"key2", $"struct1", $"struct2", $"array1", $"array2", $"errCol")
.cache
@@ -177,7 +181,7 @@ class MappingRuleBroadcastSuite extends FunSuite with SparkTestBase with LoggerT
implicit val (inputDf, dataset, dao, progArgs, featureSwitches) =
nestedTestCaseFactory.getTestCase(true, true, arrayMappingRule3)
- val dfOut = DynamicInterpreter.interpret(dataset, inputDf)
+ val dfOut = DynamicInterpreter().interpret(dataset, inputDf)
.select($"id", $"key1", $"key2", $"struct1", $"struct2", $"array1", $"array2", $"errCol")
.cache
@@ -195,7 +199,7 @@ class MappingRuleBroadcastSuite extends FunSuite with SparkTestBase with LoggerT
implicit val (inputDf, dataset, dao, progArgs, featureSwitches) =
nestedTestCaseFactory.getTestCase(true, true, arrayMappingRule4)
- val dfOut = DynamicInterpreter.interpret(dataset, inputDf)
+ val dfOut = DynamicInterpreter().interpret(dataset, inputDf)
.select($"id", $"key1", $"key2", $"struct1", $"struct2", $"array1", $"array2", $"errCol")
.cache
@@ -213,7 +217,7 @@ class MappingRuleBroadcastSuite extends FunSuite with SparkTestBase with LoggerT
implicit val (inputDf, dataset, dao, progArgs, featureSwitches) =
nestedTestCaseFactory.getTestCase(true, true, arrayMappingRule5)
- val dfOut = DynamicInterpreter.interpret(dataset, inputDf)
+ val dfOut = DynamicInterpreter().interpret(dataset, inputDf)
.select($"id", $"key1", $"key2", $"struct1", $"struct2", $"array1", $"array2", $"errCol")
.cache
@@ -231,7 +235,7 @@ class MappingRuleBroadcastSuite extends FunSuite with SparkTestBase with LoggerT
implicit val (inputDf, dataset, dao, progArgs, featureSwitches) =
nestedTestCaseFactory.getTestCase(true, true, arrayMappingRule6)
- val dfOut = DynamicInterpreter.interpret(dataset, inputDf)
+ val dfOut = DynamicInterpreter().interpret(dataset, inputDf)
.select($"id", $"key1", $"key2", $"struct1", $"struct2", $"array1", $"array2", $"errCol")
.cache
@@ -251,7 +255,7 @@ class MappingRuleBroadcastSuite extends FunSuite with SparkTestBase with LoggerT
val inputDf2 = inputDf.withColumn("errCol", array(typedLit(ErrorMessage("Initial", "000", "ErrMsg", "id", Seq(), Seq()))))
- val dfOut = DynamicInterpreter.interpret(dataset, inputDf2)
+ val dfOut = DynamicInterpreter().interpret(dataset, inputDf2)
.select($"id", $"key1", $"key2", $"struct1", $"struct2", $"array1", $"array2", $"errCol")
.cache
@@ -267,7 +271,7 @@ class MappingRuleBroadcastSuite extends FunSuite with SparkTestBase with LoggerT
nestedTestCaseFactory.getTestCase(true, true, wrongMappingRule1)
intercept[Exception] {
- DynamicInterpreter.interpret(dataset, inputDf)
+ DynamicInterpreter().interpret(dataset, inputDf)
}
}
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/MappingRuleSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/MappingRuleSuite.scala
index 23970b89a..1b92ebbe0 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/MappingRuleSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/MappingRuleSuite.scala
@@ -16,15 +16,19 @@
package za.co.absa.enceladus.conformance.interpreter.rules
import org.apache.spark.sql.AnalysisException
-import org.scalatest.{BeforeAndAfterAll, FunSuite}
+import org.scalatest.BeforeAndAfterAll
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.conformance.interpreter.DynamicInterpreter
import za.co.absa.enceladus.conformance.interpreter.rules.testcasefactories.SimpleTestCaseFactory
import za.co.absa.enceladus.conformance.interpreter.rules.testcasefactories.SimpleTestCaseFactory._
+import za.co.absa.enceladus.utils.fs.HdfsUtils
import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase}
-class MappingRuleSuite extends FunSuite with SparkTestBase with LoggerTestBase with BeforeAndAfterAll {
+class MappingRuleSuite extends AnyFunSuite with SparkTestBase with LoggerTestBase with BeforeAndAfterAll {
private val testCaseFactory = new SimpleTestCaseFactory()
+ implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)
+
override def beforeAll(): Unit = {
super.beforeAll()
testCaseFactory.createMappingTables()
@@ -40,7 +44,7 @@ class MappingRuleSuite extends FunSuite with SparkTestBase with LoggerTestBase w
testCaseFactory.getTestCase(true, false, nonExistentTableMappingRule)
val ex = intercept[AnalysisException] {
- DynamicInterpreter.interpret(dataset, inputDf).cache
+ DynamicInterpreter().interpret(dataset, inputDf).cache
}
assert(ex.getMessage.contains("Path does not exist"))
@@ -51,7 +55,7 @@ class MappingRuleSuite extends FunSuite with SparkTestBase with LoggerTestBase w
testCaseFactory.getTestCase(false, false, nonExistentTableMappingRule)
val ex = intercept[AnalysisException] {
- DynamicInterpreter.interpret(dataset, inputDf).cache
+ DynamicInterpreter().interpret(dataset, inputDf).cache
}
assert(ex.getMessage.contains("Path does not exist"))
@@ -62,7 +66,7 @@ class MappingRuleSuite extends FunSuite with SparkTestBase with LoggerTestBase w
testCaseFactory.getTestCase(true, false, emptyTableMappingRule)
val ex = intercept[RuntimeException] {
- DynamicInterpreter.interpret(dataset, inputDf).cache
+ DynamicInterpreter().interpret(dataset, inputDf).cache
}
assert(ex.getMessage.contains("Unable to read the mapping table"))
@@ -73,7 +77,7 @@ class MappingRuleSuite extends FunSuite with SparkTestBase with LoggerTestBase w
testCaseFactory.getTestCase(false, false, emptyTableMappingRule)
val ex = intercept[RuntimeException] {
- DynamicInterpreter.interpret(dataset, inputDf).cache
+ DynamicInterpreter().interpret(dataset, inputDf).cache
}
assert(ex.getMessage.contains("Unable to read the mapping table"))
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/MappingRuleValidationSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/MappingRuleValidationSuite.scala
index 14669a07f..a54121fc4 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/MappingRuleValidationSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/MappingRuleValidationSuite.scala
@@ -15,13 +15,13 @@
package za.co.absa.enceladus.conformance.interpreter.rules
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.conformance.datasource.DataSource
import za.co.absa.enceladus.conformance.samples.EmployeeConformance
import za.co.absa.enceladus.model.conformanceRule.MappingConformanceRule
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
-class MappingRuleValidationSuite extends FunSuite with SparkTestBase {
+class MappingRuleValidationSuite extends AnyFunSuite with SparkTestBase {
// scalastyle:off line.size.limit
test("Mapping rule fields existence validation test") {
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/NegationRuleSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/NegationRuleSuite.scala
index 515738963..78ea290bb 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/NegationRuleSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/NegationRuleSuite.scala
@@ -17,16 +17,17 @@ package za.co.absa.enceladus.conformance.interpreter.rules
import org.apache.spark.sql.Dataset
import org.mockito.Mockito.{mock, when => mockWhen}
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import org.slf4j.event.Level.ERROR
import za.co.absa.enceladus.conformance.config.ConformanceConfig
import za.co.absa.enceladus.conformance.interpreter.{DynamicInterpreter, FeatureSwitches}
import za.co.absa.enceladus.conformance.samples.NegationRuleSamples
import za.co.absa.enceladus.dao.MenasDAO
import za.co.absa.enceladus.model.{Dataset => ConfDataset}
+import za.co.absa.enceladus.utils.fs.HdfsUtils
import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase}
-class NegationRuleSuite extends FunSuite with SparkTestBase with LoggerTestBase{
+class NegationRuleSuite extends AnyFunSuite with SparkTestBase with LoggerTestBase {
import spark.implicits._
@@ -119,7 +120,8 @@ class NegationRuleSuite extends FunSuite with SparkTestBase with LoggerTestBase{
.setExperimentalMappingRuleEnabled(experimentalMR)
.setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled)
.setControlFrameworkEnabled(enableCF)
- val conformed = DynamicInterpreter.interpret(enceladusDataset, inputDf).cache
+ implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)
+ val conformed = DynamicInterpreter().interpret(enceladusDataset, inputDf).cache
val conformedJSON = conformed.toJSON.collect().mkString("\n")
if (conformedJSON != expectedJSON) {
logger.error("EXPECTED:")
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/RuleOptimizationSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/RuleOptimizationSuite.scala
index 9a4dedfdb..2eb36d395 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/RuleOptimizationSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/RuleOptimizationSuite.scala
@@ -16,12 +16,14 @@
package za.co.absa.enceladus.conformance.interpreter.rules
import org.apache.spark.sql.types.{DataType, StructType}
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.conformance.interpreter.{DynamicInterpreter, FeatureSwitches, InterpreterContext, Never}
import za.co.absa.enceladus.model.conformanceRule.{ConformanceRule, MappingConformanceRule}
import za.co.absa.enceladus.conformance.samples.TradeConformance._
+import za.co.absa.enceladus.utils.fs.HdfsUtils
+import za.co.absa.enceladus.utils.testUtils.SparkTestBase
-class RuleOptimizationSuite extends FunSuite {
+class RuleOptimizationSuite extends AnyFunSuite with SparkTestBase {
private val schemaJson =
"""{
@@ -109,10 +111,12 @@ class RuleOptimizationSuite extends FunSuite {
null,
null)
+ implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)
+
test("Test non-mapping rules are not grouped") {
val rules: List[ConformanceRule] = List(litRule, upperRule, lit2Rule)
- val actualInterpreters = DynamicInterpreter.getInterpreters(rules, schema)
+ val actualInterpreters = DynamicInterpreter().getInterpreters(rules, schema)
assert(actualInterpreters.length == 3)
assert(actualInterpreters.head.isInstanceOf[LiteralRuleInterpreter])
@@ -123,7 +127,7 @@ class RuleOptimizationSuite extends FunSuite {
test("Test mapping rules having the same array are grouped") {
val rules: List[ConformanceRule] = List(litRule, countryRule, productRule, lit2Rule)
- val actualInterpreters = DynamicInterpreter.getInterpreters(rules, schema)
+ val actualInterpreters = DynamicInterpreter().getInterpreters(rules, schema)
assert(actualInterpreters.length == 6)
assert(actualInterpreters.head.isInstanceOf[LiteralRuleInterpreter])
@@ -137,7 +141,7 @@ class RuleOptimizationSuite extends FunSuite {
test("Test single arrays in the beginning and at the end") {
val rules: List[ConformanceRule] = List(countryRule, litRule, lit2Rule, productRule)
- val actualInterpreters = DynamicInterpreter.getInterpreters(rules, schema)
+ val actualInterpreters = DynamicInterpreter().getInterpreters(rules, schema)
assert(actualInterpreters.length == 4)
assert(actualInterpreters.head.isInstanceOf[MappingRuleInterpreterGroupExplode])
@@ -149,7 +153,7 @@ class RuleOptimizationSuite extends FunSuite {
test("Test several arrays in the beginning and at the end") {
val rules: List[ConformanceRule] = List(countryRule, productRule, litRule, lit2Rule, productRule, countryRule)
- val actualInterpreters = DynamicInterpreter.getInterpreters(rules, schema)
+ val actualInterpreters = DynamicInterpreter().getInterpreters(rules, schema)
assert(actualInterpreters.length == 10)
assert(actualInterpreters.head.isInstanceOf[ArrayExplodeInterpreter])
@@ -173,7 +177,7 @@ class RuleOptimizationSuite extends FunSuite {
val rules: List[ConformanceRule] = List(countryRule, productRule, legIdRule, countryRule, legIdRule,
countryRule, productRule, legIdRule, legIdRule)
- val actualInterpreters = DynamicInterpreter.getInterpreters(rules, schema)
+ val actualInterpreters = DynamicInterpreter().getInterpreters(rules, schema)
assert(actualInterpreters.length == 15)
assert(actualInterpreters.head.isInstanceOf[ArrayExplodeInterpreter])
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/RulesSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/RulesSuite.scala
index 1bb0a9933..34d29ee5c 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/RulesSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/RulesSuite.scala
@@ -18,7 +18,7 @@ package za.co.absa.enceladus.conformance.interpreter.rules
import org.apache.spark.sql._
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.conformance.interpreter.{ExplosionState, InterpreterContextArgs}
import za.co.absa.enceladus.conformance.samples.EmployeeConformance
import za.co.absa.enceladus.dao.MenasDAO
@@ -26,7 +26,7 @@ import za.co.absa.enceladus.model.conformanceRule.ConformanceRule
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
-class RulesSuite extends FunSuite with SparkTestBase {
+class RulesSuite extends AnyFunSuite with SparkTestBase {
private val dummyInterpreter = new RuleInterpreter {
override def conformanceRule: Option[ConformanceRule] = None
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/SingleColumnRuleSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/SingleColumnRuleSuite.scala
index b54b4f308..9f2be7c95 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/SingleColumnRuleSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/SingleColumnRuleSuite.scala
@@ -16,13 +16,13 @@
package za.co.absa.enceladus.conformance.interpreter.rules
import org.apache.spark.sql.DataFrame
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.conformance.samples.DeepArraySamples
import za.co.absa.enceladus.model.Dataset
import za.co.absa.enceladus.model.conformanceRule.SingleColumnConformanceRule
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
-class SingleColumnRuleSuite extends FunSuite with SparkTestBase with TestRuleBehaviors {
+class SingleColumnRuleSuite extends AnyFunSuite with SparkTestBase with TestRuleBehaviors {
// scalastyle:off line.size.limit
private val singleColumnRule = SingleColumnConformanceRule(order = 1, controlCheckpoint = false, "conformedId", "id", "id2")
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/SparkSessionRuleSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/SparkSessionRuleSuite.scala
index 5a0e08898..b03b11048 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/SparkSessionRuleSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/SparkSessionRuleSuite.scala
@@ -15,13 +15,13 @@
package za.co.absa.enceladus.conformance.interpreter.rules
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.conformance.samples.DeepArraySamples
import za.co.absa.enceladus.model.Dataset
import za.co.absa.enceladus.model.conformanceRule.SparkSessionConfConformanceRule
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
-class SparkSessionRuleSuite extends FunSuite with SparkTestBase with TestRuleBehaviors {
+class SparkSessionRuleSuite extends AnyFunSuite with SparkTestBase with TestRuleBehaviors {
// scalastyle:off line.size.limit
private val sparkSessionRule = SparkSessionConfConformanceRule(order = 1, outputColumn = "TimeZone", controlCheckpoint = false, sparkConfKey = "spark.sql.session.timeZone")
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/TestRuleBehaviors.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/TestRuleBehaviors.scala
index 4fd1bdb07..0263f7842 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/TestRuleBehaviors.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/TestRuleBehaviors.scala
@@ -17,16 +17,17 @@ package za.co.absa.enceladus.conformance.interpreter.rules
import org.apache.spark.sql.DataFrame
import org.mockito.Mockito.{mock, when => mockWhen}
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import org.slf4j.event.Level._
import za.co.absa.enceladus.conformance.config.ConformanceConfig
import za.co.absa.enceladus.conformance.interpreter.{DynamicInterpreter, FeatureSwitches}
import za.co.absa.enceladus.dao.MenasDAO
import za.co.absa.enceladus.model.Dataset
+import za.co.absa.enceladus.utils.fs.HdfsUtils
import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase}
-trait TestRuleBehaviors extends FunSuite with SparkTestBase with LoggerTestBase {
+trait TestRuleBehaviors extends AnyFunSuite with SparkTestBase with LoggerTestBase {
def conformanceRuleShouldMatchExpected(inputDf: DataFrame, inputDataset: Dataset, expectedJSON: String) {
implicit val dao: MenasDAO = mock(classOf[MenasDAO])
@@ -44,8 +45,9 @@ trait TestRuleBehaviors extends FunSuite with SparkTestBase with LoggerTestBase
.setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled)
.setControlFrameworkEnabled(enableCF)
+ implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)
- val conformed = DynamicInterpreter.interpret(inputDataset, inputDf)
+ val conformed = DynamicInterpreter().interpret(inputDataset, inputDf)
val conformedJSON = conformed.orderBy($"id").toJSON.collect().mkString("\n")
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/UppercaseRuleSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/UppercaseRuleSuite.scala
index 5f789eda0..77043aaeb 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/UppercaseRuleSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/UppercaseRuleSuite.scala
@@ -16,13 +16,13 @@
package za.co.absa.enceladus.conformance.interpreter.rules
import org.apache.spark.sql.DataFrame
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.conformance.samples.DeepArraySamples
import za.co.absa.enceladus.model.Dataset
import za.co.absa.enceladus.model.conformanceRule.UppercaseConformanceRule
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
-class UppercaseRuleSuite extends FunSuite with SparkTestBase with TestRuleBehaviors {
+class UppercaseRuleSuite extends AnyFunSuite with SparkTestBase with TestRuleBehaviors {
// scalastyle:off line.size.limit
private val uppercaseRule = UppercaseConformanceRule(order = 1, outputColumn = "ConformedName", controlCheckpoint = false, inputColumn = "name")
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/custom/CustomRuleSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/custom/CustomRuleSuite.scala
index 50ba0cd2c..44067bd93 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/custom/CustomRuleSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/custom/CustomRuleSuite.scala
@@ -18,7 +18,7 @@ package za.co.absa.enceladus.conformance.interpreter.rules.custom
import org.apache.spark.sql._
import org.apache.spark.sql.functions._
import org.mockito.Mockito.mock
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.conformance.config.ConformanceConfig
import za.co.absa.enceladus.conformance.interpreter.rules.RuleInterpreter
import za.co.absa.enceladus.conformance.interpreter.{DynamicInterpreter, ExplosionState, FeatureSwitches, InterpreterContextArgs}
@@ -26,6 +26,7 @@ import za.co.absa.enceladus.dao.MenasDAO
import za.co.absa.enceladus.model.conformanceRule.ConformanceRule
import za.co.absa.enceladus.model.{conformanceRule, Dataset => ConfDataset}
import za.co.absa.enceladus.utils.error.ErrorMessage
+import za.co.absa.enceladus.utils.fs.HdfsUtils
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
case class MyCustomRule(
@@ -56,7 +57,7 @@ case class MyCustomRuleInterpreter(rule: MyCustomRule) extends RuleInterpreter {
case class Mine(id: Int)
case class MineConfd(id: Int, myOutputCol: Double, errCol: Seq[ErrorMessage])
-class CustomRuleSuite extends FunSuite with SparkTestBase {
+class CustomRuleSuite extends AnyFunSuite with SparkTestBase {
import spark.implicits._
// we may WANT to enable control framework & spline here
@@ -89,7 +90,9 @@ class CustomRuleSuite extends FunSuite with SparkTestBase {
.setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled)
.setControlFrameworkEnabled(enableCF)
- val actualDf: DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData)
+ implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)
+
+ val actualDf: DataFrame = DynamicInterpreter().interpret(conformanceDef, inputData)
val actual: Seq[MineConfd] = actualDf.as[MineConfd].collect().toSeq
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/testcasefactories/NestedTestCaseFactory.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/testcasefactories/NestedTestCaseFactory.scala
index fbf547f10..9fead4962 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/testcasefactories/NestedTestCaseFactory.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/testcasefactories/NestedTestCaseFactory.scala
@@ -25,7 +25,7 @@ import za.co.absa.enceladus.dao.MenasDAO
import za.co.absa.enceladus.model.conformanceRule.{ConformanceRule, MappingConformanceRule}
import za.co.absa.enceladus.model.test.factories.{DatasetFactory, MappingTableFactory}
import za.co.absa.enceladus.model.{Dataset, MappingTable}
-import za.co.absa.enceladus.utils.fs.FileSystemVersionUtils
+import za.co.absa.enceladus.utils.fs.{HdfsUtils, LocalFsUtils}
/**
@@ -214,8 +214,8 @@ class NestedTestCaseFactory(implicit spark: SparkSession) {
import NestedTestCaseFactory._
private val fs = FileSystem.get(spark.sparkContext.hadoopConfiguration)
- private val fsUtils = new FileSystemVersionUtils(spark.sparkContext.hadoopConfiguration)
- private val tempDir = fsUtils.getLocalTemporaryDirectory("test_case_factory")
+ private val fsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)
+ private val tempDir = LocalFsUtils.getLocalTemporaryDirectory("test_case_factory")
/**
* This method returns all objects necessary to run a dynamic conformance job.
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/testcasefactories/SimpleTestCaseFactory.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/testcasefactories/SimpleTestCaseFactory.scala
index 84f508c1b..a07cd9e71 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/testcasefactories/SimpleTestCaseFactory.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/testcasefactories/SimpleTestCaseFactory.scala
@@ -25,7 +25,7 @@ import za.co.absa.enceladus.dao.MenasDAO
import za.co.absa.enceladus.model.conformanceRule.{ConformanceRule, MappingConformanceRule}
import za.co.absa.enceladus.model.test.factories.{DatasetFactory, MappingTableFactory}
import za.co.absa.enceladus.model.{Dataset, DefaultValue, MappingTable}
-import za.co.absa.enceladus.utils.fs.FileSystemVersionUtils
+import za.co.absa.enceladus.utils.fs.{HdfsUtils, LocalFsUtils}
object SimpleTestCaseFactory {
@@ -124,8 +124,8 @@ class SimpleTestCaseFactory(implicit spark: SparkSession) {
import spark.implicits._
private val fs = FileSystem.get(spark.sparkContext.hadoopConfiguration)
- private val fsUtils = new FileSystemVersionUtils(spark.sparkContext.hadoopConfiguration)
- private val tempDir = fsUtils.getLocalTemporaryDirectory("test_case_factory")
+ private val fsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)
+ private val tempDir = LocalFsUtils.getLocalTemporaryDirectory("test_case_factory")
/**
* This method returns all objects necessary to run a dynamic conformance job.
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/streaming/HyperConformanceIntegrationSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/streaming/HyperConformanceIntegrationSuite.scala
index dd3fa0114..2d41a62e1 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/streaming/HyperConformanceIntegrationSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/streaming/HyperConformanceIntegrationSuite.scala
@@ -16,10 +16,10 @@
package za.co.absa.enceladus.conformance.streaming
import org.apache.spark.sql.DataFrame
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.conformance.interpreter.fixtures.{NestedStructsFixture, StreamingFixture}
-class HyperConformanceIntegrationSuite extends FunSuite with StreamingFixture with NestedStructsFixture {
+class HyperConformanceIntegrationSuite extends AnyFunSuite with StreamingFixture with NestedStructsFixture {
test("Test with catalyst workaround, literal factory") {
implicit val infoDateFactory: InfoDateFactory = new InfoDateLiteralFactory("2020-05-23")
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/streaming/HyperConformanceSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/streaming/HyperConformanceSuite.scala
index 43dc7f29b..f5cb3d89a 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/streaming/HyperConformanceSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/streaming/HyperConformanceSuite.scala
@@ -17,7 +17,8 @@ package za.co.absa.enceladus.conformance.streaming
import java.util.ServiceLoader
-import org.scalatest.{FlatSpec, Matchers}
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
import za.co.absa.enceladus.conformance.HyperConformance
import za.co.absa.hyperdrive.ingestor.api.{ComponentFactory, ComponentFactoryProvider}
import za.co.absa.hyperdrive.ingestor.api.transformer.{StreamTransformerFactory, StreamTransformerFactoryProvider}
@@ -30,7 +31,7 @@ import scala.reflect.ClassTag
* It is based on:
* https://github.com/AbsaOSS/hyperdrive/blob/v3.0.0/ingestor-default/src/test/scala/za/co/absa/hyperdrive/ingestor/implementation/TestServiceProviderConfiguration.scala
*/
-class HyperConformanceSuite extends FlatSpec with Matchers {
+class HyperConformanceSuite extends AnyFlatSpec with Matchers {
behavior of "Service Provider Interface (META-INF/services)"
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/streaming/InfoDateFactorySuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/streaming/InfoDateFactorySuite.scala
index d02fe6663..e9cdbf7a3 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/streaming/InfoDateFactorySuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/streaming/InfoDateFactorySuite.scala
@@ -16,12 +16,12 @@
package za.co.absa.enceladus.conformance.streaming
import org.apache.commons.configuration2.Configuration
-import org.mockito.Mockito._
-import org.scalatest.mockito.MockitoSugar
-import org.scalatest.{Matchers, WordSpec}
+import org.scalatest.matchers.should.Matchers
+import org.mockito.scalatest.MockitoSugar
+import org.scalatest.wordspec.AnyWordSpec
import za.co.absa.enceladus.conformance.HyperConformanceAttributes._
-class InfoDateFactorySuite extends WordSpec with Matchers with MockitoSugar {
+class InfoDateFactorySuite extends AnyWordSpec with Matchers with MockitoSugar {
private val configStub: Configuration = mock[Configuration]
"InfoDateFactory" should {
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationCobolAsciiSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationCobolAsciiSuite.scala
index 2081ac3ef..240599ffb 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationCobolAsciiSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationCobolAsciiSuite.scala
@@ -19,15 +19,16 @@ import java.nio.charset.StandardCharsets
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.types.{StringType, StructField, StructType}
-import org.scalatest.mockito.MockitoSugar
-import org.scalatest.{Outcome, fixture}
+import org.mockito.scalatest.MockitoSugar
+import org.scalatest.Outcome
+import org.scalatest.funsuite.FixtureAnyFunSuite
import za.co.absa.enceladus.dao.MenasDAO
import za.co.absa.enceladus.model.Dataset
import za.co.absa.enceladus.standardization.config.StandardizationConfig
import za.co.absa.enceladus.standardization.fixtures.TempFileFixture
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
-class StandardizationCobolAsciiSuite extends fixture.FunSuite with SparkTestBase with TempFileFixture with MockitoSugar {
+class StandardizationCobolAsciiSuite extends FixtureAnyFunSuite with SparkTestBase with TempFileFixture with MockitoSugar {
type FixtureParam = String
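
fixture.FunSuite is renamed to FixtureAnyFunSuite in ScalaTest 3.1; the FixtureParam/withFixture contract stays the same. A minimal sketch of the pattern these suites rely on (illustrative content, not from this PR):

import org.scalatest.Outcome
import org.scalatest.funsuite.FixtureAnyFunSuite

// Illustrative example only (not from this PR): FixtureAnyFunSuite is the 3.1 name for fixture.FunSuite.
class ExampleFixtureSuite extends FixtureAnyFunSuite {
  type FixtureParam = String

  override protected def withFixture(test: OneArgTest): Outcome = {
    val tempContent: FixtureParam = "fixture payload"  // set up the fixture
    try test(tempContent)                              // run the test with it
    finally ()                                         // tear down (nothing to clean up here)
  }

  test("each test receives the fixture") { content =>
    assert(content.nonEmpty)
  }
}
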
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationCobolEbcdicSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationCobolEbcdicSuite.scala
index 59a3ec86c..ea0732a64 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationCobolEbcdicSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationCobolEbcdicSuite.scala
@@ -17,15 +17,16 @@ package za.co.absa.enceladus.standardization
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.types.{StringType, StructField, StructType}
-import org.scalatest.mockito.MockitoSugar
-import org.scalatest.{Outcome, fixture}
+import org.scalatest.funsuite.FixtureAnyFunSuite
+import org.mockito.scalatest.MockitoSugar
+import org.scalatest.Outcome
import za.co.absa.enceladus.dao.MenasDAO
import za.co.absa.enceladus.model.Dataset
import za.co.absa.enceladus.standardization.config.StandardizationConfig
import za.co.absa.enceladus.standardization.fixtures.TempFileFixture
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
-class StandardizationCobolEbcdicSuite extends fixture.FunSuite with SparkTestBase with TempFileFixture with MockitoSugar {
+class StandardizationCobolEbcdicSuite extends FixtureAnyFunSuite with SparkTestBase with TempFileFixture with MockitoSugar {
type FixtureParam = String
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationFixedWidthSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationFixedWidthSuite.scala
index 44e51f04a..ab37ef413 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationFixedWidthSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationFixedWidthSuite.scala
@@ -16,8 +16,8 @@
package za.co.absa.enceladus.standardization
import org.apache.spark.sql.types.{DataType, StructType}
-import org.scalatest.FunSuite
-import org.scalatest.mockito.MockitoSugar
+import org.scalatest.funsuite.AnyFunSuite
+import org.mockito.scalatest.MockitoSugar
import org.slf4j.{Logger, LoggerFactory}
import za.co.absa.enceladus.dao.MenasDAO
import za.co.absa.enceladus.model.Dataset
@@ -29,7 +29,7 @@ import za.co.absa.enceladus.utils.testUtils.SparkTestBase
import za.co.absa.enceladus.utils.implicits.DataFrameImplicits.DataFrameEnhancements
import za.co.absa.enceladus.utils.udf.UDFLibrary
-class StandardizationFixedWidthSuite extends FunSuite with SparkTestBase with MockitoSugar{
+class StandardizationFixedWidthSuite extends AnyFunSuite with SparkTestBase with MockitoSugar {
private implicit val udfLibrary:UDFLibrary = new UDFLibrary()
private val log: Logger = LoggerFactory.getLogger(this.getClass)
private val argsBase = ("--dataset-name Foo --dataset-version 1 --report-date 2020-06-22 --report-version 1 " +
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationJsonSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationJsonSuite.scala
index 278076f22..33b0c86e5 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationJsonSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationJsonSuite.scala
@@ -16,8 +16,8 @@
package za.co.absa.enceladus.standardization
import org.apache.spark.sql.types.{DataType, StructType}
-import org.scalatest.FunSuite
-import org.scalatest.mockito.MockitoSugar
+import org.scalatest.funsuite.AnyFunSuite
+import org.mockito.scalatest.MockitoSugar
import org.slf4j.Logger
import za.co.absa.enceladus.dao.MenasDAO
import za.co.absa.enceladus.model.Dataset
@@ -29,7 +29,7 @@ import za.co.absa.enceladus.utils.testUtils.SparkTestBase
import za.co.absa.enceladus.utils.implicits.DataFrameImplicits.DataFrameEnhancements
import za.co.absa.enceladus.utils.udf.UDFLibrary
-class StandardizationJsonSuite extends FunSuite with SparkTestBase with MockitoSugar{
+class StandardizationJsonSuite extends AnyFunSuite with SparkTestBase with MockitoSugar {
private implicit val udfLibrary:UDFLibrary = new UDFLibrary()
private val standardizationReader = new StandardizationPropertiesProvider()
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationParquetSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationParquetSuite.scala
index 0e758922e..adc1d6dea 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationParquetSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationParquetSuite.scala
@@ -19,8 +19,9 @@ import java.util.UUID
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.types._
-import org.scalatest.mockito.MockitoSugar
-import org.scalatest.{Outcome, fixture}
+import org.scalatest.funsuite.FixtureAnyFunSuite
+import org.mockito.scalatest.MockitoSugar
+import org.scalatest.Outcome
import org.slf4j.Logger
import za.co.absa.enceladus.common.RecordIdGeneration.IdType
import za.co.absa.enceladus.dao.MenasDAO
@@ -33,7 +34,7 @@ import za.co.absa.enceladus.utils.schema.MetadataKeys
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
import za.co.absa.enceladus.utils.udf.UDFLibrary
-class StandardizationParquetSuite extends fixture.FunSuite with SparkTestBase with TempFileFixture with MockitoSugar {
+class StandardizationParquetSuite extends FixtureAnyFunSuite with SparkTestBase with TempFileFixture with MockitoSugar {
type FixtureParam = String
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationRerunSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationRerunSuite.scala
index a57e50a70..b403e5f00 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationRerunSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationRerunSuite.scala
@@ -20,8 +20,9 @@ import java.nio.charset.StandardCharsets
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._
-import org.scalatest.mockito.MockitoSugar
-import org.scalatest.{Outcome, fixture}
+import org.scalatest.funsuite.FixtureAnyFunSuite
+import org.mockito.scalatest.MockitoSugar
+import org.scalatest.Outcome
import org.slf4j.Logger
import za.co.absa.enceladus.dao.MenasDAO
import za.co.absa.enceladus.model.Dataset
@@ -33,7 +34,7 @@ import za.co.absa.enceladus.utils.testUtils.SparkTestBase
import za.co.absa.enceladus.utils.udf.UDFLibrary
import za.co.absa.enceladus.utils.validation.ValidationException
-class StandardizationRerunSuite extends fixture.FunSuite with SparkTestBase with TempFileFixture with MockitoSugar {
+class StandardizationRerunSuite extends FixtureAnyFunSuite with SparkTestBase with TempFileFixture with MockitoSugar {
import za.co.absa.enceladus.utils.implicits.DataFrameImplicits.DataFrameEnhancements
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/config/StandardizationParserSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/config/StandardizationParserSuite.scala
index 63e3e136d..ed20ba4c1 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/config/StandardizationParserSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/config/StandardizationParserSuite.scala
@@ -17,13 +17,13 @@ package za.co.absa.enceladus.standardization.config
import java.time.ZonedDateTime
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.dao.auth.{MenasKerberosCredentials, MenasPlainCredentials}
import za.co.absa.enceladus.model.Dataset
import za.co.absa.enceladus.standardization.StandardizationExecution
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
-class StandardizationParserSuite extends FunSuite with SparkTestBase {
+class StandardizationParserSuite extends AnyFunSuite with SparkTestBase {
private val year = "2018"
private val month = "12"
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/csv/EnhancedStandardizationCsvSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/csv/EnhancedStandardizationCsvSuite.scala
index 48397c8c4..f8d8a5a9a 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/csv/EnhancedStandardizationCsvSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/csv/EnhancedStandardizationCsvSuite.scala
@@ -15,10 +15,11 @@
package za.co.absa.enceladus.standardization.csv
-import org.scalatest.{Outcome, fixture}
+import org.scalatest.funsuite.FixtureAnyFunSuite
+import org.scalatest.Outcome
import za.co.absa.enceladus.standardization.fixtures.CsvFileFixture
-class EnhancedStandardizationCsvSuite extends fixture.FunSuite with CsvFileFixture {
+class EnhancedStandardizationCsvSuite extends FixtureAnyFunSuite with CsvFileFixture {
import za.co.absa.enceladus.utils.implicits.DataFrameImplicits.DataFrameEnhancements
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/csv/NoneValueStandardizationCsvSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/csv/NoneValueStandardizationCsvSuite.scala
index 471dbd886..8addf882a 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/csv/NoneValueStandardizationCsvSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/csv/NoneValueStandardizationCsvSuite.scala
@@ -1,9 +1,10 @@
package za.co.absa.enceladus.standardization.csv
-import org.scalatest.{Outcome, fixture}
+import org.scalatest.funsuite.FixtureAnyFunSuite
+import org.scalatest.Outcome
import za.co.absa.enceladus.standardization.fixtures.CsvFileFixture
-class NoneValueStandardizationCsvSuite extends fixture.FunSuite with CsvFileFixture {
+class NoneValueStandardizationCsvSuite extends FixtureAnyFunSuite with CsvFileFixture {
import za.co.absa.enceladus.utils.implicits.DataFrameImplicits.DataFrameEnhancements
// A field containing the delimiter with the escape has to be enclosed in specified quotes
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/csv/StandardizationCsvSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/csv/StandardizationCsvSuite.scala
index 61efcfc76..9b12a5410 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/csv/StandardizationCsvSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/csv/StandardizationCsvSuite.scala
@@ -16,10 +16,11 @@
package za.co.absa.enceladus.standardization.csv
import org.apache.spark.SparkException
-import org.scalatest.{Outcome, fixture}
+import org.scalatest.funsuite.FixtureAnyFunSuite
+import org.scalatest.Outcome
import za.co.absa.enceladus.standardization.fixtures.CsvFileFixture
-class StandardizationCsvSuite extends fixture.FunSuite with CsvFileFixture{
+class StandardizationCsvSuite extends FixtureAnyFunSuite with CsvFileFixture {
import za.co.absa.enceladus.utils.implicits.DataFrameImplicits.DataFrameEnhancements
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/fixtures/CsvFileFixture.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/fixtures/CsvFileFixture.scala
index 2d59a6dad..3cef918c4 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/fixtures/CsvFileFixture.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/fixtures/CsvFileFixture.scala
@@ -20,7 +20,7 @@ import java.nio.charset.{Charset, StandardCharsets}
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
-import org.scalatest.mockito.MockitoSugar
+import org.mockito.scalatest.MockitoSugar
import za.co.absa.enceladus.dao.MenasDAO
import za.co.absa.enceladus.model.Dataset
import za.co.absa.enceladus.standardization.StandardizationPropertiesProvider
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/CounterPartySuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/CounterPartySuite.scala
index 3bcdf2e37..6ab624c36 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/CounterPartySuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/CounterPartySuite.scala
@@ -16,7 +16,7 @@
package za.co.absa.enceladus.standardization.interpreter
import org.apache.spark.sql.types._
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.utils.error.ErrorMessage
import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase}
import za.co.absa.enceladus.utils.udf.UDFLibrary
@@ -24,7 +24,7 @@ import za.co.absa.enceladus.utils.udf.UDFLibrary
case class Root(ConformedParty: Party, errCol: Seq[ErrorMessage] = Seq.empty)
case class Party(key: Integer, clientKeys1: Seq[String], clientKeys2: Seq[String])
-class CounterPartySuite extends FunSuite with SparkTestBase with LoggerTestBase {
+class CounterPartySuite extends AnyFunSuite with SparkTestBase with LoggerTestBase {
test("Mimic running standardization twice on counter party") {
import spark.implicits._
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/DateTimeSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/DateTimeSuite.scala
index 95790c827..2accd11df 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/DateTimeSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/DateTimeSuite.scala
@@ -19,7 +19,7 @@ import java.sql.{Date, Timestamp}
import org.apache.spark.sql.types._
import org.apache.spark.sql.{DataFrame, Dataset, Row}
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.standardization.interpreter.stages.SchemaChecker
import za.co.absa.enceladus.standardization.samples.TestSamples
import za.co.absa.enceladus.utils.error.ErrorMessage
@@ -30,7 +30,7 @@ import za.co.absa.enceladus.utils.validation.{SchemaValidator, ValidationError,
import scala.io.Source
-class DateTimeSuite extends FunSuite with SparkTestBase with LoggerTestBase{
+class DateTimeSuite extends AnyFunSuite with SparkTestBase with LoggerTestBase {
import spark.implicits._
lazy val data: DataFrame = spark.createDataFrame(TestSamples.dateSamples)
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/SampleDataSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/SampleDataSuite.scala
index 83a768385..a514790ac 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/SampleDataSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/SampleDataSuite.scala
@@ -16,13 +16,13 @@
package za.co.absa.enceladus.standardization.interpreter
import org.apache.spark.sql.types.{DataType, StructType}
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.standardization.samples.{StdEmployee, TestSamples}
import za.co.absa.enceladus.utils.fs.FileReader
import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase}
import za.co.absa.enceladus.utils.udf.UDFLibrary
-class SampleDataSuite extends FunSuite with SparkTestBase with LoggerTestBase {
+class SampleDataSuite extends AnyFunSuite with SparkTestBase with LoggerTestBase {
test("Simple Example Test") {
import spark.implicits._
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreterSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreterSuite.scala
index cd5317899..8367eecc9 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreterSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreterSuite.scala
@@ -16,7 +16,7 @@
package za.co.absa.enceladus.standardization.interpreter
import org.apache.spark.sql.types._
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.standardization.interpreter.StandardizationInterpreterSuite._
import za.co.absa.enceladus.utils.error.ErrorMessage
import za.co.absa.enceladus.utils.fs.FileReader
@@ -24,7 +24,7 @@ import za.co.absa.enceladus.utils.general.JsonUtils
import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase}
import za.co.absa.enceladus.utils.udf.UDFLibrary
-class StandardizationInterpreterSuite extends FunSuite with SparkTestBase with LoggerTestBase {
+class StandardizationInterpreterSuite extends AnyFunSuite with SparkTestBase with LoggerTestBase {
import spark.implicits._
private implicit val udfLib: UDFLibrary = new UDFLibrary
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_ArraySuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_ArraySuite.scala
index 6da6c7e83..098ae2dc7 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_ArraySuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_ArraySuite.scala
@@ -16,7 +16,8 @@
package za.co.absa.enceladus.standardization.interpreter
import org.apache.spark.sql.types._
-import org.scalatest.{FunSuite, Matchers}
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers
import za.co.absa.enceladus.common.error.ErrorMessageFactory
import za.co.absa.enceladus.utils.general.JsonUtils
import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase}
@@ -25,7 +26,7 @@ import za.co.absa.enceladus.utils.schema.MetadataKeys
import za.co.absa.enceladus.utils.udf.UDFLibrary
import za.co.absa.enceladus.utils.validation.ValidationException
-class StandardizationInterpreter_ArraySuite extends FunSuite with SparkTestBase with LoggerTestBase with Matchers {
+class StandardizationInterpreter_ArraySuite extends AnyFunSuite with SparkTestBase with LoggerTestBase with Matchers {
import spark.implicits._
private implicit val udfLib: UDFLibrary = new UDFLibrary
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_BinarySuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_BinarySuite.scala
index 256a220df..bd4a5856b 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_BinarySuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_BinarySuite.scala
@@ -16,13 +16,14 @@
package za.co.absa.enceladus.standardization.interpreter
import org.apache.spark.sql.types.{BinaryType, Metadata, MetadataBuilder, StructField, StructType}
-import org.scalatest.{FunSuite, Matchers}
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers
import za.co.absa.enceladus.utils.error.ErrorMessage
import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase}
import za.co.absa.enceladus.utils.udf.UDFLibrary
import za.co.absa.enceladus.utils.validation.ValidationException
-class StandardizationInterpreter_BinarySuite extends FunSuite with SparkTestBase with LoggerTestBase with Matchers {
+class StandardizationInterpreter_BinarySuite extends AnyFunSuite with SparkTestBase with LoggerTestBase with Matchers {
import spark.implicits._
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_DateSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_DateSuite.scala
index 66baab7fb..b93a45e58 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_DateSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_DateSuite.scala
@@ -18,12 +18,12 @@ package za.co.absa.enceladus.standardization.interpreter
import java.sql.Date
import org.apache.spark.sql.types.{DateType, MetadataBuilder, StructField, StructType}
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.utils.error.ErrorMessage
import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase}
import za.co.absa.enceladus.utils.udf.UDFLibrary
-class StandardizationInterpreter_DateSuite extends FunSuite with SparkTestBase with LoggerTestBase {
+class StandardizationInterpreter_DateSuite extends AnyFunSuite with SparkTestBase with LoggerTestBase {
import spark.implicits._
private implicit val udfLib: UDFLibrary = new UDFLibrary
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_DecimalSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_DecimalSuite.scala
index d4431cf60..325f6389b 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_DecimalSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_DecimalSuite.scala
@@ -19,13 +19,13 @@ import java.text.{DecimalFormat, NumberFormat}
import java.util.Locale
import org.apache.spark.sql.types._
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.utils.error.ErrorMessage
import za.co.absa.enceladus.utils.schema.MetadataKeys
import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase}
import za.co.absa.enceladus.utils.udf.UDFLibrary
-class StandardizationInterpreter_DecimalSuite extends FunSuite with SparkTestBase with LoggerTestBase {
+class StandardizationInterpreter_DecimalSuite extends AnyFunSuite with SparkTestBase with LoggerTestBase {
import spark.implicits._
private implicit val udfLib: UDFLibrary = new UDFLibrary
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_FractionalSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_FractionalSuite.scala
index 4a7458257..d1136fb5d 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_FractionalSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_FractionalSuite.scala
@@ -16,13 +16,13 @@
package za.co.absa.enceladus.standardization.interpreter
import org.apache.spark.sql.types._
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.utils.error.ErrorMessage
import za.co.absa.enceladus.utils.schema.MetadataKeys
import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase}
import za.co.absa.enceladus.utils.udf.UDFLibrary
-class StandardizationInterpreter_FractionalSuite extends FunSuite with SparkTestBase with LoggerTestBase {
+class StandardizationInterpreter_FractionalSuite extends AnyFunSuite with SparkTestBase with LoggerTestBase {
import spark.implicits._
private implicit val udfLib: UDFLibrary = new UDFLibrary
@@ -72,7 +72,7 @@ class StandardizationInterpreter_FractionalSuite extends FunSuite with SparkTest
FractionalRow("02-Null", Option(0), None, Seq(
ErrorMessage.stdNullErr("floatField"))),
FractionalRow("03-Long", Option(9.223372E18F), Option(-9.223372036854776E18)),
- FractionalRow("04-infinity", Option(0), None, Seq(
+ FractionalRow("04-infinity", Option(0), None, Seq(
ErrorMessage.stdCastErr("floatField", "-Infinity"),
ErrorMessage.stdCastErr("doubleField", "Infinity"))),
FractionalRow("05-Really big", Option(0), None, Seq(
@@ -132,7 +132,7 @@ class StandardizationInterpreter_FractionalSuite extends FunSuite with SparkTest
FractionalRow("02-Null", Option(0), None, Seq(
ErrorMessage.stdNullErr("floatField"))),
FractionalRow("03-Long", Option(9.223372E18F), Option(-9.223372036854776E18)),
- FractionalRow("04-Infinity", Option(0), None, Seq(
+ FractionalRow("04-Infinity", Option(0), None, Seq(
ErrorMessage.stdCastErr("floatField", "-Infinity"),
ErrorMessage.stdCastErr("doubleField", "Infinity"))),
FractionalRow("05-Really big", Option(0), Option(reallyBig), Seq(
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_IntegralSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_IntegralSuite.scala
index 629df32bd..b3001e5d2 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_IntegralSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_IntegralSuite.scala
@@ -19,13 +19,13 @@ import java.text.{DecimalFormat, NumberFormat}
import java.util.Locale
import org.apache.spark.sql.types._
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.utils.error.ErrorMessage
import za.co.absa.enceladus.utils.schema.MetadataKeys
import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase}
import za.co.absa.enceladus.utils.udf.UDFLibrary
-class StandardizationInterpreter_IntegralSuite extends FunSuite with SparkTestBase with LoggerTestBase{
+class StandardizationInterpreter_IntegralSuite extends AnyFunSuite with SparkTestBase with LoggerTestBase {
import spark.implicits._
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_TimestampSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_TimestampSuite.scala
index 713a25945..e006257db 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_TimestampSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_TimestampSuite.scala
@@ -18,12 +18,12 @@ package za.co.absa.enceladus.standardization.interpreter
import java.sql.Timestamp
import org.apache.spark.sql.types.{MetadataBuilder, StructField, StructType, TimestampType}
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.utils.error.ErrorMessage
import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase}
import za.co.absa.enceladus.utils.udf.UDFLibrary
-class StandardizationInterpreter_TimestampSuite extends FunSuite with SparkTestBase with LoggerTestBase {
+class StandardizationInterpreter_TimestampSuite extends AnyFunSuite with SparkTestBase with LoggerTestBase {
import spark.implicits._
private implicit val udfLib: UDFLibrary = new UDFLibrary
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StdInterpreterSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StdInterpreterSuite.scala
index 8a4d2dcde..438d85aaf 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StdInterpreterSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StdInterpreterSuite.scala
@@ -19,7 +19,7 @@ import java.sql.{Date, Timestamp}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.utils.error.ErrorMessage
import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase}
import za.co.absa.enceladus.utils.udf.UDFLibrary
@@ -34,7 +34,7 @@ case class MyWrapperStd(counterparty: MyHolder, errCol: Seq[ErrorMessage])
case class Time(id: Int, date: String, timestamp: String)
case class StdTime(id: Int, date: Date, timestamp: Timestamp, errCol: List[ErrorMessage])
-class StdInterpreterSuite extends FunSuite with SparkTestBase with LoggerTestBase {
+class StdInterpreterSuite extends AnyFunSuite with SparkTestBase with LoggerTestBase {
import spark.implicits._
case class subCC(subFieldA: Integer, subFieldB: String)
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/stages/PlainSchemaGeneratorSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/stages/PlainSchemaGeneratorSuite.scala
index d866e8dba..5b739cc84 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/stages/PlainSchemaGeneratorSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/stages/PlainSchemaGeneratorSuite.scala
@@ -16,10 +16,10 @@
package za.co.absa.enceladus.standardization.interpreter.stages
import org.apache.spark.sql.types._
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
-class PlainSchemaGeneratorSuite extends FunSuite with SparkTestBase {
+class PlainSchemaGeneratorSuite extends AnyFunSuite with SparkTestBase {
private val schema = StructType(Seq(
StructField("a", IntegerType, nullable = false),
StructField("b", IntegerType, nullable = false, new MetadataBuilder().putString("meta", "data").build),
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/stages/TypeParserSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/stages/TypeParserSuite.scala
index e36789830..f6f1c0120 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/stages/TypeParserSuite.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/stages/TypeParserSuite.scala
@@ -16,7 +16,7 @@
package za.co.absa.enceladus.standardization.interpreter.stages
import org.apache.spark.sql.types._
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
import za.co.absa.enceladus.utils.types.TypedStructField.TypedStructFieldTagged
import za.co.absa.enceladus.utils.types.parsers.NumericParser
@@ -25,7 +25,7 @@ import za.co.absa.enceladus.utils.udf.{UDFLibrary, UDFResult}
import scala.util.Success
-class TypeParserSuite extends FunSuite with SparkTestBase {
+class TypeParserSuite extends AnyFunSuite with SparkTestBase {
private implicit val udfLib: UDFLibrary = new UDFLibrary
private implicit val defaults: Defaults = GlobalDefaults
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/stages/TypeParserSuiteTemplate.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/stages/TypeParserSuiteTemplate.scala
index 3c4ef410a..f940524ef 100644
--- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/stages/TypeParserSuiteTemplate.scala
+++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/stages/TypeParserSuiteTemplate.scala
@@ -20,7 +20,7 @@ import java.sql.{Date, Timestamp}
import org.apache.log4j.{LogManager, Logger}
import org.apache.spark.sql.types._
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.standardization.interpreter.dataTypes.ParseOutput
import za.co.absa.enceladus.standardization.interpreter.stages.TypeParserSuiteTemplate._
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
@@ -28,7 +28,7 @@ import za.co.absa.enceladus.utils.time.DateTimePattern
import za.co.absa.enceladus.utils.types.{Defaults, GlobalDefaults, TypedStructField}
import za.co.absa.enceladus.utils.udf.UDFLibrary
-trait TypeParserSuiteTemplate extends FunSuite with SparkTestBase {
+trait TypeParserSuiteTemplate extends AnyFunSuite with SparkTestBase {
private implicit val udfLib: UDFLibrary = new UDFLibrary
private implicit val defaults: Defaults = GlobalDefaults
diff --git a/utils/src/main/scala/za/co/absa/enceladus/utils/fs/DistributedFsUtils.scala b/utils/src/main/scala/za/co/absa/enceladus/utils/fs/DistributedFsUtils.scala
new file mode 100644
index 000000000..862491650
--- /dev/null
+++ b/utils/src/main/scala/za/co/absa/enceladus/utils/fs/DistributedFsUtils.scala
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2018 ABSA Group Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package za.co.absa.enceladus.utils.fs
+
+/**
+ * An abstraction over distributed file system operations (e.g. HDFS, S3) used for date partitioning and version control
+ */
+
+trait DistributedFsUtils {
+
+ /**
+ * Check if a given path exists on the distributed Fs
+ */
+ def exists(distPath: String): Boolean
+
+ def read(distPath: String): String
+
+ /**
+ * Returns distributed directory size in bytes
+ */
+ def getDirectorySize(distPath: String): Long
+
+ /**
+ * Returns distributed directory size in bytes, skipping hidden files and directories (starting with '_' or '.').
+ *
+ * @param distPath A path to a directory or a file.
+ * @return Directory size in bytes
+ */
+ def getDirectorySizeNoHidden(distPath: String): Long
+
+ /**
+ * Checks if the distributed-FS path contains non-splittable files
+ */
+ def isNonSplittable(distPath: String): Boolean
+
+ /**
+ * Deletes a distributed-FS directory and all its contents recursively
+ */
+ def deleteDirectoryRecursively(distPath: String): Unit
+
+ /**
+ * Finds the latest version given a publish folder on distributed-FS
+ *
+ * @param publishPath The distributed-FS path to the publish folder containing versions
+ * @param reportDate The string representation of the report date used to infer the latest version
+ * @return the latest version or 0 in case no versions exist
+ */
+ def getLatestVersion(publishPath: String, reportDate: String): Int
+
+}
+
+object DistributedFsUtils {
+ val nonSplittableExtensions = List("gz")
+}
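
The new DistributedFsUtils trait is the seam that lets the Spark jobs run against either HDFS (HdfsUtils) or S3 (S3FsUtils). A hedged sketch of caller code written against the abstraction; PublishPathInspector is a made-up name for illustration:

import za.co.absa.enceladus.utils.fs.DistributedFsUtils

// Illustrative helper, not part of this PR: code written against the trait works
// unchanged whether an HdfsUtils or an S3FsUtils instance is supplied.
object PublishPathInspector {
  def describe(publishPath: String, reportDate: String)(implicit fsUtils: DistributedFsUtils): String = {
    val latestVersion = fsUtils.getLatestVersion(publishPath, reportDate)
    val dataSizeBytes = fsUtils.getDirectorySizeNoHidden(publishPath)
    s"latest version: $latestVersion, data size (no hidden files): $dataSizeBytes bytes"
  }
}
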
diff --git a/utils/src/main/scala/za/co/absa/enceladus/utils/fs/FileSystemVersionUtils.scala b/utils/src/main/scala/za/co/absa/enceladus/utils/fs/HdfsUtils.scala
similarity index 73%
rename from utils/src/main/scala/za/co/absa/enceladus/utils/fs/FileSystemVersionUtils.scala
rename to utils/src/main/scala/za/co/absa/enceladus/utils/fs/HdfsUtils.scala
index 37b99f221..dad506d11 100644
--- a/utils/src/main/scala/za/co/absa/enceladus/utils/fs/FileSystemVersionUtils.scala
+++ b/utils/src/main/scala/za/co/absa/enceladus/utils/fs/HdfsUtils.scala
@@ -31,17 +31,17 @@ import scala.util.Try
* A set of functions to help with the date partitioning and version control
*/
-class FileSystemVersionUtils(conf: Configuration) {
+class HdfsUtils(conf: Configuration) extends DistributedFsUtils {
- private val log = LogManager.getLogger("enceladus.utils.fs")
+ private val log = LogManager.getLogger("enceladus.utils.fs.HdfsUtils")
private val fs = FileSystem.get(conf)
/**
- * Split path URI by separating scheme+server and path part
+ * Split HDFS path URI by separating scheme+server and path part
* Example:
* hdfs://server:8020/user/data/input -> (hdfs://server:8020, /user/data/input)
* /user/data/input -> ("", /user/data/input)
*/
- def splitUriPath(path: Path): (String, String) = {
+ private[fs] def splitUriPath(path: Path): (String, String) = {
val uri = path.toUri
val scheme = uri.getScheme
val authority = uri.getAuthority
@@ -73,39 +73,24 @@ class FileSystemVersionUtils(conf: Configuration) {
})
}
- /**
- * Creates a temporary directory in the local filesystem.
- *
- * @param prefix A prefix to use for the temporary directory.
- * @return A path to a temporary directory.
- */
- def getLocalTemporaryDirectory(prefix: String): String = {
- val tmpPath = Files.createTempDirectory(prefix)
- tmpPath.toAbsolutePath.toString
- }
+
/**
* Check if a given path exists on HDFS
*/
- def hdfsExists(path: String): Boolean = {
+ override def exists(path: String): Boolean = {
log.info(s"Cheking if $path exists")
fs.exists(new Path(path))
}
- /**
- * Check if a given files exists on the local file system
- */
- def localExists(path: String): Boolean = {
- new File(path).exists()
- }
/**
* Function which determines whether the file exists on HDFS or local file system
*
*/
- def exists(path: String): Boolean = {
+ def existsLocallyOrDistributed(path: String): Boolean = {
val local = try {
- localExists(path)
+ LocalFsUtils.localExists(path)
} catch {
case e: IllegalArgumentException => false
}
@@ -114,7 +99,7 @@ class FileSystemVersionUtils(conf: Configuration) {
true
} else {
val hdfs = try {
- hdfsExists(path)
+ exists(path)
} catch {
case e: IllegalArgumentException => false
case e: ConnectException => false
@@ -136,12 +121,12 @@ class FileSystemVersionUtils(conf: Configuration) {
* @return A path to a file in the local filesystem.
*/
@throws[FileNotFoundException]
- def getLocalPathToFile(path: String): String = {
- val absolutePath = replaceHome(path)
- if (localExists(absolutePath)) {
+ def getLocalPathToFileOrCopyToLocal(path: String): String = {
+ val absolutePath = LocalFsUtils.replaceHome(path)
+ if (LocalFsUtils.localExists(absolutePath)) {
absolutePath
- } else if (hdfsExists(path)) {
- hdfsFileToLocalTempFile(path)
+ } else if (exists(path)) {
+ copyDistributedFileToLocalTempFile(path)
} else {
throw new FileNotFoundException(s"File not found: $path.")
}
@@ -155,33 +140,23 @@ class FileSystemVersionUtils(conf: Configuration) {
* @return The file's content.
*/
@throws[FileNotFoundException]
- def getFileContent(path: String): String = {
- val absolutePath = replaceHome(path)
- if (localExists(absolutePath)) {
- readLocalFile(absolutePath)
- } else if (hdfsExists(path)) {
- hdfsRead(path)
+ def getLocalOrDistributedFileContent(path: String): String = {
+ val absolutePath = LocalFsUtils.replaceHome(path)
+ if (LocalFsUtils.localExists(absolutePath)) {
+ LocalFsUtils.readLocalFile(absolutePath)
+ } else if (exists(path)) {
+ read(path)
} else {
throw new FileNotFoundException(s"File not found: $path.")
}
}
- /**
- * Reads a local file fully and returns its content.
- *
- * @param path A path to a file.
- * @return The file's content.
- */
- def readLocalFile(path: String): String = {
- Files.readAllLines(Paths.get(path), StandardCharsets.UTF_8).toArray.mkString("\n")
- }
-
/**
* Read a file from HDFS and stores in local file system temp file
*
* @return The path of the local temp file
*/
- def hdfsFileToLocalTempFile(hdfsPath: String): String = {
+ def copyDistributedFileToLocalTempFile(hdfsPath: String): String = {
val in = fs.open(new Path(hdfsPath))
val content = Array.fill(in.available())(0.toByte)
in.readFully(content)
@@ -189,10 +164,13 @@ class FileSystemVersionUtils(conf: Configuration) {
tmpFile.deleteOnExit()
FileUtils.writeByteArrayToFile(tmpFile, content)
tmpFile.getAbsolutePath
+
+ // TODO: consider replacing the manual read/write above with
+ // fs.copyToLocalFile(false, new Path(hdfsPath), new Path("someLocalName"), true)
}
- def hdfsRead(path: String): String = {
- val in = fs.open(new Path(path))
+ override def read(distPath: String): String = {
+ val in = fs.open(new Path(distPath))
val content = Array.fill(in.available())(0.toByte)
in.readFully(content)
new String(content, "UTF-8")
@@ -242,13 +220,11 @@ class FileSystemVersionUtils(conf: Configuration) {
}
/**
- * Checks if the path contains non-splittable files
+ * Checks if the HDFS path contains non-splittable files
*/
- def isNonSplittable(path: String): Boolean = {
- val nonSplittableExtensions = List("gz")
-
+ override def isNonSplittable(path: String): Boolean = {
val files = getFilePaths(path)
- files.exists(file => nonSplittableExtensions.exists(file.endsWith))
+ files.exists(file => DistributedFsUtils.nonSplittableExtensions.exists(file.endsWith))
}
/**
@@ -256,13 +232,13 @@ class FileSystemVersionUtils(conf: Configuration) {
* Example:
* /path/to/dir -> ("path/to/dir/file1.extension", "path/to/dir/file2.extension")
*/
- def getFilePaths(path: String): Array[String] = {
+ private def getFilePaths(path: String): Array[String] = {
val hdfsPath = new Path(path)
fs.listStatus(hdfsPath).map(_.getPath.toString)
}
/**
- * Deletes a directory and all its contents recursively
+ * Deletes an HDFS directory and all its contents recursively
*/
def deleteDirectoryRecursively(path: String): Unit = {
log.info(s"Deleting '$path' recursively...")
@@ -279,7 +255,7 @@ class FileSystemVersionUtils(conf: Configuration) {
}
/**
- * Finds the latest version given a publish folder
+ * Finds the latest version given a publish folder on HDFS
*
* @param publishPath The HDFS path to the publish folder containing versions
* @param reportDate The string representation of the report date used to infer the latest version
@@ -299,18 +275,4 @@ class FileSystemVersionUtils(conf: Configuration) {
}
}
- /**
- * Replaces tilde ('~') with the home dir.
- *
- * @param path An input path.
- * @return An absolute output path.
- */
- def replaceHome(path: String): String = {
- if (path.matches("^~.*")) {
- //not using replaceFirst as it interprets the backslash in Windows path as escape character mangling the result
- System.getProperty("user.home") + path.substring(1)
- } else {
- path
- }
- }
}
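
The comment left in copyDistributedFileToLocalTempFile asks why fs.copyToLocalFile is not used instead of reading the whole file into memory. A hedged sketch of that alternative, assuming a temp directory is acceptable as the destination (object and method names here are illustrative):

import java.nio.file.Files

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

// Illustrative sketch, not part of this PR: let Hadoop stream the copy instead of
// buffering the full file content in memory.
object CopyToLocalSketch {
  def copyToLocalTemp(hdfsPath: String, conf: Configuration): String = {
    val fs = FileSystem.get(conf)
    val localDir = Files.createTempDirectory("enceladus_dfs_copy")
    val localPath = new Path(localDir.toString, new Path(hdfsPath).getName)
    // delSrc = false keeps the source; useRawLocalFileSystem = true avoids .crc checksum files
    fs.copyToLocalFile(false, new Path(hdfsPath), localPath, true)
    localPath.toString
  }
}
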
diff --git a/utils/src/main/scala/za/co/absa/enceladus/utils/fs/LocalFsUtils.scala b/utils/src/main/scala/za/co/absa/enceladus/utils/fs/LocalFsUtils.scala
new file mode 100644
index 000000000..9c1bf11de
--- /dev/null
+++ b/utils/src/main/scala/za/co/absa/enceladus/utils/fs/LocalFsUtils.scala
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2018 ABSA Group Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package za.co.absa.enceladus.utils.fs
+
+import java.io.File
+import java.nio.charset.StandardCharsets
+import java.nio.file.{Files, Paths}
+
+import org.apache.log4j.LogManager
+
+/**
+ * A set of helper functions for working with the local file system
+ */
+
+object LocalFsUtils {
+
+ private val log = LogManager.getLogger("enceladus.utils.fs.LocalFsUtils")
+
+ /**
+ * Creates a temporary directory in the local filesystem.
+ *
+ * @param prefix A prefix to use for the temporary directory.
+ * @return A path to a temporary directory.
+ */
+ def getLocalTemporaryDirectory(prefix: String): String = {
+ val tmpPath = Files.createTempDirectory(prefix)
+ tmpPath.toAbsolutePath.toString
+ }
+
+
+ /**
+ * Check if a given files exists on the local file system
+ */
+ def localExists(path: String): Boolean = {
+ new File(path).exists()
+ }
+
+ /**
+ * Reads a local file fully and returns its content.
+ *
+ * @param path A path to a file.
+ * @return The file's content.
+ */
+ def readLocalFile(path: String): String = {
+ Files.readAllLines(Paths.get(path), StandardCharsets.UTF_8).toArray.mkString("\n")
+ }
+
+ /**
+ * Replaces tilde ('~') with the home dir.
+ *
+ * @param path An input path.
+ * @return An absolute output path.
+ */
+ def replaceHome(path: String): String = {
+ if (path.matches("^~.*")) {
+ //not using replaceFirst as it interprets the backslash in Windows path as escape character mangling the result
+ System.getProperty("user.home") + path.substring(1)
+ } else {
+ path
+ }
+ }
+}
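
The purely local helpers are now plain functions on an object, so callers no longer need a Hadoop Configuration to touch local files. A small usage sketch (the wrapper name and behaviour are illustrative only):

import za.co.absa.enceladus.utils.fs.LocalFsUtils

// Illustrative usage, not part of this PR.
object LocalConfigReader {
  def readIfPresent(path: String): Option[String] = {
    val absolutePath = LocalFsUtils.replaceHome(path) // e.g. "~/menas-credentials.conf" -> "/home/<user>/menas-credentials.conf"
    if (LocalFsUtils.localExists(absolutePath)) {
      Some(LocalFsUtils.readLocalFile(absolutePath))
    } else {
      None
    }
  }
}
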
diff --git a/utils/src/main/scala/za/co/absa/enceladus/utils/fs/S3FsUtils.scala b/utils/src/main/scala/za/co/absa/enceladus/utils/fs/S3FsUtils.scala
new file mode 100644
index 000000000..f41c9e980
--- /dev/null
+++ b/utils/src/main/scala/za/co/absa/enceladus/utils/fs/S3FsUtils.scala
@@ -0,0 +1,271 @@
+/*
+ * Copyright 2018 ABSA Group Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package za.co.absa.enceladus.utils.fs
+
+import org.slf4j.{Logger, LoggerFactory}
+import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider
+import software.amazon.awssdk.regions.Region
+import software.amazon.awssdk.services.s3.S3Client
+import software.amazon.awssdk.services.s3.model.{S3Location => _, _}
+import za.co.absa.atum.persistence.{S3KmsSettings, S3Location}
+import za.co.absa.atum.utils.S3Utils
+import za.co.absa.atum.utils.S3Utils.StringS3LocationExt
+
+import scala.annotation.tailrec
+import scala.collection.JavaConverters._
+import scala.util.{Failure, Success, Try}
+
+// kmsSettings: S3KmsSettings is not currently used, but would be necessary if any SDK calls needed to put data on S3
+case class S3FsUtils(region: Region, kmsSettings: S3KmsSettings)(implicit credentialsProvider: AwsCredentialsProvider)
+ extends DistributedFsUtils {
+
+ protected val log: Logger = LoggerFactory.getLogger(this.getClass)
+ private[fs] val maxKeys = 1000 // overridable default
+
+ val s3Client: S3Client = getS3Client
+
+ /**
+ * Check if a given path exists on the distributed Fs
+ */
+ override def exists(distPath: String): Boolean = {
+ val location = distPath.toS3Location(region)
+
+ val headRequest = HeadObjectRequest
+ .builder().bucket(location.bucketName).key(location.path)
+ .build()
+
+ // there seems to be no doesObjectExist method as of current version https://github.com/aws/aws-sdk-java-v2/issues/392
+ Try {
+ s3Client.headObject(headRequest)
+ } match {
+ case Success(_) =>
+ true
+ case Failure(_: NoSuchKeyException) =>
+ false
+ case Failure(e) => throw e
+ }
+ }
+
+ override def read(distPath: String): String = {
+ val location = distPath.toS3Location(region)
+
+ val getRequest = GetObjectRequest
+ .builder().bucket(location.bucketName).key(location.path)
+ .build()
+
+ val content = s3Client.getObjectAsBytes(getRequest).asUtf8String()
+
+ content
+ }
+
+ override def getDirectorySize(distPath: String): Long = getDirectorySize(distPath, _ => true)
+
+ /**
+ * Returns distributed directory size in bytes
+ */
+ private[fs] def getDirectorySize(distPath: String, keyNameFilter: String => Boolean): Long = {
+
+ // setup accumulation
+ val location = distPath.toS3Location(region)
+ val initSize = 0L
+
+ def accumulateSizeOp(previousTotalSize: Long, response: ListObjectsV2Response): Long = {
+ val objects = response.contents().asScala
+ val totalSize = objects
+ .filter(obj => keyNameFilter(obj.key))
+ .foldLeft(0L) { (currentSize: Long, nextObject: S3Object) => currentSize + nextObject.size }
+
+ previousTotalSize + totalSize
+ }
+
+ listAndAccumulateRecursively(location, accumulateSizeOp, initSize)
+ }
+
+ /**
+ * Hidden files and directories are those whose name starts with '_' or '.'. This method returns true for hidden keys.
+ *
+ * @param key path on s3
+ * @return e.g. `/path/to/.hidden` => true, `/path/to/non-hidden` => false
+ */
+ private[fs] def isKeyHidden(key: String): Boolean = {
+ val fn = key.split('/').last
+
+ (fn.startsWith("_")) || (fn.startsWith("."))
+ }
+
+ /**
+ * Returns distributed directory size in bytes, skipping hidden files and directories (starting with '_' or '.').
+ *
+ * @param distPath A path to a directory or a file.
+ * @return Directory size in bytes
+ */
+ override def getDirectorySizeNoHidden(distPath: String): Long = getDirectorySize(distPath, key => !isKeyHidden(key))
+
+
+ private[fs] def isKeyNonSplittable(key: String): Boolean = {
+ val fn = key.split('/').last
+
+ DistributedFsUtils.nonSplittableExtensions.exists(fn.endsWith)
+ }
+
+ /**
+ * Checks if the distributed-FS path contains non-splittable files
+ */
+ override def isNonSplittable(distPath: String): Boolean = {
+ // setup accumulation
+ val location = distPath.toS3Location(region)
+ val initFoundValue = false
+ // we want to break out of the recursion if a non-splittable is found, because it cannot ever be unfound.
+ val breakOutCase = Some(true)
+
+ def accumulateFoundOp(previouslyFound: Boolean, response: ListObjectsV2Response): Boolean = {
+ val objects = response.contents().asScala
+ val nonSplittableFound = objects.exists(obj => isKeyNonSplittable(obj.key))
+
+ previouslyFound || nonSplittableFound // true if ever found
+ }
+
+ listAndAccumulateRecursively(location, accumulateFoundOp, initFoundValue, breakOutCase)
+ }
+
+ /**
+ * Deletes a distributed-FS directory and all its contents recursively
+ */
+ override def deleteDirectoryRecursively(distPath: String): Unit = {
+
+ // setup accumulation
+ val location = distPath.toS3Location(region)
+
+ def deleteBatchOp(acc: Unit, response: ListObjectsV2Response): Unit = { // side effect only; "accumulates" to Unit
+ val objects = response.contents().asScala
+ if (objects.nonEmpty) {
+ deleteKeys(location.bucketName, objects.map(_.key))
+ }
+ }
+
+ listAndAccumulateRecursively(location, deleteBatchOp, ())
+ }
+
+ private[fs] def deleteKeys(bucketName: String, keys: Seq[String]): Unit = {
+ require(keys.nonEmpty)
+
+ val objIds = keys.map(k => ObjectIdentifier.builder().key(k).build())
+ val request: DeleteObjectsRequest = DeleteObjectsRequest.builder().bucket(bucketName)
+ .delete(Delete.builder().objects(objIds.asJava).build())
+ .build()
+
+ val delResp: DeleteObjectsResponse = s3Client.deleteObjects(request)
+
+ if (delResp.errors().size() > 0) {
+ log.warn(s"Errors while deleting (${delResp.errors.size}):\n ${delResp.errors.asScala.map(_.message()).mkString("\n")}")
+ }
+ }
+
+ /**
+ * Finds the latest version given a publish folder on distributed-FS
+ *
+ * @param publishPath The distributed-FS path to the publish folder containing versions
+ * @param reportDate The string representation of the report date used to infer the latest version
+ * @return the latest version or 0 in case no versions exist
+ */
+ override def getLatestVersion(publishPath: String, reportDate: String): Int = {
+
+ // setup accumulation
+ val location = publishPath.toS3Location(region)
+ val initVersion = 0
+
+ // looking for $publishPath/enceladus_info_date=$reportDate/enceladus_info_version=$version
+ val prefix = s"${location.path}/enceladus_info_date=$reportDate/enceladus_info_version="
+ val prefixedLocation = location.copy(path = prefix)
+
+ def accumulateVersionOp(previousMaxVersion: Int, response: ListObjectsV2Response): Int = {
+ val objects = response.contents().asScala
+
+ val existingVersions = objects
+ .map(_.key)
+ .flatMap { key =>
+ assert(key.startsWith(prefix), s"Retrieved keys should start with $prefix, but precondition fails for $key")
+ val noPrefix = key.stripPrefix(prefix)
+ Try {
+ noPrefix.takeWhile(_.isDigit).toInt // may not hold valid int >= 1
+ } match {
+ case Success(version) if version >= 1 => Some(version)
+ case _ => None
+ }
+ }
+ .toSet
+
+ if (existingVersions.isEmpty) {
+ previousMaxVersion
+ } else {
+ Math.max(previousMaxVersion, existingVersions.max)
+ }
+ }
+
+ listAndAccumulateRecursively(prefixedLocation, accumulateVersionOp, initVersion)
+ }
+
+ private[fs] def getS3Client: S3Client = S3Utils.getS3Client(region, credentialsProvider)
+
+ /**
+ * General method to list objects and accumulate information about them. Note that the method strives to be memory-efficient,
+ * i.e. it accumulates the current batch before loading the next one (instead of the naive "load all first, process later" approach).
+ *
+ * @param location s3location - bucket & path are used
+ * @param accumulateOp operation to accumulate
+ * @param initialAccValue (initial/carry-over) accumulator value
+ * @param breakOut allows breaking out of the recursion prematurely when the given value equals the currently accumulated value.
+ * Default: None = no break out
+ * @tparam T accumulator value type
+ * @return accumulated value
+ */
+ private def listAndAccumulateRecursively[T](location: S3Location,
+ accumulateOp: (T, ListObjectsV2Response) => T,
+ initialAccValue: T,
+ breakOut: Option[T] = None): T = {
+
+ log.debug(s"listAndAccumulateRecursively($location, $accumulateOp, $initialAccValue, $breakOut)")
+
+ @tailrec
+ def listAndAccumulateRecursivelyAcc(contToken: Option[String], acc: T): T = {
+ log.debug(s"listAndAccumulateRecursivelyAcc($contToken, $acc)")
+
+ val listObjectsBuilder = ListObjectsV2Request.builder
+ .bucket(location.bucketName)
+ .prefix(location.path)
+ .maxKeys(maxKeys)
+ val listObjectsRequest = contToken.fold(listObjectsBuilder.build)(listObjectsBuilder.continuationToken(_).build)
+
+ val response: ListObjectsV2Response = s3Client.listObjectsV2(listObjectsRequest)
+ val totalAccumulated: T = accumulateOp(acc, response) // result of previous with the currently accumulated together
+
+ // the caller can define a short-circuiting condition at which no more processing is needed, hence we "break out" here
+ if (breakOut.contains(totalAccumulated)) {
+ log.debug(s"Breakout at totalAccumulated value $totalAccumulated")
+ totalAccumulated
+ } else if (!response.isTruncated) {
+ log.debug(s"Final recursion level reached.")
+ totalAccumulated
+ } else {
+ // need to recurse & accumulate deeper
+ listAndAccumulateRecursivelyAcc(Some(response.nextContinuationToken), totalAccumulated)
+ }
+ }
+
+ // run the recursive call
+ listAndAccumulateRecursivelyAcc(contToken = None, acc = initialAccValue)
+ }
+}
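
All public S3 operations above delegate the paginated ListObjectsV2 loop to listAndAccumulateRecursively and differ only in the accumulator they pass in. A hedged sketch of how a further operation (a simple object count, not part of this PR) would slot into the same pattern if it were added inside S3FsUtils:

// Illustrative method sketch, not part of this PR; it would live inside S3FsUtils,
// where `region`, `toS3Location` and `listAndAccumulateRecursively` are in scope.
def getObjectCount(distPath: String): Long = {
  val location = distPath.toS3Location(region)

  // add the number of objects in the current batch to the running total
  def accumulateCountOp(previousCount: Long, response: ListObjectsV2Response): Long =
    previousCount + response.contents().size()

  listAndAccumulateRecursively(location, accumulateCountOp, 0L)
}
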
diff --git a/utils/src/main/scala/za/co/absa/enceladus/utils/fs/example/S3FsUtilsTestJob.scala b/utils/src/main/scala/za/co/absa/enceladus/utils/fs/example/S3FsUtilsTestJob.scala
new file mode 100644
index 000000000..d5dfe8b90
--- /dev/null
+++ b/utils/src/main/scala/za/co/absa/enceladus/utils/fs/example/S3FsUtilsTestJob.scala
@@ -0,0 +1,65 @@
+/*
+ * Copyright 2018 ABSA Group Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package za.co.absa.enceladus.utils.fs.example
+
+import org.slf4j.LoggerFactory
+import software.amazon.awssdk.regions.Region
+import za.co.absa.atum.persistence.S3KmsSettings
+import za.co.absa.atum.utils.S3Utils
+import za.co.absa.enceladus.utils.fs.S3FsUtils
+
+// open question: remove this job, or create an integration test like it instead.
+// The implementation is directly suited to being run locally with a SAML profile.
+object S3FsUtilsTestJob {
+
+ private val log = LoggerFactory.getLogger(this.getClass)
+ private val bucketName = "putYourBucketBucketNameHere"
+
+ def main(args: Array[String]): Unit = {
+ val basePath = s"s3://$bucketName/exampleS3Path"
+
+ // This example relies on a local credentials profile named "saml" with access to the S3 locations used below
+ implicit val samlCredentialsProvider = S3Utils.getLocalProfileCredentialsProvider("saml")
+ val kmsKeyId = System.getenv("TOOLING_KMS_KEY_ID") // load from an environment property in order not to disclose it here
+ log.info(s"kmsKeyId from env loaded = ${kmsKeyId.take(10)}...")
+
+ val s3utils = new S3FsUtils(Region.EU_WEST_1, S3KmsSettings(kmsKeyId)) {
+ override val maxKeys = 5 // to test recursive listing/action
+ }
+
+ log.info(s"dir size of $basePath is:" + s3utils.getDirectorySize(basePath))
+ log.info(s"dir size (no hidden) of $basePath is:" + s3utils.getDirectorySizeNoHidden(basePath))
+
+ log.info(s"should exist:" + s3utils.exists(s"$basePath/1/2019/11/27/1/_INFO"))
+ log.info(s"should not exist:" + s3utils.exists(s"$basePath/1/2019/11/27/1/_INFObogus"))
+
+ log.info("found version (1): "
+ + s3utils.getLatestVersion(s"s3://$bucketName/superhero/publish", "2020-08-06"))
+
+ log.info("found no version (0): "
+ + s3utils.getLatestVersion(s"s3://$bucketName/aaa", "2020-08-06"))
+
+ log.info(s"reading file content:" + s3utils.read(s"$basePath/1/2019/11/27/1/_INFO").take(50))
+
+ log.info(s"should find no gz-s:" + s3utils.isNonSplittable(s"s3://$bucketName/gz-list/nogz"))
+ log.info(s"should find some gz-s (and breakOut):" +
+ s3utils.isNonSplittable(s"s3://$bucketName/gz-list/somegz"))
+
+ val deletePath = s"s3://$bucketName/delete"
+ log.info(s"deleting $deletePath: " + s3utils.deleteDirectoryRecursively(deletePath))
+ }
+
+}
diff --git a/utils/src/main/scala/za/co/absa/enceladus/utils/performance/PerformanceMetricTools.scala b/utils/src/main/scala/za/co/absa/enceladus/utils/performance/PerformanceMetricTools.scala
index 469cd6cae..97aeed022 100644
--- a/utils/src/main/scala/za/co/absa/enceladus/utils/performance/PerformanceMetricTools.scala
+++ b/utils/src/main/scala/za/co/absa/enceladus/utils/performance/PerformanceMetricTools.scala
@@ -20,7 +20,7 @@ import org.apache.spark.sql.functions.{col, size, sum}
import org.slf4j.{Logger, LoggerFactory}
import za.co.absa.atum.core.Atum
import za.co.absa.enceladus.utils.error.ErrorMessage
-import za.co.absa.enceladus.utils.fs.FileSystemVersionUtils
+import za.co.absa.enceladus.utils.fs.DistributedFsUtils
import za.co.absa.enceladus.utils.general.ProjectMetadataTools
import za.co.absa.enceladus.utils.schema.SchemaUtils
@@ -45,14 +45,13 @@ object PerformanceMetricTools {
outputPath: String,
loginUserName: String,
cmdLineArgs: String)
- (implicit spark: SparkSession): Unit = {
+ (implicit spark: SparkSession, fsUtils: DistributedFsUtils): Unit = {
// Spark job configuration
val sc = spark.sparkContext
// The number of executors minus the driver
val numberOfExecutors = sc.getExecutorMemoryStatus.keys.size - 1
- val fsUtils = new FileSystemVersionUtils(spark.sparkContext.hadoopConfiguration)
// Directory sizes and size ratio
val inputDirSize = fsUtils.getDirectorySize(inputPath)
val inputDataSize = fsUtils.getDirectorySizeNoHidden(inputPath)
@@ -96,12 +95,10 @@ object PerformanceMetricTools {
outputPath: String,
loginUserName: String,
cmdLineArgs: String
- ): Unit = {
- val fsUtils = new FileSystemVersionUtils(spark.sparkContext.hadoopConfiguration)
+ )(implicit fsUtils: DistributedFsUtils): Unit = {
// Directory sizes and size ratio
val inputDirSize = fsUtils.getDirectorySize(inputPath)
- val inputDataSize = fsUtils.getDirectorySizeNoHidden(inputPath)
val outputDirSize = fsUtils.getDirectorySize(outputPath)
val outputDataSize = fsUtils.getDirectorySizeNoHidden(outputPath)
diff --git a/utils/src/main/scala/za/co/absa/enceladus/utils/testUtils/SparkJobRunnerMethods.scala b/utils/src/main/scala/za/co/absa/enceladus/utils/testUtils/SparkJobRunnerMethods.scala
index abe8cbf64..158a4a728 100644
--- a/utils/src/main/scala/za/co/absa/enceladus/utils/testUtils/SparkJobRunnerMethods.scala
+++ b/utils/src/main/scala/za/co/absa/enceladus/utils/testUtils/SparkJobRunnerMethods.scala
@@ -15,14 +15,14 @@
package za.co.absa.enceladus.utils.testUtils
-import org.scalatest.FunSuiteLike
+import org.scalatest.funsuite.AnyFunSuiteLike
import scala.language.reflectiveCalls
import scala.reflect.ClassTag
import scala.reflect.runtime.universe
trait SparkJobRunnerMethods {
- this: FunSuiteLike =>
+ this: AnyFunSuiteLike =>
private def runSparkJob[T](implicit ct: ClassTag[T]): Unit = {
type MainClass = {def main(args: Array[String]): Unit}
diff --git a/utils/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker b/utils/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker
new file mode 100644
index 000000000..1f0955d45
--- /dev/null
+++ b/utils/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker
@@ -0,0 +1 @@
+mock-maker-inline
diff --git a/utils/src/test/scala/za/co/absa/enceladus/SchemaPathValidatorSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/SchemaPathValidatorSuite.scala
index c5f7f5c2a..d93ce4afc 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/SchemaPathValidatorSuite.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/SchemaPathValidatorSuite.scala
@@ -16,13 +16,13 @@
package za.co.absa.enceladus
import org.apache.spark.sql.types._
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.utils.validation.{SchemaPathValidator, ValidationError, ValidationIssue, ValidationWarning}
/**
* A test suite for validation of schema path fields existence.
*/
-class SchemaPathValidatorSuite extends FunSuite {
+class SchemaPathValidatorSuite extends AnyFunSuite {
private val schema = StructType(
Array(
diff --git a/utils/src/test/scala/za/co/absa/enceladus/SchemaValidationSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/SchemaValidationSuite.scala
index 44000db9a..9fd61f0cc 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/SchemaValidationSuite.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/SchemaValidationSuite.scala
@@ -16,7 +16,7 @@
package za.co.absa.enceladus
import org.apache.spark.sql.types._
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.utils.testUtils.LoggerTestBase
import za.co.absa.enceladus.utils.validation.SchemaValidator
@@ -24,7 +24,7 @@ import za.co.absa.enceladus.utils.validation.SchemaValidator
* A test suite for validation of scalar data types
*/
//noinspection ZeroIndexToHead
-class SchemaValidationSuite extends FunSuite with LoggerTestBase{
+class SchemaValidationSuite extends AnyFunSuite with LoggerTestBase{
test("Scalar types should be validated") {
val schema = StructType(
diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/ExplosionSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/ExplosionSuite.scala
index e3c3188ec..7d65b0b58 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/utils/ExplosionSuite.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/utils/ExplosionSuite.scala
@@ -17,7 +17,7 @@ package za.co.absa.enceladus.utils
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions._
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import org.slf4j.LoggerFactory
import za.co.absa.spark.hats.Extensions._
import za.co.absa.enceladus.utils.explode.ExplodeTools
@@ -25,7 +25,7 @@ import za.co.absa.enceladus.utils.general.JsonUtils
import za.co.absa.enceladus.utils.schema.SchemaUtils
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
-class ExplosionSuite extends FunSuite with SparkTestBase {
+class ExplosionSuite extends AnyFunSuite with SparkTestBase {
private val logger = LoggerFactory.getLogger(this.getClass)
diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/broadcast/BroadcastUtilsSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/broadcast/BroadcastUtilsSuite.scala
index a5f5e0d08..10ffbb094 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/utils/broadcast/BroadcastUtilsSuite.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/utils/broadcast/BroadcastUtilsSuite.scala
@@ -17,13 +17,13 @@ package za.co.absa.enceladus.utils.broadcast
import org.apache.spark.sql.functions._
import org.apache.spark.sql.{DataFrame, Row}
-import org.scalatest.WordSpec
+import org.scalatest.wordspec.AnyWordSpec
import za.co.absa.enceladus.utils.error.Mapping
import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase}
import scala.collection.mutable
-class BroadcastUtilsSuite extends WordSpec with SparkTestBase with LoggerTestBase {
+class BroadcastUtilsSuite extends AnyWordSpec with SparkTestBase with LoggerTestBase {
import spark.implicits._
diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/broadcast/LocalMappingTableSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/broadcast/LocalMappingTableSuite.scala
index b71fbd917..4de7ae9a9 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/utils/broadcast/LocalMappingTableSuite.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/utils/broadcast/LocalMappingTableSuite.scala
@@ -17,11 +17,11 @@ package za.co.absa.enceladus.utils.broadcast
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{NumericType, StringType, StructType}
-import org.scalatest.WordSpec
+import org.scalatest.wordspec.AnyWordSpec
import za.co.absa.enceladus.utils.general.JsonUtils
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
-class LocalMappingTableSuite extends WordSpec with SparkTestBase {
+class LocalMappingTableSuite extends AnyWordSpec with SparkTestBase {
import spark.implicits._
diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/config/ConfigUtilsSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/config/ConfigUtilsSuite.scala
index 2cbe22413..aaa50e799 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/utils/config/ConfigUtilsSuite.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/utils/config/ConfigUtilsSuite.scala
@@ -16,11 +16,13 @@
package za.co.absa.enceladus.utils.config
import com.typesafe.config.ConfigFactory
-import org.scalatest.{FlatSpec, Matchers}
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
import za.co.absa.enceladus.utils.config.ConfigUtils.ConfigImplicits
+
import scala.collection.JavaConverters._
-class ConfigUtilsSuite extends FlatSpec with Matchers {
+class ConfigUtilsSuite extends AnyFlatSpec with Matchers {
val conf = ConfigFactory.parseMap(Map(
"some.string.key" -> "string1",
diff --git a/utils/src/test/scala/za/co/absa/enceladus/FsUtilsSpec.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/fs/HdfsUtilsSpec.scala
similarity index 63%
rename from utils/src/test/scala/za/co/absa/enceladus/FsUtilsSpec.scala
rename to utils/src/test/scala/za/co/absa/enceladus/utils/fs/HdfsUtilsSpec.scala
index 1df5c103d..5f7b39e8c 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/FsUtilsSpec.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/utils/fs/HdfsUtilsSpec.scala
@@ -13,39 +13,39 @@
* limitations under the License.
*/
-package za.co.absa.enceladus
+package za.co.absa.enceladus.utils.fs
import java.io.FileNotFoundException
import org.apache.hadoop.fs.Path
-import org.scalatest.{Matchers, WordSpec}
-import za.co.absa.enceladus.utils.fs.FileSystemVersionUtils
+import org.scalatest.matchers.should.Matchers
+import org.scalatest.wordspec.AnyWordSpec
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
/**
* Unit tests for File system utils
*/
-class FsUtilsSpec extends WordSpec with Matchers with SparkTestBase {
- val fsUtils = new FileSystemVersionUtils(spark.sparkContext.hadoopConfiguration)
+class HdfsUtilsSpec extends AnyWordSpec with Matchers with SparkTestBase {
+ val hdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)
"splitUriPath" should {
"split URI and path" in {
val path = new Path("hdfs://some-host:8020/user/data/input")
- val (prefix, rawPath) = fsUtils.splitUriPath(path)
+ val (prefix, rawPath) = hdfsUtils.splitUriPath(path)
prefix shouldEqual "hdfs://some-host:8020"
rawPath shouldEqual "/user/data/input"
}
"not split a path without URI prefix" in {
val path = new Path("/projects/coreconformance/publish/dataset")
- val (prefix, rawPath) = fsUtils.splitUriPath(path)
+ val (prefix, rawPath) = hdfsUtils.splitUriPath(path)
prefix shouldEqual ""
rawPath shouldEqual "/projects/coreconformance/publish/dataset"
}
"not split relative path" in {
val path = new Path("data/input")
- val (prefix, rawPath) = fsUtils.splitUriPath(path)
+ val (prefix, rawPath) = hdfsUtils.splitUriPath(path)
prefix shouldEqual ""
rawPath shouldEqual "data/input"
}
@@ -54,27 +54,27 @@ class FsUtilsSpec extends WordSpec with Matchers with SparkTestBase {
"getDirectorySize" should {
"throw an exception if the specified path does not exist" in {
intercept[FileNotFoundException] {
- fsUtils.getDirectorySize("src/test/resources/test_data/not_exist")
+ hdfsUtils.getDirectorySize("src/test/resources/test_data/not_exist")
}
}
"return the file size if a single file is specified" in {
- val dirSize = fsUtils.getDirectorySize("src/test/resources/test_data/test_dir/dummy.txt")
+ val dirSize = hdfsUtils.getDirectorySize("src/test/resources/test_data/test_dir/dummy.txt")
assert(dirSize == 20L)
}
"return the file size if a single hidden file is specified" in {
- val dirSize = fsUtils.getDirectorySize("src/test/resources/test_data/test_dir/_hidden_dummy.txt")
+ val dirSize = hdfsUtils.getDirectorySize("src/test/resources/test_data/test_dir/_hidden_dummy.txt")
assert(dirSize == 27L)
}
"return the size of all files in a directory" in {
- val dirSize = fsUtils.getDirectorySize("src/test/resources/test_data/test_dir")
+ val dirSize = hdfsUtils.getDirectorySize("src/test/resources/test_data/test_dir")
assert(dirSize == 47L)
}
"return the size of all files recursively" in {
- val dirSize = fsUtils.getDirectorySize("src/test/resources/test_data/test_dir2")
+ val dirSize = hdfsUtils.getDirectorySize("src/test/resources/test_data/test_dir2")
assert(dirSize == 87L)
}
}
@@ -82,32 +82,32 @@ class FsUtilsSpec extends WordSpec with Matchers with SparkTestBase {
"getDirectorySizeNoHidden" should {
"throw an exception if the specified path does not exist" in {
intercept[FileNotFoundException] {
- fsUtils.getDirectorySizeNoHidden("src/test/resources/test_data/not_exist")
+ hdfsUtils.getDirectorySizeNoHidden("src/test/resources/test_data/not_exist")
}
}
"return the file size if a single file is specified" in {
- val dirSize = fsUtils.getDirectorySizeNoHidden("src/test/resources/test_data/test_dir/dummy.txt")
+ val dirSize = hdfsUtils.getDirectorySizeNoHidden("src/test/resources/test_data/test_dir/dummy.txt")
assert(dirSize == 20L)
}
"return the file size if a single hidden file is specified" in {
- val dirSize = fsUtils.getDirectorySizeNoHidden("src/test/resources/test_data/test_dir/_hidden_dummy.txt")
+ val dirSize = hdfsUtils.getDirectorySizeNoHidden("src/test/resources/test_data/test_dir/_hidden_dummy.txt")
assert(dirSize == 27L)
}
"return the size of all non-hidden files in a directory" in {
- val dirSize = fsUtils.getDirectorySizeNoHidden("src/test/resources/test_data/test_dir")
+ val dirSize = hdfsUtils.getDirectorySizeNoHidden("src/test/resources/test_data/test_dir")
assert(dirSize == 20L)
}
"return the size of all non-hidden files recursively along non-hidden paths" in {
- val dirSize = fsUtils.getDirectorySizeNoHidden("src/test/resources/test_data/test_dir2")
+ val dirSize = hdfsUtils.getDirectorySizeNoHidden("src/test/resources/test_data/test_dir2")
assert(dirSize == 40L)
}
"return the size of all non-hidden files if a hidden directory is specified explicitly" in {
- val dirSize = fsUtils.getDirectorySizeNoHidden("src/test/resources/test_data/test_dir2/_inner_dir")
+ val dirSize = hdfsUtils.getDirectorySizeNoHidden("src/test/resources/test_data/test_dir2/_inner_dir")
assert(dirSize == 20L)
}
}
diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/fs/S3FsUtilsSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/fs/S3FsUtilsSuite.scala
new file mode 100644
index 000000000..6d558c79d
--- /dev/null
+++ b/utils/src/test/scala/za/co/absa/enceladus/utils/fs/S3FsUtilsSuite.scala
@@ -0,0 +1,394 @@
+/*
+ * Copyright 2018 ABSA Group Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package za.co.absa.enceladus.utils.fs
+
+import org.mockito.captor.{ArgCaptor, Captor}
+import org.mockito.scalatest.IdiomaticMockito
+import org.mockito.{ArgumentMatchers, Mockito}
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider
+import software.amazon.awssdk.core.ResponseBytes
+import software.amazon.awssdk.regions.Region
+import software.amazon.awssdk.services.s3.S3Client
+import software.amazon.awssdk.services.s3.model._
+import za.co.absa.atum.persistence.S3KmsSettings
+
+import scala.collection.JavaConverters._
+
+class S3FsUtilsSuite extends AnyFlatSpec with IdiomaticMockito with Matchers {
+
+  val kmsSettings = S3KmsSettings("testingKeyId123")
+ val region = Region.EU_WEST_2
+
+ implicit val credentialsProvider = DefaultCredentialsProvider.create()
+
+ // common fixture for all tests
+ def fixture = new {
+ val mockedS3Client = mock[S3Client]
+    val mockedS3FsUtils = new S3FsUtils(region, kmsSettings) {
+ override def getS3Client: S3Client = mockedS3Client
+
+ override val maxKeys = 3 // to test recursion for listing
+ }
+ }
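+  // Note: `fixture` is a def, so each test gets a fresh mocked S3Client and S3FsUtils instance and mock
+  // interactions cannot leak between tests; maxKeys = 3 keeps the paginated-listing code path reachable
+  // with only a few mocked S3Objects per response.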
+
+ "S3FsUtilsTest" should "detect exiting file" in {
+ val f = fixture
+ val path = "s3://bucket1/path/to/existing.file"
+
+ // mock S3 response for exist
+ val mockedResponse: HeadObjectResponse = mock[HeadObjectResponse]
+ Mockito.when(f.mockedS3Client.headObject(any[HeadObjectRequest])).thenReturn(mockedResponse)
+
+ val existResult = f.mockedS3FsUtils.exists(path)
+
+ // verify request content
+ val requestCaptor: Captor[HeadObjectRequest] = ArgCaptor[HeadObjectRequest]
+ Mockito.verify(f.mockedS3Client).headObject(requestCaptor.capture)
+ val capturedGetRequest = requestCaptor.value
+
+ capturedGetRequest.bucket shouldBe "bucket1"
+ capturedGetRequest.key shouldBe "path/to/existing.file"
+
+ // verify returned value
+ existResult shouldBe true
+ }
+
+ it should "detect non-exiting file" in {
+ val f = fixture
+ val path = "s3://bucket1b/path/to/non-existing.file"
+
+ // mock S3 response for exist
+ Mockito.when(f.mockedS3Client.headObject(any[HeadObjectRequest]))
+ .thenThrow(NoSuchKeyException.builder.message("the file does not exist!").build())
+
+ val existResult = f.mockedS3FsUtils.exists(path)
+
+ // verify request content
+ val requestCaptor: Captor[HeadObjectRequest] = ArgCaptor[HeadObjectRequest]
+ Mockito.verify(f.mockedS3Client).headObject(requestCaptor.capture)
+ val capturedGetRequest = requestCaptor.value
+
+ capturedGetRequest.bucket shouldBe "bucket1b"
+ capturedGetRequest.key shouldBe "path/to/non-existing.file"
+
+ // verify returned value
+ existResult shouldBe false
+ }
+
+ it should "read data from S3 path" in {
+ val f = fixture
+ val path = "s3://bucket2/path/to/read.file"
+ val mockedFileContent = "This is the file content on S3"
+
+ val mockedResponseWithContent: ResponseBytes[GetObjectResponse] = mock[ResponseBytes[GetObjectResponse]]
+
+ // mock S3 response
+ Mockito.when(f.mockedS3Client.getObjectAsBytes(ArgumentMatchers.any[GetObjectRequest])).thenReturn(mockedResponseWithContent)
+ Mockito.when(mockedResponseWithContent.asUtf8String()).thenReturn(mockedFileContent)
+
+ val readingResult = f.mockedS3FsUtils.read(path)
+
+ // verify request content
+ val requestCaptor: Captor[GetObjectRequest] = ArgCaptor[GetObjectRequest]
+ Mockito.verify(f.mockedS3Client).getObjectAsBytes(requestCaptor.capture)
+ val capturedGetRequest = requestCaptor.value
+
+ capturedGetRequest.bucket shouldBe "bucket2"
+ capturedGetRequest.key shouldBe "path/to/read.file"
+
+ // verify returned value
+ readingResult shouldBe mockedFileContent
+ }
+
+ private case class MockedObjectDef(path: String, size: Long = 0L) {
+ def toObject: S3Object = S3Object.builder().key(path).size(size).build
+ }
+
+ private val mockedObjects1 = Seq(
+ MockedObjectDef("/dir/to/size/.hidden_file1.abc", 1L),
+ MockedObjectDef("/dir/to/size/_hidden.file2.abc", 2L),
+ MockedObjectDef("/dir/to/size/regular-file3.abc", 4L)
+ ).map(_.toObject)
+
+ private val mockedObjects2 = Seq(
+ MockedObjectDef("/dir/to/size/.hidden_file10.abc", 10L),
+ MockedObjectDef("/dir/to/size/_hidden.file20.abc", 20L),
+ MockedObjectDef("/dir/to/size/regular-file30.gz", 40L)
+ ).map(_.toObject)
+
+ it should "get dir size - simple (no filtering, no pagination)" in {
+ val f = fixture
+ val path = "s3://bucket3/dir/to/size"
+
+ val mockedListResponse: ListObjectsV2Response = ListObjectsV2Response.builder()
+ .isTruncated(false)
+ .contents(mockedObjects1.asJava)
+ .build
+
+ // mock S3 response
+ Mockito.when(f.mockedS3Client.listObjectsV2(ArgumentMatchers.any[ListObjectsV2Request])).thenReturn(mockedListResponse)
+ val dirSizeResult = f.mockedS3FsUtils.getDirectorySize(path)
+
+ // verify request content
+ val requestCaptor: Captor[ListObjectsV2Request] = ArgCaptor[ListObjectsV2Request]
+ Mockito.verify(f.mockedS3Client).listObjectsV2(requestCaptor.capture)
+ val capturedListRequest = requestCaptor.value
+
+ capturedListRequest.bucket shouldBe "bucket3"
+ capturedListRequest.prefix shouldBe "dir/to/size"
+ capturedListRequest.continuationToken shouldBe null
+
+ // verify returned value
+ dirSizeResult shouldBe 7L
+ }
+
+ {
+ val (f1, f2) = (fixture, fixture)
+ Seq(
+ (f1, "all files", (f1.mockedS3FsUtils.getDirectorySize(_)): String => Long, 77L),
+ (f2, "only non-hidden", (f2.mockedS3FsUtils.getDirectorySizeNoHidden(_)): String => Long, 44L)
+ )
+ }.foreach { case (f, testCaseName, getSizeOp, expectedSize) =>
+
+ it should s"get dir size for $testCaseName - with pagination listing" in {
+ val path = "s3://bucket3b/dir/to/size"
+
+ val mockedListResponses: Seq[ListObjectsV2Response] = Seq(
+ ListObjectsV2Response.builder().isTruncated(true).nextContinuationToken("token1")
+ .contents(mockedObjects1.asJava).build,
+ ListObjectsV2Response.builder().isTruncated(false)
+ .contents(mockedObjects2.asJava).build
+ )
+
+ // mock S3 responses
+ Mockito.when(f.mockedS3Client.listObjectsV2(ArgumentMatchers.any[ListObjectsV2Request]))
+ .thenReturn(mockedListResponses(0))
+ .thenReturn(mockedListResponses(1))
+ val dirSizeResult = getSizeOp(path)
+
+ // verify request content
+ val requestCaptor: Captor[ListObjectsV2Request] = ArgCaptor[ListObjectsV2Request]
+ Mockito.verify(f.mockedS3Client, Mockito.times(2)).listObjectsV2(requestCaptor.capture)
+ val capturedListRequests = requestCaptor.values
+
+ // bucket & path should always be the same
+ capturedListRequests.foreach(_.bucket shouldBe "bucket3b")
+ capturedListRequests.foreach(_.prefix shouldBe "dir/to/size")
+
+ // when truncated, the continuationToken was passed along to the next request to resume correctly
+ capturedListRequests.map(_.continuationToken) shouldBe List(null, "token1")
+
+ // verify returned value
+ dirSizeResult shouldBe expectedSize
+ }
+ }
+
+ Seq(
+ ("non-splittable", mockedObjects2, true),
+ ("splittable", mockedObjects1, false)
+ ).foreach { case (testCaseName, mockedObjects, expectedNonSplitability) =>
+ it should s"find the file list be $testCaseName (simple case, no pagination)" in {
+ val f = fixture
+ val path = "s3://bucket4/dir/to/split"
+
+ val mockedListResponse: ListObjectsV2Response = ListObjectsV2Response.builder()
+ .isTruncated(false)
+ .contents(mockedObjects.asJava)
+ .build
+
+ // mock S3 response
+ Mockito.when(f.mockedS3Client.listObjectsV2(ArgumentMatchers.any[ListObjectsV2Request])).thenReturn(mockedListResponse)
+ val isNonSplittableResult = f.mockedS3FsUtils.isNonSplittable(path)
+
+ // verify request content
+ val requestCaptor: Captor[ListObjectsV2Request] = ArgCaptor[ListObjectsV2Request]
+ Mockito.verify(f.mockedS3Client).listObjectsV2(requestCaptor.capture)
+ val capturedListRequest = requestCaptor.value
+
+ capturedListRequest.bucket shouldBe "bucket4"
+ capturedListRequest.prefix shouldBe "dir/to/split"
+ capturedListRequest.continuationToken shouldBe null
+
+ // verify returned value
+ isNonSplittableResult shouldBe expectedNonSplitability
+ }
+ }
+
+ it should s"find the file list be non-splittable with breakOut" in {
+ val f = fixture
+ val path = "s3://bucket4b/dir/to/split"
+
+ val mockedListResponses: Seq[ListObjectsV2Response] = Seq(
+ ListObjectsV2Response.builder().isTruncated(true).nextContinuationToken("token1")
+ .contents(mockedObjects1.asJava).build,
+ ListObjectsV2Response.builder().isTruncated(true).nextContinuationToken("token2")
+ .contents(mockedObjects2.asJava).build
+ )
+
+ // mock S3 responses: pretend that there could be a third response with objects, but it should not be reached
+    // because a non-splittable file was already found and the breakOut should prevent further processing
+ Mockito.when(f.mockedS3Client.listObjectsV2(ArgumentMatchers.any[ListObjectsV2Request]))
+ .thenReturn(mockedListResponses(0))
+ .thenReturn(mockedListResponses(1))
+ .thenThrow(new IllegalStateException("Unwanted state - breakOut for non-splitability does not work"))
+ val isNonSplittableResult = f.mockedS3FsUtils.isNonSplittable(path)
+
+ // verify request content
+ val requestCaptor: Captor[ListObjectsV2Request] = ArgCaptor[ListObjectsV2Request]
+ Mockito.verify(f.mockedS3Client, Mockito.times(2)).listObjectsV2(requestCaptor.capture)
+ val capturedListRequests = requestCaptor.values
+
+ // bucket & path should always be the same
+ capturedListRequests.foreach(_.bucket shouldBe "bucket4b")
+ capturedListRequests.foreach(_.prefix shouldBe "dir/to/split")
+
+ // when truncated, the continuationToken was passed along to the next request to resume correctly
+ capturedListRequests.map(_.continuationToken) shouldBe List(null, "token1")
+
+ // verify returned value
+ isNonSplittableResult shouldBe true
+ }
+
+ it should s"delete files - with pagination listing" in {
+ val f = fixture
+ val path = "s3://bucket5/dir/to/delete"
+
+ // mock S3 list responses
+ val mockedListResponses: Seq[ListObjectsV2Response] = Seq(
+ ListObjectsV2Response.builder().isTruncated(true).nextContinuationToken("token1")
+ .contents(mockedObjects1.asJava).build,
+ ListObjectsV2Response.builder().isTruncated(false)
+ .contents(mockedObjects2.asJava).build
+ )
+ Mockito.when(f.mockedS3Client.listObjectsV2(ArgumentMatchers.any[ListObjectsV2Request]))
+ .thenReturn(mockedListResponses(0))
+ .thenReturn(mockedListResponses(1))
+
+ // mock delete responses
+    val mockedDeleteResponse = mock[DeleteObjectsResponse]
+    Mockito.when(f.mockedS3Client.deleteObjects(ArgumentMatchers.any[DeleteObjectsRequest]))
+      .thenReturn(mockedDeleteResponse)
+    Mockito.when(mockedDeleteResponse.errors).thenReturn(List.empty[S3Error].asJava)
+
+ f.mockedS3FsUtils.deleteDirectoryRecursively(path)
+
+ // verify list request contents
+ val listRequestCaptor: Captor[ListObjectsV2Request] = ArgCaptor[ListObjectsV2Request]
+ Mockito.verify(f.mockedS3Client, Mockito.times(2)).listObjectsV2(listRequestCaptor.capture)
+ val capturedListRequests = listRequestCaptor.values
+
+ // bucket & path should always be the same
+ capturedListRequests.foreach(_.bucket shouldBe "bucket5")
+ capturedListRequests.foreach(_.prefix shouldBe "dir/to/delete")
+
+ // when truncated, the continuationToken was passed along to the next request to resume correctly
+ capturedListRequests.map(_.continuationToken) shouldBe List(null, "token1")
+
+ // verify delete requests made
+ val deleteRequestCaptor: Captor[DeleteObjectsRequest] = ArgCaptor[DeleteObjectsRequest]
+ Mockito.verify(f.mockedS3Client, Mockito.times(2)).deleteObjects(deleteRequestCaptor.capture)
+ val capturedDeleteRequests = deleteRequestCaptor.values
+
+ capturedDeleteRequests.foreach(_.bucket shouldBe "bucket5")
+ // the requests should hold the paths listed
+ val deletedKeysRequested = capturedDeleteRequests.flatMap(_.delete.objects.asScala.map(_.key))
+ deletedKeysRequested should contain theSameElementsInOrderAs (mockedObjects1 ++ mockedObjects2).map(_.key)
+ }
+
+ private val unrelatedVersionObjects = Seq(
+ MockedObjectDef("publish/path/enceladus_info_date=2020-02-22/enceladus_info_version=aaaa/unrelated.file"),
+ MockedObjectDef("publish/path/enceladus_info_date=2020-02-22/enceladus_info_version=-6/unrelated.file")
+ ).map(_.toObject)
+
+ Seq(
+ ("unrelated objects", unrelatedVersionObjects),
+ ("no objecdts", List.empty[S3Object])
+ ).foreach { case (testCaseName, mockedObjects) =>
+ it should s"find the latest version (simple case of $testCaseName - no recursion) to be 0" in {
+ val f = fixture
+ val path = "s3://bucket6/publish/path"
+ val reportDate = "2020-02-22"
+
+ // mock S3 list response
+ val mockedListResponse = ListObjectsV2Response.builder().isTruncated(false)
+ .contents(mockedObjects.asJava).build
+ Mockito.when(f.mockedS3Client.listObjectsV2(ArgumentMatchers.any[ListObjectsV2Request]))
+ .thenReturn(mockedListResponse)
+
+      val latestVersion = f.mockedS3FsUtils.getLatestVersion(path, reportDate)
+
+ // verify request content
+ val requestCaptor: Captor[ListObjectsV2Request] = ArgCaptor[ListObjectsV2Request]
+ Mockito.verify(f.mockedS3Client).listObjectsV2(requestCaptor.capture)
+      val capturedListRequest = requestCaptor.value
+
+      // the single request should target the version prefix for the given report date
+      capturedListRequest.bucket shouldBe "bucket6"
+      capturedListRequest.prefix shouldBe "publish/path/enceladus_info_date=2020-02-22/enceladus_info_version="
+
+ // verify returned value
+      latestVersion shouldBe 0
+ }
+ }
+
+ it should s"find the latest version (with recursion)" in {
+ val f = fixture
+ val path = "s3://bucket6b/publish/path"
+ val reportDate = "2020-02-22"
+
+ // mock S3 list responses
+    val mockedObjectForVersionLookup1 = Seq(
+ MockedObjectDef("publish/path/enceladus_info_date=2020-02-22/enceladus_info_version=1/file.abc"),
+ MockedObjectDef("publish/path/enceladus_info_date=2020-02-22/enceladus_info_version=2/file2.abc"),
+ MockedObjectDef("publish/path/enceladus_info_date=2020-02-22/enceladus_info_version=BOGUS/bogus.file")
+ ).map(_.toObject)
+
+    val mockedObjectForVersionLookup2 = Seq(
+ MockedObjectDef("publish/path/enceladus_info_date=2020-02-22/enceladus_info_version=4/file.abc"),
+ MockedObjectDef("publish/path/enceladus_info_date=2020-02-22/enceladus_info_version=6/.hidden.abc") // hidden = no problem
+ ).map(_.toObject)
+
+ val mockedListResponses: Seq[ListObjectsV2Response] = Seq(
+ ListObjectsV2Response.builder().isTruncated(true).nextContinuationToken("token1")
+        .contents(mockedObjectForVersionLookup1.asJava).build,
+      ListObjectsV2Response.builder().isTruncated(false)
+        .contents(mockedObjectForVersionLookup2.asJava).build
+ )
+
+ Mockito.when(f.mockedS3Client.listObjectsV2(ArgumentMatchers.any[ListObjectsV2Request]))
+ .thenReturn(mockedListResponses(0))
+ .thenReturn(mockedListResponses(1))
+ val latestVersion = f.mockedS3FsUtils.getLatestVersion(path, reportDate)
+
+ // verify request content
+ val requestCaptor: Captor[ListObjectsV2Request] = ArgCaptor[ListObjectsV2Request]
+ Mockito.verify(f.mockedS3Client, Mockito.times(2)).listObjectsV2(requestCaptor.capture)
+ val capturedListRequests = requestCaptor.values
+
+ // bucket & path should always be the same
+ capturedListRequests.foreach(_.bucket shouldBe "bucket6b")
+ capturedListRequests.foreach(_.prefix shouldBe "publish/path/enceladus_info_date=2020-02-22/enceladus_info_version=")
+
+ // when truncated, the continuationToken was passed along to the next request to resume correctly
+ capturedListRequests.map(_.continuationToken) shouldBe List(null, "token1")
+
+ // verify returned value
+ latestVersion shouldBe 6
+ }
+
+}
diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/general/AlgorithmsSpec.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/general/AlgorithmsSpec.scala
index 24b550a64..ba3a066fb 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/utils/general/AlgorithmsSpec.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/utils/general/AlgorithmsSpec.scala
@@ -15,9 +15,9 @@
package za.co.absa.enceladus.utils.general
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
-class AlgorithmsSpec extends FunSuite {
+class AlgorithmsSpec extends AnyFunSuite {
case class Person(firstName: String, lastName: String)
private val people = Seq(Person("Andrew", "Mikels"), Person("Andrew", "Gross"),
diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/general/ConfigReaderSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/general/ConfigReaderSuite.scala
index e771f273e..31e96a5b7 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/utils/general/ConfigReaderSuite.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/utils/general/ConfigReaderSuite.scala
@@ -16,10 +16,10 @@
package za.co.absa.enceladus.utils.general
import com.typesafe.config.ConfigFactory
-import org.scalatest.WordSpec
+import org.scalatest.wordspec.AnyWordSpec
import za.co.absa.enceladus.utils.config.ConfigReader
-class ConfigReaderSuite extends WordSpec {
+class ConfigReaderSuite extends AnyWordSpec {
private val config = ConfigFactory.parseString(
"""
|top = default
diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/general/JsonUtilsSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/general/JsonUtilsSuite.scala
index c4a38e318..5204f398a 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/utils/general/JsonUtilsSuite.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/utils/general/JsonUtilsSuite.scala
@@ -15,10 +15,10 @@
package za.co.absa.enceladus.utils.general
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
-class JsonUtilsSuite extends FunSuite with SparkTestBase {
+class JsonUtilsSuite extends AnyFunSuite with SparkTestBase {
test("Test JSON pretty formatting from a JSON string") {
val inputJson = """[{"id":1,"items":[{"itemid":100,"subitems":[{"elems":[{"numbers":["1","2","3b","4","5c","6"]}],"code":100}]}]}]"""
val expected = """[ {
diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/general/SectionSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/general/SectionSuite.scala
index 15c97adbb..d1b863502 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/utils/general/SectionSuite.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/utils/general/SectionSuite.scala
@@ -17,11 +17,11 @@ package za.co.absa.enceladus.utils.general
import java.security.InvalidParameterException
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import scala.util.{Failure, Try}
-class SectionSuite extends FunSuite {
+class SectionSuite extends AnyFunSuite {
private def checkSectionRemoveExtractInject(
section: Section,
diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/implicits/DataFrameImplicitsSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/implicits/DataFrameImplicitsSuite.scala
index 0e89fe223..9b944fe35 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/utils/implicits/DataFrameImplicitsSuite.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/utils/implicits/DataFrameImplicitsSuite.scala
@@ -15,11 +15,11 @@
package za.co.absa.enceladus.utils.implicits
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.utils.implicits.DataFrameImplicits.DataFrameEnhancements
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
-class DataFrameImplicitsSuite extends FunSuite with SparkTestBase {
+class DataFrameImplicitsSuite extends AnyFunSuite with SparkTestBase {
import spark.implicits._
private val columnName = "data"
diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/implicits/StringImplicitsSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/implicits/StringImplicitsSuite.scala
index 9e3456433..199533d21 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/utils/implicits/StringImplicitsSuite.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/utils/implicits/StringImplicitsSuite.scala
@@ -17,10 +17,11 @@ package za.co.absa.enceladus.utils.implicits
import java.security.InvalidParameterException
-import org.scalatest.{FunSuite, Matchers}
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers
import za.co.absa.enceladus.utils.implicits.StringImplicits.StringEnhancements
-class StringImplicitsSuite extends FunSuite with Matchers {
+class StringImplicitsSuite extends AnyFunSuite with Matchers {
test("StringEnhancements.replaceChars - empty replacements") {
val s = "supercalifragilisticexpialidocious"
assert(s.replaceChars(Map.empty) == s)
diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/schema/SchemaUtilsSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/schema/SchemaUtilsSuite.scala
index a99b7c28d..bd41f998e 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/utils/schema/SchemaUtilsSuite.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/utils/schema/SchemaUtilsSuite.scala
@@ -15,11 +15,12 @@
package za.co.absa.enceladus.utils.schema
-import org.scalatest.{FunSuite, Matchers}
import org.apache.spark.sql.types._
+import org.scalatest.funsuite.AnyFunSuite
+import org.scalatest.matchers.should.Matchers
import za.co.absa.enceladus.utils.schema.SchemaUtils._
-class SchemaUtilsSuite extends FunSuite with Matchers {
+class SchemaUtilsSuite extends AnyFunSuite with Matchers {
// scalastyle:off magic.number
private val schema = StructType(Seq(
diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/schema/SparkUtilsSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/schema/SparkUtilsSuite.scala
index 01db97643..a5b82b03c 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/utils/schema/SparkUtilsSuite.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/utils/schema/SparkUtilsSuite.scala
@@ -18,10 +18,10 @@ package za.co.absa.enceladus.utils.schema
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.{BooleanType, LongType, StructField, StructType}
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
-class SparkUtilsSuite extends FunSuite with SparkTestBase {
+class SparkUtilsSuite extends AnyFunSuite with SparkTestBase {
import za.co.absa.enceladus.utils.implicits.DataFrameImplicits.DataFrameEnhancements
diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/time/DateTimePatternSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/time/DateTimePatternSuite.scala
index 9e06ae20f..c72752a4c 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/utils/time/DateTimePatternSuite.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/utils/time/DateTimePatternSuite.scala
@@ -18,10 +18,10 @@ package za.co.absa.enceladus.utils.time
import java.security.InvalidParameterException
import org.apache.spark.sql.types._
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.utils.general.Section
-class DateTimePatternSuite extends FunSuite {
+class DateTimePatternSuite extends AnyFunSuite {
test("Pattern for timestamp") {
val pattern: String = "yyyy~mm~dd_HH.mm.ss"
diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/transformations/ArrayTransformationsSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/transformations/ArrayTransformationsSuite.scala
index 7cd1552bd..2384238da 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/utils/transformations/ArrayTransformationsSuite.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/utils/transformations/ArrayTransformationsSuite.scala
@@ -15,7 +15,7 @@
package za.co.absa.enceladus.utils.transformations
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.utils.testUtils.SparkTestBase
import scala.util.Random
import org.apache.spark.sql.functions._
@@ -36,7 +36,7 @@ case class MyC2(something: Int, somethingByTwo: Int)
case class Nested2Levels(a: List[List[Option[Int]]])
case class Nested1Level(a: List[Option[Int]])
-class ArrayTransformationsSuite extends FunSuite with SparkTestBase {
+class ArrayTransformationsSuite extends AnyFunSuite with SparkTestBase {
private val inputData = (0 to 10).toList.map(x => (x, Random.shuffle((0 until x).toList)))
private val inputDataOrig = OuterStruct(-1, null) :: inputData.map({ case (x, vals) => OuterStruct(x, vals.map(InnerStruct(_))) })
diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/types/DefaultsSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/types/DefaultsSuite.scala
index f6667e03c..e4b5887fb 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/utils/types/DefaultsSuite.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/utils/types/DefaultsSuite.scala
@@ -19,11 +19,11 @@ import java.sql.{Date, Timestamp}
import java.util.TimeZone
import org.apache.spark.sql.types._
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import scala.util.Success
-class DefaultsSuite extends FunSuite {
+class DefaultsSuite extends AnyFunSuite {
TimeZone.setDefault(TimeZone.getTimeZone("UTC"))
test("ByteType") {
diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/types/TypedStructFieldSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/types/TypedStructFieldSuite.scala
index 70837e625..8eacbd7d6 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/utils/types/TypedStructFieldSuite.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/utils/types/TypedStructFieldSuite.scala
@@ -18,14 +18,14 @@ package za.co.absa.enceladus.utils.types
import java.text.ParseException
import org.apache.spark.sql.types._
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.utils.schema.MetadataKeys
import za.co.absa.enceladus.utils.types.TypedStructField._
import za.co.absa.enceladus.utils.validation.{ValidationError, ValidationIssue, ValidationWarning}
import scala.util.{Failure, Success, Try}
-class TypedStructFieldSuite extends FunSuite {
+class TypedStructFieldSuite extends AnyFunSuite {
private implicit val defaults: Defaults = GlobalDefaults
private val fieldName = "test_field"
private def createField(dataType: DataType,
diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/DateTimeParserSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/DateTimeParserSuite.scala
index e375a1056..005286ad5 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/DateTimeParserSuite.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/DateTimeParserSuite.scala
@@ -18,12 +18,12 @@ package za.co.absa.enceladus.utils.types.parsers
import java.sql.{Date, Timestamp}
import java.text.{ParseException, SimpleDateFormat}
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.utils.time.TimeZoneNormalizer
case class TestInputRow(id: Int, stringField: String)
-class DateTimeParserSuite extends FunSuite{
+class DateTimeParserSuite extends AnyFunSuite{
TimeZoneNormalizer.normalizeJVMTimeZone()
test("EnceladusDateParser class epoch") {
diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/DecimalParserSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/DecimalParserSuite.scala
index 1020985d0..785ff1d76 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/DecimalParserSuite.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/DecimalParserSuite.scala
@@ -15,13 +15,13 @@
package za.co.absa.enceladus.utils.types.parsers
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.utils.numeric.{DecimalSymbols, NumericPattern}
import za.co.absa.enceladus.utils.types.GlobalDefaults
import scala.util.Success
-class DecimalParserSuite extends FunSuite {
+class DecimalParserSuite extends AnyFunSuite {
test("No pattern, no limitations") {
val decimalSymbols: DecimalSymbols = GlobalDefaults.getDecimalSymbols
val pattern = NumericPattern(decimalSymbols)
diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/FractionalParserSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/FractionalParserSuite.scala
index 1d4238991..95b482860 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/FractionalParserSuite.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/FractionalParserSuite.scala
@@ -15,13 +15,13 @@
package za.co.absa.enceladus.utils.types.parsers
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.utils.numeric.{DecimalSymbols, NumericPattern}
import za.co.absa.enceladus.utils.types.GlobalDefaults
import scala.util.Success
-class FractionalParserSuite extends FunSuite {
+class FractionalParserSuite extends AnyFunSuite {
private val reallyBigNumberString = "12345678901234567890123456789012345678901234567890123456789012345678901234567890" +
"12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890" +
"12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890" +
diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/IntegralParser_PatternIntegralParserSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/IntegralParser_PatternIntegralParserSuite.scala
index 16b80c4d0..7dfda5b70 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/IntegralParser_PatternIntegralParserSuite.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/IntegralParser_PatternIntegralParserSuite.scala
@@ -15,12 +15,12 @@
package za.co.absa.enceladus.utils.types.parsers
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.utils.numeric.{DecimalSymbols, NumericPattern}
import za.co.absa.enceladus.utils.types.GlobalDefaults
import scala.util.Success
-class IntegralParser_PatternIntegralParserSuite extends FunSuite {
+class IntegralParser_PatternIntegralParserSuite extends AnyFunSuite {
test("No pattern, no limitations") {
val decimalSymbols: DecimalSymbols = GlobalDefaults.getDecimalSymbols
val pattern = NumericPattern(decimalSymbols)
diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/IntegralParser_RadixIntegralParserSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/IntegralParser_RadixIntegralParserSuite.scala
index a9318f583..b5125a6c3 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/IntegralParser_RadixIntegralParserSuite.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/IntegralParser_RadixIntegralParserSuite.scala
@@ -15,14 +15,14 @@
package za.co.absa.enceladus.utils.types.parsers
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.utils.numeric.Radix
import za.co.absa.enceladus.utils.numeric.Radix.RadixFormatException
import za.co.absa.enceladus.utils.types.parsers.NumericParser.NumericParserException
import scala.util.Success
-class IntegralParser_RadixIntegralParserSuite extends FunSuite {
+class IntegralParser_RadixIntegralParserSuite extends AnyFunSuite {
test("base 10 parsing succeeds") {
val parser = IntegralParser.ofRadix(Radix(10))
diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/udf/UDFBuilderSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/udf/UDFBuilderSuite.scala
index b371dcc1b..38e59d725 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/utils/udf/UDFBuilderSuite.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/utils/udf/UDFBuilderSuite.scala
@@ -19,14 +19,14 @@ import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream,
import org.apache.spark.sql.expressions.UserDefinedFunction
import org.apache.spark.sql.types.{DecimalType, DoubleType, LongType, MetadataBuilder, ShortType, StructField}
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.utils.schema.MetadataKeys
import za.co.absa.enceladus.utils.types.TypedStructField._
import za.co.absa.enceladus.utils.types.parsers.{DecimalParser, FractionalParser}
import za.co.absa.enceladus.utils.types.parsers.IntegralParser.{PatternIntegralParser, RadixIntegralParser}
import za.co.absa.enceladus.utils.types.{Defaults, GlobalDefaults, TypedStructField}
-class UDFBuilderSuite extends FunSuite {
+class UDFBuilderSuite extends AnyFunSuite {
private implicit val defaults: Defaults = GlobalDefaults
test("Serialization and deserialization of stringUdfViaNumericParser (FractionalParser)") {
diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/BinaryValidatorSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/BinaryValidatorSuite.scala
index cbce9badc..58c77f995 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/BinaryValidatorSuite.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/BinaryValidatorSuite.scala
@@ -16,12 +16,12 @@
package za.co.absa.enceladus.utils.validation.field
import org.apache.spark.sql.types.{BinaryType, MetadataBuilder, StructField}
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.utils.schema.MetadataKeys
import za.co.absa.enceladus.utils.types.{Defaults, GlobalDefaults, TypedStructField}
import za.co.absa.enceladus.utils.validation.{ValidationError, ValidationWarning}
-class BinaryValidatorSuite extends FunSuite {
+class BinaryValidatorSuite extends AnyFunSuite {
private implicit val defaults: Defaults = GlobalDefaults
private def field(defaultValue: Option[String] = None, encoding: Option[String] = None, nullable: Boolean = true): TypedStructField = {
diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/DateFieldValidatorSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/DateFieldValidatorSuite.scala
index 4ebb95342..a1bbb5004 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/DateFieldValidatorSuite.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/DateFieldValidatorSuite.scala
@@ -16,13 +16,13 @@
package za.co.absa.enceladus.utils.validation.field
import org.apache.spark.sql.types.{DateType, MetadataBuilder, StructField}
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.utils.schema.MetadataKeys
import za.co.absa.enceladus.utils.time.TimeZoneNormalizer
import za.co.absa.enceladus.utils.types.{Defaults, GlobalDefaults, TypedStructField}
import za.co.absa.enceladus.utils.validation.{ValidationError, ValidationIssue, ValidationWarning}
-class DateFieldValidatorSuite extends FunSuite {
+class DateFieldValidatorSuite extends AnyFunSuite {
TimeZoneNormalizer.normalizeJVMTimeZone()
private implicit val defaults: Defaults = GlobalDefaults
diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/FractionalFieldValidatorSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/FractionalFieldValidatorSuite.scala
index 95ed0fa00..ef26cf009 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/FractionalFieldValidatorSuite.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/FractionalFieldValidatorSuite.scala
@@ -16,13 +16,13 @@
package za.co.absa.enceladus.utils.validation.field
import org.apache.spark.sql.types.{DataType, DoubleType, FloatType, MetadataBuilder, StructField}
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.utils.schema.MetadataKeys
import za.co.absa.enceladus.utils.types.TypedStructField.FractionalTypeStructField
import za.co.absa.enceladus.utils.types.{Defaults, GlobalDefaults, TypedStructField}
import za.co.absa.enceladus.utils.validation.ValidationError
-class FractionalFieldValidatorSuite extends FunSuite {
+class FractionalFieldValidatorSuite extends AnyFunSuite {
private implicit val defaults: Defaults = GlobalDefaults
private def field(dataType: DataType, metadataBuilder: MetadataBuilder): FractionalTypeStructField[_] = {
diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/IntegralFieldValidatorSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/IntegralFieldValidatorSuite.scala
index fc0ce0a4d..83746b6cf 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/IntegralFieldValidatorSuite.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/IntegralFieldValidatorSuite.scala
@@ -16,14 +16,14 @@
package za.co.absa.enceladus.utils.validation.field
import org.apache.spark.sql.types.{ByteType, DataType, IntegerType, LongType, MetadataBuilder, ShortType, StructField}
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.utils.numeric.Radix
import za.co.absa.enceladus.utils.schema.MetadataKeys
import za.co.absa.enceladus.utils.types.TypedStructField.IntegralTypeStructField
import za.co.absa.enceladus.utils.types.{Defaults, GlobalDefaults, TypedStructField}
import za.co.absa.enceladus.utils.validation.{ValidationError, ValidationWarning}
-class IntegralFieldValidatorSuite extends FunSuite {
+class IntegralFieldValidatorSuite extends AnyFunSuite {
private implicit val defaults: Defaults = GlobalDefaults
private def field(dataType: DataType, metadataBuilder: MetadataBuilder): IntegralTypeStructField[_] = {
diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/NumericFieldValidatorSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/NumericFieldValidatorSuite.scala
index 8cce6cda0..d93947c18 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/NumericFieldValidatorSuite.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/NumericFieldValidatorSuite.scala
@@ -16,13 +16,13 @@
package za.co.absa.enceladus.utils.validation.field
import org.apache.spark.sql.types.{DataType, DecimalType, MetadataBuilder, StructField}
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.utils.schema.MetadataKeys
import za.co.absa.enceladus.utils.types.{Defaults, GlobalDefaults, TypedStructField}
import za.co.absa.enceladus.utils.types.TypedStructField.NumericTypeStructField
import za.co.absa.enceladus.utils.validation.ValidationError
-class NumericFieldValidatorSuite extends FunSuite {
+class NumericFieldValidatorSuite extends AnyFunSuite {
private implicit val defaults: Defaults = GlobalDefaults
private def field(metadataBuilder: MetadataBuilder): NumericTypeStructField[_] = {
diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/ScalarFieldValidatorSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/ScalarFieldValidatorSuite.scala
index 1531b2ed6..815c43b22 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/ScalarFieldValidatorSuite.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/ScalarFieldValidatorSuite.scala
@@ -16,12 +16,12 @@
package za.co.absa.enceladus.utils.validation.field
import org.apache.spark.sql.types.{MetadataBuilder, StringType, StructField}
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.utils.schema.MetadataKeys
import za.co.absa.enceladus.utils.types.{Defaults, GlobalDefaults, TypedStructField}
import za.co.absa.enceladus.utils.validation.ValidationError
-class ScalarFieldValidatorSuite extends FunSuite {
+class ScalarFieldValidatorSuite extends AnyFunSuite {
private implicit val defaults: Defaults = GlobalDefaults
diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/TimestampFieldValidatorSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/TimestampFieldValidatorSuite.scala
index 2ef9218a2..fa98563f6 100644
--- a/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/TimestampFieldValidatorSuite.scala
+++ b/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/TimestampFieldValidatorSuite.scala
@@ -16,13 +16,13 @@
package za.co.absa.enceladus.utils.validation.field
import org.apache.spark.sql.types.{MetadataBuilder, StructField, TimestampType}
-import org.scalatest.FunSuite
+import org.scalatest.funsuite.AnyFunSuite
import za.co.absa.enceladus.utils.schema.MetadataKeys
import za.co.absa.enceladus.utils.time.TimeZoneNormalizer
import za.co.absa.enceladus.utils.types.{Defaults, GlobalDefaults, TypedStructField}
import za.co.absa.enceladus.utils.validation.{ValidationError, ValidationIssue, ValidationWarning}
-class TimestampFieldValidatorSuite extends FunSuite {
+class TimestampFieldValidatorSuite extends AnyFunSuite {
TimeZoneNormalizer.normalizeJVMTimeZone()
private implicit val defaults: Defaults = GlobalDefaults