diff --git a/dao/src/main/scala/za/co/absa/enceladus/dao/auth/MenasCredentials.scala b/dao/src/main/scala/za/co/absa/enceladus/dao/auth/MenasCredentials.scala index 78cfef782..beeb69089 100644 --- a/dao/src/main/scala/za/co/absa/enceladus/dao/auth/MenasCredentials.scala +++ b/dao/src/main/scala/za/co/absa/enceladus/dao/auth/MenasCredentials.scala @@ -18,7 +18,7 @@ package za.co.absa.enceladus.dao.auth import com.typesafe.config.ConfigFactory import org.apache.spark.sql.SparkSession import sun.security.krb5.internal.ktab.KeyTab -import za.co.absa.enceladus.utils.fs.FileSystemVersionUtils +import za.co.absa.enceladus.utils.fs.HdfsUtils sealed abstract class MenasCredentials { val username: String @@ -40,9 +40,9 @@ object MenasPlainCredentials { * @return An instance of Menas Credentials. */ def fromFile(path: String)(implicit spark: SparkSession): MenasPlainCredentials = { - val fsUtils = new FileSystemVersionUtils(spark.sparkContext.hadoopConfiguration) + val fsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration) - val conf = ConfigFactory.parseString(fsUtils.getFileContent(path)) + val conf = ConfigFactory.parseString(fsUtils.getLocalOrDistributedFileContent(path)) MenasPlainCredentials(conf.getString("username"), conf.getString("password")) } } @@ -55,9 +55,9 @@ object MenasKerberosCredentials { * @return An instance of Menas Credentials. */ def fromFile(path: String)(implicit spark: SparkSession): MenasKerberosCredentials = { - val fsUtils = new FileSystemVersionUtils(spark.sparkContext.hadoopConfiguration) + val fsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration) - val localKeyTabPath = fsUtils.getLocalPathToFile(path) + val localKeyTabPath = fsUtils.getLocalPathToFileOrCopyToLocal(path) val keytab = KeyTab.getInstance(localKeyTabPath) val username = keytab.getOneName.getName diff --git a/dao/src/test/scala/za/co/absa/enceladus/dao/rest/BaseTestSuite.scala b/dao/src/test/scala/za/co/absa/enceladus/dao/rest/BaseTestSuite.scala index 6b8941503..16b2db6c8 100644 --- a/dao/src/test/scala/za/co/absa/enceladus/dao/rest/BaseTestSuite.scala +++ b/dao/src/test/scala/za/co/absa/enceladus/dao/rest/BaseTestSuite.scala @@ -15,10 +15,12 @@ package za.co.absa.enceladus.dao.rest -import org.scalatest.mockito.MockitoSugar -import org.scalatest.{BeforeAndAfter, Matchers, WordSpec} +import org.mockito.scalatest.MockitoSugar +import org.scalatest.wordspec.AnyWordSpec +import org.scalatest.BeforeAndAfter +import org.scalatest.matchers.should.Matchers -abstract class BaseTestSuite extends WordSpec +abstract class BaseTestSuite extends AnyWordSpec with Matchers with MockitoSugar with BeforeAndAfter diff --git a/dao/src/test/scala/za/co/absa/enceladus/dao/rest/RestDaoFactorySuite.scala b/dao/src/test/scala/za/co/absa/enceladus/dao/rest/RestDaoFactorySuite.scala index 937c43815..5d546150e 100644 --- a/dao/src/test/scala/za/co/absa/enceladus/dao/rest/RestDaoFactorySuite.scala +++ b/dao/src/test/scala/za/co/absa/enceladus/dao/rest/RestDaoFactorySuite.scala @@ -15,11 +15,12 @@ package za.co.absa.enceladus.dao.rest -import org.scalatest.{Matchers, WordSpec} +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec import za.co.absa.enceladus.dao.UnauthorizedException import za.co.absa.enceladus.dao.auth.{InvalidMenasCredentials, MenasKerberosCredentials, MenasPlainCredentials} -class RestDaoFactorySuite extends WordSpec with Matchers { +class RestDaoFactorySuite extends AnyWordSpec with Matchers { private val menasApiBaseUrls = 
List("http://localhost:8080/menas/api") diff --git a/dao/src/test/scala/za/co/absa/enceladus/dao/rest/auth/AuthClientSuite.scala b/dao/src/test/scala/za/co/absa/enceladus/dao/rest/auth/AuthClientSuite.scala index 4e0a7df54..d6488918f 100644 --- a/dao/src/test/scala/za/co/absa/enceladus/dao/rest/auth/AuthClientSuite.scala +++ b/dao/src/test/scala/za/co/absa/enceladus/dao/rest/auth/AuthClientSuite.scala @@ -16,15 +16,17 @@ package za.co.absa.enceladus.dao.rest.auth import org.mockito.stubbing.OngoingStubbing -import org.scalatest.mockito.MockitoSugar -import org.scalatest.{BeforeAndAfter, Matchers, WordSpec} +import org.scalatest.matchers.should.Matchers +import org.mockito.scalatest.MockitoSugar +import org.scalatest.wordspec.AnyWordSpec +import org.scalatest.BeforeAndAfter import org.springframework.http.{HttpHeaders, ResponseEntity} import org.springframework.util.LinkedMultiValueMap import org.springframework.web.client.RestTemplate import za.co.absa.enceladus.dao.UnauthorizedException import za.co.absa.enceladus.dao.rest.{ApiCaller, ApiCallerStub, AuthClient} -abstract class AuthClientSuite() extends WordSpec +abstract class AuthClientSuite() extends AnyWordSpec with Matchers with MockitoSugar with BeforeAndAfter { diff --git a/dao/src/test/scala/za/co/absa/enceladus/dao/rest/auth/MenasPlainCredentialsSuite.scala b/dao/src/test/scala/za/co/absa/enceladus/dao/rest/auth/MenasPlainCredentialsSuite.scala index 1b920a7e3..e7c8429d9 100644 --- a/dao/src/test/scala/za/co/absa/enceladus/dao/rest/auth/MenasPlainCredentialsSuite.scala +++ b/dao/src/test/scala/za/co/absa/enceladus/dao/rest/auth/MenasPlainCredentialsSuite.scala @@ -15,12 +15,12 @@ package za.co.absa.enceladus.dao.rest.auth -import org.scalatest.WordSpec +import org.scalatest.wordspec.AnyWordSpec import za.co.absa.enceladus.dao.auth.MenasPlainCredentials -import za.co.absa.enceladus.utils.fs.FileSystemVersionUtils +import za.co.absa.enceladus.utils.fs.LocalFsUtils import za.co.absa.enceladus.utils.testUtils.SparkTestBase -class MenasPlainCredentialsSuite extends WordSpec with SparkTestBase { +class MenasPlainCredentialsSuite extends AnyWordSpec with SparkTestBase { "MenasPlainCredentials" should { "be read from *.conf" in { @@ -42,9 +42,7 @@ class MenasPlainCredentialsSuite extends WordSpec with SparkTestBase { val homeDir = System.getProperty("user.home") val expected = s"$homeDir/dir/file" - val fsUtils = new FileSystemVersionUtils(spark.sparkContext.hadoopConfiguration) - - val actual = fsUtils.replaceHome("~/dir/file") + val actual = LocalFsUtils.replaceHome("~/dir/file") assert(actual == expected) } } diff --git a/data-model/pom.xml b/data-model/pom.xml index ec7b7d507..0a20ba361 100644 --- a/data-model/pom.xml +++ b/data-model/pom.xml @@ -53,6 +53,12 @@ ${scalatest.version} compile + + org.scalatest + scalatest-funsuite_${scala.compat.version} + ${scalatest.version} + compile + diff --git a/data-model/src/test/scala/za/co/absa/enceladus/model/conformanceRule/ConformanceRuleTest.scala b/data-model/src/test/scala/za/co/absa/enceladus/model/conformanceRule/ConformanceRuleTest.scala index 7ee290c8f..757dabda4 100644 --- a/data-model/src/test/scala/za/co/absa/enceladus/model/conformanceRule/ConformanceRuleTest.scala +++ b/data-model/src/test/scala/za/co/absa/enceladus/model/conformanceRule/ConformanceRuleTest.scala @@ -17,9 +17,10 @@ package za.co.absa.enceladus.model.conformanceRule import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature} import com.fasterxml.jackson.module.scala.DefaultScalaModule -import 
org.scalatest.{Matchers, WordSpec} +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec -class ConformanceRuleTest extends WordSpec with Matchers { +class ConformanceRuleTest extends AnyWordSpec with Matchers { private val objectMapper = new ObjectMapper() .registerModule(DefaultScalaModule) diff --git a/data-model/src/test/scala/za/co/absa/enceladus/model/menas/audit/AuditableTest.scala b/data-model/src/test/scala/za/co/absa/enceladus/model/menas/audit/AuditableTest.scala index 44db23caa..5d76ade0c 100644 --- a/data-model/src/test/scala/za/co/absa/enceladus/model/menas/audit/AuditableTest.scala +++ b/data-model/src/test/scala/za/co/absa/enceladus/model/menas/audit/AuditableTest.scala @@ -15,12 +15,12 @@ package za.co.absa.enceladus.model.menas.audit -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.model.Dataset import za.co.absa.enceladus.model.conformanceRule.{DropConformanceRule, LiteralConformanceRule} import za.co.absa.enceladus.model.conformanceRule.ConformanceRule -class AuditableTest extends FunSuite { +class AuditableTest extends AnyFunSuite { val obj1 = Dataset(name = "Test DS", version = 0, hdfsPath = "oldPath", diff --git a/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample1.scala b/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample1.scala index 58fe2d223..eb46b94a4 100644 --- a/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample1.scala +++ b/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample1.scala @@ -23,6 +23,7 @@ import za.co.absa.enceladus.dao.auth.MenasKerberosCredentials import za.co.absa.enceladus.dao.rest.RestDaoFactory import za.co.absa.enceladus.examples.interpreter.rules.custom.UppercaseCustomConformanceRule import za.co.absa.enceladus.model.Dataset +import za.co.absa.enceladus.utils.fs.HdfsUtils import za.co.absa.enceladus.utils.time.TimeZoneNormalizer object CustomRuleSample1 { @@ -37,6 +38,8 @@ object CustomRuleSample1 { .getOrCreate() TimeZoneNormalizer.normalizeAll(spark) //normalize the timezone of JVM and the spark session + implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration) + def main(args: Array[String]) { // scalastyle:off magic.number val menasBaseUrls = List("http://localhost:8080/menas") @@ -78,7 +81,7 @@ object CustomRuleSample1 { .setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled) .setControlFrameworkEnabled(enableCF) - val outputData: DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData) + val outputData: DataFrame = DynamicInterpreter().interpret(conformanceDef, inputData) outputData.show(false) //scalastyle:on magicnumber diff --git a/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample2.scala b/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample2.scala index de79ace35..5ce905cde 100644 --- a/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample2.scala +++ b/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample2.scala @@ -24,6 +24,7 @@ import za.co.absa.enceladus.dao.auth.MenasKerberosCredentials import za.co.absa.enceladus.dao.rest.{MenasConnectionStringParser, RestDaoFactory} import za.co.absa.enceladus.examples.interpreter.rules.custom.LPadCustomConformanceRule import za.co.absa.enceladus.model.Dataset +import za.co.absa.enceladus.utils.fs.HdfsUtils import za.co.absa.enceladus.utils.time.TimeZoneNormalizer object CustomRuleSample2 { @@ -38,6 +39,8 @@ 
object CustomRuleSample2 { .getOrCreate() TimeZoneNormalizer.normalizeAll(spark) //normalize the timezone of JVM and the spark session + implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration) + def main(args: Array[String]) { // scalastyle:off magic.number val conf = ConfigFactory.load() @@ -81,7 +84,7 @@ object CustomRuleSample2 { .setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled) .setControlFrameworkEnabled(enableCF) - val outputData: DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData) + val outputData: DataFrame = DynamicInterpreter().interpret(conformanceDef, inputData) outputData.show(false) // scalastyle:on magic.number diff --git a/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample3.scala b/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample3.scala index 932fa9fac..59fe41b7a 100644 --- a/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample3.scala +++ b/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample3.scala @@ -24,6 +24,7 @@ import za.co.absa.enceladus.dao.auth.MenasKerberosCredentials import za.co.absa.enceladus.dao.rest.{MenasConnectionStringParser, RestDaoFactory} import za.co.absa.enceladus.examples.interpreter.rules.custom.{LPadCustomConformanceRule, UppercaseCustomConformanceRule} import za.co.absa.enceladus.model.Dataset +import za.co.absa.enceladus.utils.fs.HdfsUtils import za.co.absa.enceladus.utils.time.TimeZoneNormalizer object CustomRuleSample3 { @@ -33,6 +34,7 @@ object CustomRuleSample3 { .config("spark.sql.codegen.wholeStage", value = false) .getOrCreate() TimeZoneNormalizer.normalizeAll(spark) //normalize the timezone of JVM and the spark session + implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration) def main(args: Array[String]): Unit = { val conf = ConfigFactory.load() @@ -79,7 +81,7 @@ object CustomRuleSample3 { .setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled) .setControlFrameworkEnabled(enableCF) - val outputData: DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData) + val outputData: DataFrame = DynamicInterpreter().interpret(conformanceDef, inputData) outputData.show() } diff --git a/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample4.scala b/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample4.scala index fcae9619e..47e8dd649 100644 --- a/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample4.scala +++ b/examples/src/main/scala/za/co/absa/enceladus/examples/CustomRuleSample4.scala @@ -26,6 +26,7 @@ import za.co.absa.enceladus.dao.auth.MenasKerberosCredentials import za.co.absa.enceladus.dao.rest.{MenasConnectionStringParser, RestDaoFactory} import za.co.absa.enceladus.examples.interpreter.rules.custom.{LPadCustomConformanceRule, UppercaseCustomConformanceRule} import za.co.absa.enceladus.model.Dataset +import za.co.absa.enceladus.utils.fs.HdfsUtils import za.co.absa.enceladus.utils.time.TimeZoneNormalizer object CustomRuleSample4 { @@ -138,6 +139,7 @@ object CustomRuleSample4 { def main(args: Array[String]): Unit = { val cmd: CmdConfigLocal = getCmdLineArguments(args) implicit val spark: SparkSession = buildSparkSession() + implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration) val conf = ConfigFactory.load() val menasBaseUrls = MenasConnectionStringParser.parse(conf.getString("menas.rest.uri")) @@ -186,7 +188,7 @@ object CustomRuleSample4 { .setCatalystWorkaroundEnabled(true) 
.setControlFrameworkEnabled(false) - val outputData: DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData) + val outputData: DataFrame = DynamicInterpreter().interpret(conformanceDef, inputData) outputData.show() saveToCsv(outputData, cmd.outPath) } diff --git a/examples/src/test/scala/za/co/absa/enceladus/examples/interpreter/rules/custom/UppercaseCustomConformanceRuleSuite.scala b/examples/src/test/scala/za/co/absa/enceladus/examples/interpreter/rules/custom/UppercaseCustomConformanceRuleSuite.scala index fb0202ad2..5009aa7c8 100644 --- a/examples/src/test/scala/za/co/absa/enceladus/examples/interpreter/rules/custom/UppercaseCustomConformanceRuleSuite.scala +++ b/examples/src/test/scala/za/co/absa/enceladus/examples/interpreter/rules/custom/UppercaseCustomConformanceRuleSuite.scala @@ -17,12 +17,13 @@ package za.co.absa.enceladus.examples.interpreter.rules.custom import org.apache.spark.sql import org.apache.spark.sql.DataFrame -import org.scalatest.FunSuite -import org.scalatest.mockito.MockitoSugar +import org.scalatest.funsuite.AnyFunSuite +import org.mockito.scalatest.MockitoSugar import za.co.absa.enceladus.conformance.config.ConformanceConfig import za.co.absa.enceladus.conformance.interpreter.{DynamicInterpreter, FeatureSwitches} import za.co.absa.enceladus.dao.MenasDAO import za.co.absa.enceladus.model.Dataset +import za.co.absa.enceladus.utils.fs.HdfsUtils import za.co.absa.enceladus.utils.testUtils.SparkTestBase @@ -32,11 +33,12 @@ object TestOutputRow { def apply(input: TestInputRow, doneUpper: String): TestOutputRow = TestOutputRow(input.id, input.mandatoryString, input.nullableString, doneUpper) } -class UppercaseCustomConformanceRuleSuite extends FunSuite with SparkTestBase with MockitoSugar { +class UppercaseCustomConformanceRuleSuite extends AnyFunSuite with SparkTestBase with MockitoSugar { import spark.implicits._ implicit val progArgs: ConformanceConfig = ConformanceConfig() // here we may need to specify some parameters (for certain rules) implicit val dao: MenasDAO = mock[MenasDAO] // you may have to hard-code your own implementation here (if not working with menas) + implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration) val experimentalMR = true val isCatalystWorkaroundEnabled = true @@ -67,7 +69,7 @@ class UppercaseCustomConformanceRuleSuite extends FunSuite with SparkTestBase wi .setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled) .setControlFrameworkEnabled(enableCF) - val outputData: sql.DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData) + val outputData: sql.DataFrame = DynamicInterpreter().interpret(conformanceDef, inputData) val output: Seq[TestOutputRow] = outputData.as[TestOutputRow].collect().toSeq val expected: Seq[TestOutputRow] = (input zip Seq("HELLO WORLD", "ONE RING TO RULE THEM ALL", "ALREADY CAPS")).map(x => TestOutputRow(x._1, x._2)) @@ -101,7 +103,7 @@ class UppercaseCustomConformanceRuleSuite extends FunSuite with SparkTestBase wi .setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled) .setControlFrameworkEnabled(enableCF) - val outputData: sql.DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData) + val outputData: sql.DataFrame = DynamicInterpreter().interpret(conformanceDef, inputData) val output: Seq[TestOutputRow] = outputData.as[TestOutputRow].collect().toSeq val expected: Seq[TestOutputRow] = (input zip Seq("1", "4", "9")).map(x => TestOutputRow(x._1, x._2)) @@ -134,7 +136,7 @@ class UppercaseCustomConformanceRuleSuite extends FunSuite with 
SparkTestBase wi .setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled) .setControlFrameworkEnabled(enableCF) - val outputData: sql.DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData) + val outputData: sql.DataFrame = DynamicInterpreter().interpret(conformanceDef, inputData) val output: List[TestOutputRow] = outputData.as[TestOutputRow].collect().toList val expected: List[TestOutputRow] = (input zip Seq("WHAT A BEAUTIFUL PLACE", "ONE RING TO FIND THEM", null)).map(x => TestOutputRow(x._1, x._2)).toList diff --git a/examples/src/test/scala/za/co/absa/enceladus/examples/interpreter/rules/custom/XPadCustomConformanceRuleSuite.scala b/examples/src/test/scala/za/co/absa/enceladus/examples/interpreter/rules/custom/XPadCustomConformanceRuleSuite.scala index 0716dd4a4..c3f656b9b 100644 --- a/examples/src/test/scala/za/co/absa/enceladus/examples/interpreter/rules/custom/XPadCustomConformanceRuleSuite.scala +++ b/examples/src/test/scala/za/co/absa/enceladus/examples/interpreter/rules/custom/XPadCustomConformanceRuleSuite.scala @@ -18,14 +18,15 @@ package za.co.absa.enceladus.examples.interpreter.rules.custom import com.typesafe.config.ConfigFactory import org.apache.spark.sql import org.apache.spark.sql.DataFrame -import org.scalatest.FunSuite -import org.scalatest.mockito.MockitoSugar +import org.scalatest.funsuite.AnyFunSuite +import org.mockito.scalatest.MockitoSugar import za.co.absa.enceladus.conformance.config.ConformanceConfig import za.co.absa.enceladus.conformance.interpreter.{DynamicInterpreter, FeatureSwitches} import za.co.absa.enceladus.dao.MenasDAO import za.co.absa.enceladus.dao.auth.MenasKerberosCredentials import za.co.absa.enceladus.dao.rest.{MenasConnectionStringParser, RestDaoFactory} import za.co.absa.enceladus.model.Dataset +import za.co.absa.enceladus.utils.fs.HdfsUtils import za.co.absa.enceladus.utils.testUtils.SparkTestBase case class XPadTestInputRow(intField: Int, stringField: Option[String]) @@ -34,11 +35,12 @@ object XPadTestOutputRow { def apply(input: XPadTestInputRow, targetField: String): XPadTestOutputRow = XPadTestOutputRow(input.intField, input.stringField, targetField) } -class LpadCustomConformanceRuleSuite extends FunSuite with SparkTestBase with MockitoSugar { +class LpadCustomConformanceRuleSuite extends AnyFunSuite with SparkTestBase with MockitoSugar { import spark.implicits._ implicit val progArgs: ConformanceConfig = ConformanceConfig() // here we may need to specify some parameters (for certain rules) implicit val dao: MenasDAO = mock[MenasDAO] // you may have to hard-code your own implementation here (if not working with menas) + implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration) val experimentalMR = true val isCatalystWorkaroundEnabled = true @@ -69,7 +71,7 @@ class LpadCustomConformanceRuleSuite extends FunSuite with SparkTestBase with Mo .setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled) .setControlFrameworkEnabled(enableCF) - val outputData: sql.DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData) + val outputData: sql.DataFrame = DynamicInterpreter().interpret(conformanceDef, inputData) val output: List[XPadTestOutputRow] = outputData.as[XPadTestOutputRow].collect().toList val expected: List[XPadTestOutputRow] = (input zip List("~~~Short", "This is long", "~~~~~~~~")).map(x => XPadTestOutputRow(x._1, x._2)) @@ -102,7 +104,7 @@ class LpadCustomConformanceRuleSuite extends FunSuite with SparkTestBase with Mo .setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled) 
.setControlFrameworkEnabled(enableCF) - val outputData: sql.DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData) + val outputData: sql.DataFrame = DynamicInterpreter().interpret(conformanceDef, inputData) val output: Seq[XPadTestOutputRow] = outputData.as[XPadTestOutputRow].collect().toSeq val expected: Seq[XPadTestOutputRow] = (input zip Seq("007", "042", "100000")).map(x => XPadTestOutputRow(x._1, x._2)) @@ -135,7 +137,7 @@ class LpadCustomConformanceRuleSuite extends FunSuite with SparkTestBase with Mo .setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled) .setControlFrameworkEnabled(enableCF) - val outputData: sql.DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData) + val outputData: sql.DataFrame = DynamicInterpreter().interpret(conformanceDef, inputData) val output: List[XPadTestOutputRow] = outputData.as[XPadTestOutputRow].collect().toList val expected: List[XPadTestOutputRow] = (input zip List("12abcdefgh", "1231231$$$", "1231231231")).map(x => XPadTestOutputRow(x._1, x._2)) @@ -168,7 +170,7 @@ class LpadCustomConformanceRuleSuite extends FunSuite with SparkTestBase with Mo .setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled) .setControlFrameworkEnabled(enableCF) - val outputData: sql.DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData) + val outputData: sql.DataFrame = DynamicInterpreter().interpret(conformanceDef, inputData) val output: List[XPadTestOutputRow] = outputData.as[XPadTestOutputRow].collect().toList val expected: List[XPadTestOutputRow] = (input zip List("A", "AAAAAAAAAAAAAAAAAAAA", "")).map(x => XPadTestOutputRow(x._1, x._2)) @@ -178,7 +180,7 @@ class LpadCustomConformanceRuleSuite extends FunSuite with SparkTestBase with Mo } -class RpadCustomConformanceRuleSuite extends FunSuite with SparkTestBase { +class RpadCustomConformanceRuleSuite extends AnyFunSuite with SparkTestBase { import spark.implicits._ @@ -187,6 +189,7 @@ class RpadCustomConformanceRuleSuite extends FunSuite with SparkTestBase { private val meansCredentials = MenasKerberosCredentials("user@EXAMPLE.COM", "src/test/resources/user.keytab.example") implicit val progArgs: ConformanceConfig = ConformanceConfig() // here we may need to specify some parameters (for certain rules) implicit val dao: MenasDAO = RestDaoFactory.getInstance(meansCredentials, menasBaseUrls) // you may have to hard-code your own implementation here (if not working with menas) + implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration) val experimentalMR = true val isCatalystWorkaroundEnabled = true @@ -217,7 +220,7 @@ class RpadCustomConformanceRuleSuite extends FunSuite with SparkTestBase { .setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled) .setControlFrameworkEnabled(enableCF) - val outputData: sql.DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData) + val outputData: sql.DataFrame = DynamicInterpreter().interpret(conformanceDef, inputData) val output: List[XPadTestOutputRow] = outputData.as[XPadTestOutputRow].collect().toList val expected: List[XPadTestOutputRow] = (input zip List("Short...", "This is long", "........")).map(x => XPadTestOutputRow(x._1, x._2)) @@ -250,7 +253,7 @@ class RpadCustomConformanceRuleSuite extends FunSuite with SparkTestBase { .setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled) .setControlFrameworkEnabled(enableCF) - val outputData: sql.DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData) + val outputData: sql.DataFrame = DynamicInterpreter().interpret(conformanceDef, 
inputData) val output: Seq[XPadTestOutputRow] = outputData.as[XPadTestOutputRow].collect().toSeq val expected: Seq[XPadTestOutputRow] = (input zip Seq("100", "420", "100000")).map(x => XPadTestOutputRow(x._1, x._2)) @@ -283,7 +286,7 @@ class RpadCustomConformanceRuleSuite extends FunSuite with SparkTestBase { .setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled) .setControlFrameworkEnabled(enableCF) - val outputData: sql.DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData) + val outputData: sql.DataFrame = DynamicInterpreter().interpret(conformanceDef, inputData) val output: List[XPadTestOutputRow] = outputData.as[XPadTestOutputRow].collect().toList val expected: List[XPadTestOutputRow] = (input zip List("abcdefgh12", "$$$1231231", "1231231231")).map(x => XPadTestOutputRow(x._1, x._2)) @@ -316,7 +319,7 @@ class RpadCustomConformanceRuleSuite extends FunSuite with SparkTestBase { .setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled) .setControlFrameworkEnabled(enableCF) - val outputData: sql.DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData) + val outputData: sql.DataFrame = DynamicInterpreter().interpret(conformanceDef, inputData) val output: List[XPadTestOutputRow] = outputData.as[XPadTestOutputRow].collect().toList val expected: List[XPadTestOutputRow] = (input zip List("A", "AAAAAAAAAAAAAAAAAAAA", "")).map(x => XPadTestOutputRow(x._1, x._2)) diff --git a/menas/src/test/scala/za/co/absa/enceladus/menas/auth/jwt/JwtFactoryTest.scala b/menas/src/test/scala/za/co/absa/enceladus/menas/auth/jwt/JwtFactoryTest.scala index 4c7936455..a6409af4f 100644 --- a/menas/src/test/scala/za/co/absa/enceladus/menas/auth/jwt/JwtFactoryTest.scala +++ b/menas/src/test/scala/za/co/absa/enceladus/menas/auth/jwt/JwtFactoryTest.scala @@ -16,9 +16,10 @@ package za.co.absa.enceladus.menas.auth.jwt import io.jsonwebtoken.security.WeakKeyException -import org.scalatest.{Matchers, WordSpec} +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec -class JwtFactoryTest extends WordSpec with Matchers { +class JwtFactoryTest extends AnyWordSpec with Matchers { private val secret = "1234567890qwertyuiopasdfghjklzxcvbnm" private val jwtFactory = new JwtFactory(secret) diff --git a/menas/src/test/scala/za/co/absa/enceladus/menas/controllers/SchemaControllerSuite.scala b/menas/src/test/scala/za/co/absa/enceladus/menas/controllers/SchemaControllerSuite.scala index 53f9596bb..a8b4fb699 100644 --- a/menas/src/test/scala/za/co/absa/enceladus/menas/controllers/SchemaControllerSuite.scala +++ b/menas/src/test/scala/za/co/absa/enceladus/menas/controllers/SchemaControllerSuite.scala @@ -17,8 +17,9 @@ package za.co.absa.enceladus.menas.controllers import org.mockito.Mockito import org.scalatest.concurrent.Futures -import org.scalatest.mockito.MockitoSugar -import org.scalatest.{AsyncFlatSpec, Matchers} +import org.mockito.scalatest.MockitoSugar +import org.scalatest.flatspec.AsyncFlatSpec +import org.scalatest.matchers.should.Matchers import za.co.absa.enceladus.menas.models.SchemaApiFeatures import za.co.absa.enceladus.menas.services.SchemaRegistryService diff --git a/menas/src/test/scala/za/co/absa/enceladus/menas/integration/repositories/BaseRepositoryTest.scala b/menas/src/test/scala/za/co/absa/enceladus/menas/integration/repositories/BaseRepositoryTest.scala index afaa00b86..773a448f2 100644 --- a/menas/src/test/scala/za/co/absa/enceladus/menas/integration/repositories/BaseRepositoryTest.scala +++ 
b/menas/src/test/scala/za/co/absa/enceladus/menas/integration/repositories/BaseRepositoryTest.scala @@ -18,7 +18,8 @@ package za.co.absa.enceladus.menas.integration.repositories import java.util.concurrent.TimeUnit import org.mongodb.scala.MongoDatabase -import org.scalatest.{BeforeAndAfter, WordSpec} +import org.scalatest.wordspec.AnyWordSpec +import org.scalatest.BeforeAndAfter import org.springframework.beans.factory.annotation.Autowired import za.co.absa.enceladus.menas.integration.TestContextManagement import za.co.absa.enceladus.menas.integration.fixtures.FixtureService @@ -27,7 +28,7 @@ import za.co.absa.enceladus.menas.services.MigrationService import scala.concurrent.duration.Duration import scala.concurrent.{Await, Future} -abstract class BaseRepositoryTest extends WordSpec with TestContextManagement with BeforeAndAfter { +abstract class BaseRepositoryTest extends AnyWordSpec with TestContextManagement with BeforeAndAfter { val awaitDuration: Duration = Duration(2000, TimeUnit.MILLISECONDS) diff --git a/menas/src/test/scala/za/co/absa/enceladus/menas/schema/SchemaConvertersSuite.scala b/menas/src/test/scala/za/co/absa/enceladus/menas/schema/SchemaConvertersSuite.scala index d87956178..7c6dceb57 100644 --- a/menas/src/test/scala/za/co/absa/enceladus/menas/schema/SchemaConvertersSuite.scala +++ b/menas/src/test/scala/za/co/absa/enceladus/menas/schema/SchemaConvertersSuite.scala @@ -19,10 +19,10 @@ import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature} import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule import com.fasterxml.jackson.module.scala.DefaultScalaModule import org.apache.spark.sql.types._ -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.menas.utils.converters.SparkMenasSchemaConvertor -class SchemaConvertersSuite extends FunSuite { +class SchemaConvertersSuite extends AnyFunSuite { val objectMapper: ObjectMapper = new ObjectMapper() .registerModule(DefaultScalaModule) diff --git a/menas/src/test/scala/za/co/absa/enceladus/menas/services/BaseServiceTest.scala b/menas/src/test/scala/za/co/absa/enceladus/menas/services/BaseServiceTest.scala index 159123322..f5d581a63 100644 --- a/menas/src/test/scala/za/co/absa/enceladus/menas/services/BaseServiceTest.scala +++ b/menas/src/test/scala/za/co/absa/enceladus/menas/services/BaseServiceTest.scala @@ -15,15 +15,16 @@ package za.co.absa.enceladus.menas.services -import org.scalatest.{BeforeAndAfter, FunSuite} -import org.scalatest.mockito.MockitoSugar - -import scala.concurrent.duration.Duration import java.util.concurrent.TimeUnit +import org.mockito.scalatest.MockitoSugar +import org.scalatest.BeforeAndAfter +import org.scalatest.funsuite.AnyFunSuite + +import scala.concurrent.duration.Duration import scala.concurrent.{Await, Future} -abstract class BaseServiceTest extends FunSuite with MockitoSugar with BeforeAndAfter { +abstract class BaseServiceTest extends AnyFunSuite with MockitoSugar with BeforeAndAfter { val shortTimeout = Duration(100, TimeUnit.MILLISECONDS) val longTimeout = Duration(1000, TimeUnit.MILLISECONDS) diff --git a/menas/src/test/scala/za/co/absa/enceladus/menas/services/DatasetServiceTest.scala b/menas/src/test/scala/za/co/absa/enceladus/menas/services/DatasetServiceTest.scala index 86057fc30..3cc6e84a3 100644 --- a/menas/src/test/scala/za/co/absa/enceladus/menas/services/DatasetServiceTest.scala +++ b/menas/src/test/scala/za/co/absa/enceladus/menas/services/DatasetServiceTest.scala @@ -37,7 +37,7 @@ class DatasetServiceTest extends 
VersionedModelServiceTest[Dataset] { val writeException = new MongoWriteException(new WriteError(1, "", new BsonDocument()), new ServerAddress()) Mockito.when(modelRepository.isUniqueName("dataset")).thenReturn(Future.successful(true)) - Mockito.when(modelRepository.create(any[Dataset](), eqTo("user"))).thenReturn(Future.failed(writeException)) + Mockito.when(modelRepository.create(any[Dataset], eqTo("user"))).thenReturn(Future.failed(writeException)) val result = intercept[ValidationException] { await(service.create(dataset, "user")) @@ -52,7 +52,7 @@ class DatasetServiceTest extends VersionedModelServiceTest[Dataset] { Mockito.when(modelRepository.getVersion("dataset", 1)).thenReturn(Future.successful(Some(dataset))) Mockito.when(modelRepository.getLatestVersionValue("dataset")).thenReturn(Future.successful(Some(1))) Mockito.when(modelRepository.isUniqueName("dataset")).thenReturn(Future.successful(true)) - Mockito.when(modelRepository.update(eqTo("user"), any[Dataset]())).thenReturn(Future.failed(writeException)) + Mockito.when(modelRepository.update(eqTo("user"), any[Dataset])).thenReturn(Future.failed(writeException)) val result = intercept[ValidationException] { await(service.update("user", dataset)) diff --git a/menas/src/test/scala/za/co/absa/enceladus/menas/services/RunServiceTest.scala b/menas/src/test/scala/za/co/absa/enceladus/menas/services/RunServiceTest.scala index f832ffc60..29be50476 100644 --- a/menas/src/test/scala/za/co/absa/enceladus/menas/services/RunServiceTest.scala +++ b/menas/src/test/scala/za/co/absa/enceladus/menas/services/RunServiceTest.scala @@ -17,7 +17,7 @@ package za.co.absa.enceladus.menas.services import com.mongodb.{MongoWriteException, ServerAddress, WriteError} import org.mockito.ArgumentMatchers.any -import org.mockito.Mockito +import org.mockito.scalatest.MockitoSugar import org.mongodb.scala.Completed import org.mongodb.scala.bson.BsonDocument import za.co.absa.enceladus.menas.exceptions.ValidationException @@ -28,7 +28,7 @@ import za.co.absa.enceladus.model.test.factories.RunFactory import scala.concurrent.Future -class RunServiceTest extends BaseServiceTest { +class RunServiceTest extends BaseServiceTest with MockitoSugar { //mocks private val runRepository = mock[RunMongoRepository] @@ -41,7 +41,7 @@ class RunServiceTest extends BaseServiceTest { test("validate Run with non-unique ID") { val run = RunFactory.getDummyRun(uniqueId = Option(uniqueId)) - Mockito.when(runRepository.existsId(uniqueId)).thenReturn(Future.successful(true)) + when(runRepository.existsId(uniqueId)).thenReturn(Future.successful(true)) val validation = await(runService.validate(run)) @@ -60,7 +60,7 @@ class RunServiceTest extends BaseServiceTest { test("validate valid Run") { val run = RunFactory.getDummyRun(uniqueId = Option(uniqueId)) - Mockito.when(runRepository.existsId(uniqueId)).thenReturn(Future.successful(false)) + when(runRepository.existsId(uniqueId)).thenReturn(Future.successful(false)) val validation = await(runService.validate(run)) @@ -72,11 +72,11 @@ class RunServiceTest extends BaseServiceTest { val run2 = run1.copy(runId = 2) val writeException = new MongoWriteException(new WriteError(1, "", new BsonDocument()), new ServerAddress()) - Mockito.when(runRepository.getLatestRun("dataset", 1)).thenReturn( + when(runRepository.getLatestRun("dataset", 1)).thenReturn( Future.successful(None), Future.successful(Some(run1))) - Mockito.when(runRepository.existsId(any[String]())).thenReturn(Future.successful(false)) - 
Mockito.when(runRepository.create(any[Run]())).thenReturn( + when(runRepository.existsId(any[String])).thenReturn(Future.successful(false)) + when(runRepository.create(any[Run])).thenReturn( Future.failed(writeException), Future.successful(Completed())) @@ -89,12 +89,12 @@ class RunServiceTest extends BaseServiceTest { val run2 = run1.copy(runId = 2) val writeException = new MongoWriteException(new WriteError(1, "", new BsonDocument()), new ServerAddress()) - Mockito.when(runRepository.getLatestRun("dataset", 1)).thenReturn( + when(runRepository.getLatestRun("dataset", 1)).thenReturn( Future.successful(None), Future.successful(Some(run1)), Future.successful(Some(run2))) - Mockito.when(runRepository.existsId(any[String]())).thenReturn(Future.successful(false)) - Mockito.when(runRepository.create(any[Run]())).thenReturn( + when(runRepository.existsId(any[String])).thenReturn(Future.successful(false)) + when(runRepository.create(any[Run])).thenReturn( Future.failed(writeException), Future.failed(writeException), Future.successful(Completed())) diff --git a/menas/src/test/scala/za/co/absa/enceladus/menas/utils/SchemaTypeSuite.scala b/menas/src/test/scala/za/co/absa/enceladus/menas/utils/SchemaTypeSuite.scala index c709fc1bd..805d5a8c1 100644 --- a/menas/src/test/scala/za/co/absa/enceladus/menas/utils/SchemaTypeSuite.scala +++ b/menas/src/test/scala/za/co/absa/enceladus/menas/utils/SchemaTypeSuite.scala @@ -15,10 +15,11 @@ package za.co.absa.enceladus.menas.utils -import org.scalatest.{FlatSpec, Matchers} +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers import za.co.absa.enceladus.menas.models.rest.exceptions.SchemaFormatException -class SchemaTypeSuite extends FlatSpec with Matchers { +class SchemaTypeSuite extends AnyFlatSpec with Matchers { "SchemaType.fromSchemaName" should "correctly derive SchemaType.Value from string" in { SchemaType.fromSchemaName("struct") shouldBe SchemaType.Struct diff --git a/menas/src/test/scala/za/co/absa/enceladus/menas/utils/converters/SparkMenasSchemaConvertorSuite.scala b/menas/src/test/scala/za/co/absa/enceladus/menas/utils/converters/SparkMenasSchemaConvertorSuite.scala index f3553658b..e38458eda 100644 --- a/menas/src/test/scala/za/co/absa/enceladus/menas/utils/converters/SparkMenasSchemaConvertorSuite.scala +++ b/menas/src/test/scala/za/co/absa/enceladus/menas/utils/converters/SparkMenasSchemaConvertorSuite.scala @@ -15,7 +15,7 @@ package za.co.absa.enceladus.menas.utils.converters -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import org.apache.spark.sql.types._ import za.co.absa.enceladus.model._ import za.co.absa.enceladus.utils.testUtils.SparkTestBase @@ -25,7 +25,7 @@ import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule import com.fasterxml.jackson.databind.SerializationFeature import za.co.absa.enceladus.menas.models.rest.exceptions.SchemaParsingException -class SparkMenasSchemaConvertorSuite extends FunSuite with SparkTestBase { +class SparkMenasSchemaConvertorSuite extends AnyFunSuite with SparkTestBase { private val objectMapper = new ObjectMapper() .registerModule(DefaultScalaModule) .registerModule(new JavaTimeModule()) diff --git a/menas/src/test/scala/za/co/absa/enceladus/menas/utils/parsers/SchemaParserSuite.scala b/menas/src/test/scala/za/co/absa/enceladus/menas/utils/parsers/SchemaParserSuite.scala index b7f223770..854f21894 100644 --- a/menas/src/test/scala/za/co/absa/enceladus/menas/utils/parsers/SchemaParserSuite.scala +++ 
b/menas/src/test/scala/za/co/absa/enceladus/menas/utils/parsers/SchemaParserSuite.scala @@ -18,17 +18,18 @@ package za.co.absa.enceladus.menas.utils.parsers import org.apache.avro.SchemaParseException import org.apache.commons.io.IOUtils import org.apache.spark.sql.types.{DataType, DataTypes, StructField, StructType} -import org.mockito.ArgumentMatchers.any import org.mockito.Mockito -import org.scalatest.mockito.MockitoSugar -import org.scalatest.{Inside, Matchers, WordSpec} +import org.scalatest.matchers.should.Matchers +import org.mockito.scalatest.MockitoSugar +import org.scalatest.Inside +import org.scalatest.wordspec.AnyWordSpec import za.co.absa.cobrix.cobol.parser.exceptions.SyntaxErrorException import za.co.absa.enceladus.menas.TestResourcePath import za.co.absa.enceladus.menas.models.rest.exceptions.SchemaParsingException import za.co.absa.enceladus.menas.utils.SchemaType import za.co.absa.enceladus.menas.utils.converters.SparkMenasSchemaConvertor -class SchemaParserSuite extends WordSpec with Matchers with MockitoSugar with Inside { +class SchemaParserSuite extends AnyWordSpec with Matchers with MockitoSugar with Inside { val mockSchemaConvertor: SparkMenasSchemaConvertor = mock[SparkMenasSchemaConvertor] val someStructType: StructType = StructType(Seq(StructField(name = "field1", dataType = DataTypes.IntegerType))) diff --git a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/BaseMigrationSuite.scala b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/BaseMigrationSuite.scala index e49b74820..3d72d483d 100644 --- a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/BaseMigrationSuite.scala +++ b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/BaseMigrationSuite.scala @@ -15,11 +15,11 @@ package za.co.absa.enceladus.migrations.framework -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.migrations.framework.fixture.MigrationTestData._ import za.co.absa.enceladus.migrations.framework.fixture.MigrationTestDoubles._ -class BaseMigrationSuite extends FunSuite { +class BaseMigrationSuite extends AnyFunSuite { test("Test collection names are determined properly for a given db version") { val mig = new Migrator(DocumentDbStub, MigrationExample0 :: MigrationExample1 :: Nil) diff --git a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/MigrationUseCaseSuite.scala b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/MigrationUseCaseSuite.scala index 4ed821ada..6a75b6fd5 100644 --- a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/MigrationUseCaseSuite.scala +++ b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/MigrationUseCaseSuite.scala @@ -15,10 +15,10 @@ package za.co.absa.enceladus.migrations.framework -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.migrations.framework.fixture.UseCaseTestData -class MigrationUseCaseSuite extends FunSuite { +class MigrationUseCaseSuite extends AnyFunSuite { test("Test a database initialization") { val testData = new UseCaseTestData diff --git a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/ObjectIdToolsSuite.scala b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/ObjectIdToolsSuite.scala index 03632ff4c..3bd98cefa 100644 --- a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/ObjectIdToolsSuite.scala +++ 
b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/ObjectIdToolsSuite.scala @@ -15,9 +15,9 @@ package za.co.absa.enceladus.migrations.framework -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite -class ObjectIdToolsSuite extends FunSuite { +class ObjectIdToolsSuite extends AnyFunSuite { test("Test ObjectId extractor ") { val doc1 = """{ "_id" : { "$oid" : "5b98eea5a43a28a6154a2453" }, "name" : "Test" }""" diff --git a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/continuous/EntityMapSuite.scala b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/continuous/EntityMapSuite.scala index f077ea5e7..6dcf65598 100644 --- a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/continuous/EntityMapSuite.scala +++ b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/continuous/EntityMapSuite.scala @@ -15,11 +15,11 @@ package za.co.absa.enceladus.migrations.framework.continuous -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.migrations.continuous.EntityVersionMap import za.co.absa.enceladus.migrations.framework.continuous.fixture.EntityVersionMapMock -class EntityMapSuite extends FunSuite { +class EntityMapSuite extends AnyFunSuite { test("Test entity version map returns correct mapping when it is available") { val enp: EntityVersionMap = new EntityVersionMapMock diff --git a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/continuous/integration/ContinuousMigrationIntegrationSuite.scala b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/continuous/integration/ContinuousMigrationIntegrationSuite.scala index e7d1a5a98..80f9330af 100644 --- a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/continuous/integration/ContinuousMigrationIntegrationSuite.scala +++ b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/continuous/integration/ContinuousMigrationIntegrationSuite.scala @@ -15,11 +15,11 @@ package za.co.absa.enceladus.migrations.framework.continuous.integration -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.migrations.continuous.migrate01.ContinuousMigrator import za.co.absa.enceladus.migrations.framework.continuous.integration.fixture.ExampleDatabaseFixture -class ContinuousMigrationIntegrationSuite extends FunSuite with ExampleDatabaseFixture { +class ContinuousMigrationIntegrationSuite extends AnyFunSuite with ExampleDatabaseFixture { test("Test schema migrates properly and conflicts are resolved") { val mig = new ContinuousMigrator(db, db) diff --git a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/continuous/integration/EntityMapIntegrationSuite.scala b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/continuous/integration/EntityMapIntegrationSuite.scala index 321e7a696..511cd9e27 100644 --- a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/continuous/integration/EntityMapIntegrationSuite.scala +++ b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/continuous/integration/EntityMapIntegrationSuite.scala @@ -15,11 +15,11 @@ package za.co.absa.enceladus.migrations.framework.continuous.integration -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.migrations.continuous.{EntityVersionMap, EntityVersionMapMongo} import 
za.co.absa.enceladus.migrations.framework.integration.fixture.MongoDbFixture -class EntityMapIntegrationSuite extends FunSuite with MongoDbFixture { +class EntityMapIntegrationSuite extends AnyFunSuite with MongoDbFixture { test("Test entity version map returns correct mapping when it is available") { val enp: EntityVersionMap = new EntityVersionMapMongo(dbRaw) diff --git a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/integration/MigrationsIntegrationSuite.scala b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/integration/MigrationsIntegrationSuite.scala index 5382fe37a..8a2a2f8e2 100644 --- a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/integration/MigrationsIntegrationSuite.scala +++ b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/integration/MigrationsIntegrationSuite.scala @@ -15,14 +15,14 @@ package za.co.absa.enceladus.migrations.framework.integration -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.migrations.framework.Configuration.DatabaseVersionCollectionName import za.co.absa.enceladus.migrations.framework.Migrator import za.co.absa.enceladus.migrations.framework.dao.ScalaMongoImplicits import za.co.absa.enceladus.migrations.framework.integration.fixture.MigrationsFixture import za.co.absa.enceladus.migrations.framework.integration.data.IntegrationTestData -class MigrationsIntegrationSuite extends FunSuite with MigrationsFixture { +class MigrationsIntegrationSuite extends AnyFunSuite with MigrationsFixture { val testData = new IntegrationTestData import testData._ diff --git a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/integration/MongoDbIntegrationSuite.scala b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/integration/MongoDbIntegrationSuite.scala index 1320e1956..15addb22a 100644 --- a/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/integration/MongoDbIntegrationSuite.scala +++ b/migrations/src/test/scala/za/co/absa/enceladus/migrations/framework/integration/MongoDbIntegrationSuite.scala @@ -16,11 +16,11 @@ package za.co.absa.enceladus.migrations.framework.integration import org.mongodb.scala.bson.collection.immutable.Document -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.migrations.framework.integration.fixture.MongoDbFixture import za.co.absa.enceladus.migrations.framework.migration.{ASC, DESC, IndexField} -class MongoDbIntegrationSuite extends FunSuite with MongoDbFixture { +class MongoDbIntegrationSuite extends AnyFunSuite with MongoDbFixture { import za.co.absa.enceladus.migrations.framework.dao.ScalaMongoImplicits._ test("Test add/drop collections") { diff --git a/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/buildin/kafka/ControlInfoSerSuite.scala b/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/buildin/kafka/ControlInfoSerSuite.scala index 723123462..133509e12 100644 --- a/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/buildin/kafka/ControlInfoSerSuite.scala +++ b/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/buildin/kafka/ControlInfoSerSuite.scala @@ -16,11 +16,11 @@ package za.co.absa.enceladus.plugins.buildin.kafka import org.apache.commons.io.IOUtils -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.plugins.buildin.factories.DceControlInfoFactory import 
za.co.absa.enceladus.plugins.builtin.controlinfo.ControlInfoAvroSerializer -class ControlInfoSerSuite extends FunSuite { +class ControlInfoSerSuite extends AnyFunSuite { test ("Control info key serialize to Avro") { val dceControlInfo = DceControlInfoFactory.getDummyDceControlInfo() val avroControlInfoKey = ControlInfoAvroSerializer.convertInfoKey(dceControlInfo) diff --git a/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/buildin/kafka/KafkaPluginSuite.scala b/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/buildin/kafka/KafkaPluginSuite.scala index b673c736f..1f2e0308f 100644 --- a/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/buildin/kafka/KafkaPluginSuite.scala +++ b/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/buildin/kafka/KafkaPluginSuite.scala @@ -16,7 +16,7 @@ package za.co.absa.enceladus.plugins.buildin.kafka import com.typesafe.config.ConfigFactory -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.plugins.buildin.factories.DceControlInfoFactory import za.co.absa.enceladus.plugins.buildin.kafka.dummy.DummyControlInfoProducer import za.co.absa.enceladus.plugins.builtin.common.mq.kafka.{KafkaConnectionParams, KafkaSecurityParams} @@ -24,7 +24,7 @@ import za.co.absa.enceladus.plugins.builtin.controlinfo.mq.ControlInfoSenderPlug import scala.collection.JavaConverters._ -class KafkaPluginSuite extends FunSuite { +class KafkaPluginSuite extends AnyFunSuite { test("Test Kafka info plugin sends control measurements") { val producer = new DummyControlInfoProducer val dceControlInfo = DceControlInfoFactory.getDummyDceControlInfo() diff --git a/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/builtin/errorsender/mq/KafkaErrorSenderPluginSuite.scala b/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/builtin/errorsender/mq/KafkaErrorSenderPluginSuite.scala index de95d10e4..d2d29d335 100644 --- a/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/builtin/errorsender/mq/KafkaErrorSenderPluginSuite.scala +++ b/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/builtin/errorsender/mq/KafkaErrorSenderPluginSuite.scala @@ -22,7 +22,9 @@ import com.github.tomakehurst.wiremock.client.WireMock._ import com.github.tomakehurst.wiremock.core.WireMockConfiguration import com.typesafe.config.{ConfigFactory, ConfigValueFactory} import org.apache.spark.sql.DataFrame -import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers} +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers +import org.scalatest.BeforeAndAfterAll import za.co.absa.abris.avro.read.confluent.SchemaManager import za.co.absa.enceladus.plugins.builtin.common.mq.kafka.KafkaConnectionParams import za.co.absa.enceladus.plugins.builtin.errorsender.DceError @@ -33,7 +35,7 @@ import za.co.absa.enceladus.utils.modules.SourcePhase import za.co.absa.enceladus.utils.testUtils.SparkTestBase -class KafkaErrorSenderPluginSuite extends FlatSpec with SparkTestBase with Matchers with BeforeAndAfterAll { +class KafkaErrorSenderPluginSuite extends AnyFlatSpec with SparkTestBase with Matchers with BeforeAndAfterAll { private val port = 6081 private val wireMockServer = new WireMockServer(WireMockConfiguration.wireMockConfig().port(port)) diff --git a/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/builtin/errorsender/params/ErrorSenderPluginParamsSuite.scala 
b/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/builtin/errorsender/params/ErrorSenderPluginParamsSuite.scala index 07c03eb6f..002638888 100644 --- a/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/builtin/errorsender/params/ErrorSenderPluginParamsSuite.scala +++ b/plugins-builtin/src/test/scala/za/co/absa/enceladus/plugins/builtin/errorsender/params/ErrorSenderPluginParamsSuite.scala @@ -17,10 +17,11 @@ package za.co.absa.enceladus.plugins.builtin.errorsender.params import java.time.Instant -import org.scalatest.{FlatSpec, Matchers} +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers import za.co.absa.enceladus.utils.modules.SourcePhase -class ErrorSenderPluginParamsSuite extends FlatSpec with Matchers { +class ErrorSenderPluginParamsSuite extends AnyFlatSpec with Matchers { private val params = ErrorSenderPluginParams( datasetName = "datasetName1", diff --git a/pom.xml b/pom.xml index a65850f27..0b9cb1333 100644 --- a/pom.xml +++ b/pom.xml @@ -144,7 +144,7 @@ 1.6 3.1.1 - 0.2.6 + 3.0.0 2.13.65 2.7.3 3.5.4 @@ -163,7 +163,8 @@ 4.11 0-10 4.17.10 - 2.10.0 + 1.15.0 + 3.5.2 2.22.2 3.6.4 2.7.0 @@ -174,7 +175,7 @@ 0.9.0 2.11.12 2.0.0 - 3.0.5 + 3.2.2 4.0.0-RC2 2.4 0.2.1 @@ -334,10 +335,22 @@ 4.11 test + + org.mockito + mockito-scala_${scala.compat.version} + ${mockito.scala.version} + test + + + org.mockito + mockito-scala-scalatest_${scala.compat.version} + ${mockito.scala.version} + test + org.mockito mockito-core - ${mockito.version} + ${mockito.core.version} test diff --git a/spark-jobs/src/main/resources/reference.conf b/spark-jobs/src/main/resources/reference.conf index 461e29b11..48efb6d48 100644 --- a/spark-jobs/src/main/resources/reference.conf +++ b/spark-jobs/src/main/resources/reference.conf @@ -95,3 +95,10 @@ timezone="UTC" # Optional security settings #kafka.security.protocol="SASL_SSL" #kafka.sasl.mechanism="GSSAPI" + +# S3 specific settings: +s3.region = "eu-west-1" # default region, overridable + +# s3.kmsKeyId is recommended to set externally only: +# s3.kmsKeyId = "arn:aws:kms:eu-west-1:XXXX:key/YYYY" + diff --git a/spark-jobs/src/main/scala/za/co/absa/enceladus/S3DefaultCredentialsProvider.scala b/spark-jobs/src/main/scala/za/co/absa/enceladus/S3DefaultCredentialsProvider.scala new file mode 100644 index 000000000..3b209d32e --- /dev/null +++ b/spark-jobs/src/main/scala/za/co/absa/enceladus/S3DefaultCredentialsProvider.scala @@ -0,0 +1,24 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package za.co.absa.enceladus + +import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider + +trait S3DefaultCredentialsProvider { + + implicit val defaultCredentialsProvider = DefaultCredentialsProvider.create() + +} diff --git a/spark-jobs/src/main/scala/za/co/absa/enceladus/common/CommonJobExecution.scala b/spark-jobs/src/main/scala/za/co/absa/enceladus/common/CommonJobExecution.scala index 8e720935d..b11a1c143 100644 --- a/spark-jobs/src/main/scala/za/co/absa/enceladus/common/CommonJobExecution.scala +++ b/spark-jobs/src/main/scala/za/co/absa/enceladus/common/CommonJobExecution.scala @@ -22,10 +22,13 @@ import com.typesafe.config.{Config, ConfigFactory} import org.apache.spark.SPARK_VERSION import org.apache.spark.sql.SparkSession import org.slf4j.{Logger, LoggerFactory} +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider +import software.amazon.awssdk.regions.Region import za.co.absa.atum.AtumImplicits -import za.co.absa.atum.core.Atum +import za.co.absa.atum.core.{Atum, ControlType} +import za.co.absa.atum.persistence.S3KmsSettings import za.co.absa.enceladus.common.Constants.{InfoDateColumn, InfoVersionColumn} -import za.co.absa.enceladus.common.config.{JobConfigParser, PathConfig} +import za.co.absa.enceladus.common.config.{JobConfigParser, PathConfig, S3Config} import za.co.absa.enceladus.common.plugin.PostProcessingService import za.co.absa.enceladus.common.plugin.menas.{MenasPlugin, MenasRunUrl} import za.co.absa.enceladus.common.version.SparkVersionGuard @@ -34,7 +37,7 @@ import za.co.absa.enceladus.dao.rest.MenasConnectionStringParser import za.co.absa.enceladus.model.Dataset import za.co.absa.enceladus.plugins.builtin.errorsender.params.ErrorSenderPluginParams import za.co.absa.enceladus.utils.config.{ConfigReader, SecureConfig} -import za.co.absa.enceladus.utils.fs.FileSystemVersionUtils +import za.co.absa.enceladus.utils.fs.{DistributedFsUtils, S3FsUtils} import za.co.absa.enceladus.utils.general.ProjectMetadataTools import za.co.absa.enceladus.utils.modules.SourcePhase import za.co.absa.enceladus.utils.modules.SourcePhase.Standardization @@ -42,13 +45,14 @@ import za.co.absa.enceladus.utils.performance.PerformanceMeasurer import za.co.absa.enceladus.utils.time.TimeZoneNormalizer import scala.util.control.NonFatal -import scala.util.{Failure, Random, Success, Try} +import scala.util.{Failure, Success, Try} trait CommonJobExecution { protected case class PreparationResult(dataset: Dataset, reportVersion: Int, pathCfg: PathConfig, + s3Config: S3Config, performance: PerformanceMeasurer) TimeZoneNormalizer.normalizeJVMTimeZone() @@ -78,7 +82,7 @@ trait CommonJobExecution { protected def prepareJob[T]() (implicit dao: MenasDAO, cmd: JobConfigParser[T], - fsUtils: FileSystemVersionUtils, + fsUtils: DistributedFsUtils, spark: SparkSession): PreparationResult = { val confReader: ConfigReader = new ConfigReader(conf) confReader.logEffectiveConfigProps(Constants.ConfigKeysToRedact) @@ -87,8 +91,9 @@ trait CommonJobExecution { val dataset = dao.getDataset(cmd.datasetName, cmd.datasetVersion) val reportVersion = getReportVersion(cmd, dataset) val pathCfg: PathConfig = getPathConfig(cmd, dataset, reportVersion) + val s3Config: S3Config = getS3Config - validateOutputPath(fsUtils, pathCfg) + validateOutputPath(s3Config, pathCfg) // Enable Spline import za.co.absa.spline.harvester.SparkLineageInitializer._ @@ -97,23 +102,21 @@ trait CommonJobExecution { // Enable non-default persistence storage level if provided in the command line 
cmd.persistStorageLevel.foreach(Atum.setCachingStorageLevel) - PreparationResult(dataset, reportVersion, pathCfg, new PerformanceMeasurer(spark.sparkContext.appName)) + PreparationResult(dataset, reportVersion, pathCfg, s3Config, new PerformanceMeasurer(spark.sparkContext.appName)) } - protected def validateOutputPath(fsUtils: FileSystemVersionUtils, pathConfig: PathConfig): Unit + protected def validateOutputPath(s3Config: S3Config, pathConfig: PathConfig)(implicit fsUtils: DistributedFsUtils): Unit - protected def validateIfPathAlreadyExists(fsUtils: FileSystemVersionUtils, path: String): Unit = { - // TODO fix for s3 [ref issue #1416] - -// if (fsUtils.hdfsExists(path)) { -// throw new IllegalStateException( -// s"Path $path already exists. Increment the run version, or delete $path" -// ) -// } + protected def validateIfPathAlreadyExists(s3Config: S3Config, path: String)(implicit fsUtils: DistributedFsUtils): Unit = { + if (fsUtils.exists(path)) { + throw new IllegalStateException( + s"Path $path already exists. Increment the run version, or delete $path" + ) + } } protected def runPostProcessing[T](sourcePhase: SourcePhase, preparationResult: PreparationResult, jobCmdConfig: JobConfigParser[T]) - (implicit spark: SparkSession, fileSystemVersionUtils: FileSystemVersionUtils): Unit = { + (implicit spark: SparkSession, fileSystemVersionUtils: DistributedFsUtils): Unit = { val outputPath = sourcePhase match { case Standardization => preparationResult.pathCfg.standardizationPath case _ => preparationResult.pathCfg.publishPath @@ -130,8 +133,8 @@ trait CommonJobExecution { }.mkString(",") } - val sourceSystem = "source1" //Atum.getControlMeasure.metadata.sourceApplication // TODO fix for s3 [ref issue #1416] - val uniqueRunId = Some(s"runId-${Math.abs(Random.nextLong())}") //Atum.getControlMeasure.runUniqueId // TODO fix for s3 [ref issue #1416] + val sourceSystem = Atum.getControlMeasure.metadata.sourceApplication + val uniqueRunId = Atum.getControlMeasure.runUniqueId val params = ErrorSenderPluginParams(jobCmdConfig.datasetName, jobCmdConfig.datasetVersion, jobCmdConfig.reportDate, preparationResult.reportVersion, outputPath, @@ -166,6 +169,18 @@ trait CommonJobExecution { ) } + protected def getS3Config: S3Config = { + val keyId = conf.getString("s3.kmsKeyId") + val region = Region.of(conf.getString("s3.region")) + + S3Config(region, keyId) + } + + protected def getS3FsUtil(implicit credentialsProvider: AwsCredentialsProvider): S3FsUtils = { + val s3Config = getS3Config + S3FsUtils(s3Config.region, S3KmsSettings(s3Config.kmsKeyId)) + } + private def buildPublishPath[T](cmd: JobConfigParser[T], ds: Dataset, reportVersion: Int): String = { val infoDateCol: String = InfoDateColumn val infoVersionCol: String = InfoVersionColumn @@ -218,12 +233,13 @@ trait CommonJobExecution { } protected def handleEmptyOutput(job: SourcePhase)(implicit spark: SparkSession): Unit = { - import za.co.absa.atum.core.Constants._ val areCountMeasurementsAllZero = Atum.getControlMeasure.checkpoints .flatMap(checkpoint => checkpoint.controls.filter(control => - control.controlName.equalsIgnoreCase(controlTypeRecordCount))) + ControlType.isControlMeasureTypeEqual(control.controlType, ControlType.Count.value) + ) + ) .forall(m => Try(m.controlValue.toString.toDouble).toOption.contains(0D)) if (areCountMeasurementsAllZero) { @@ -235,7 +251,7 @@ trait CommonJobExecution { } } - private def getReportVersion[T](jobConfig: JobConfigParser[T], dataset: Dataset)(implicit fsUtils: FileSystemVersionUtils): Int = { + private 
def getReportVersion[T](jobConfig: JobConfigParser[T], dataset: Dataset)(implicit fsUtils: DistributedFsUtils): Int = { jobConfig.reportVersion match { case Some(version) => version case None => diff --git a/spark-jobs/src/main/scala/za/co/absa/enceladus/common/Constants.scala b/spark-jobs/src/main/scala/za/co/absa/enceladus/common/Constants.scala index da0e4902b..303f470cc 100644 --- a/spark-jobs/src/main/scala/za/co/absa/enceladus/common/Constants.scala +++ b/spark-jobs/src/main/scala/za/co/absa/enceladus/common/Constants.scala @@ -31,6 +31,7 @@ object Constants { "spark.yarn.dist.files", "spline.mongodb.url", "sun.boot.class.path", - "sun.java.command" + "sun.java.command", + "s3.kmsKeyId" ) } diff --git a/spark-jobs/src/main/scala/za/co/absa/enceladus/common/ControlInfoValidation.scala b/spark-jobs/src/main/scala/za/co/absa/enceladus/common/ControlInfoValidation.scala index 86ca68885..a8f6bb64d 100644 --- a/spark-jobs/src/main/scala/za/co/absa/enceladus/common/ControlInfoValidation.scala +++ b/spark-jobs/src/main/scala/za/co/absa/enceladus/common/ControlInfoValidation.scala @@ -15,7 +15,7 @@ package za.co.absa.enceladus.common -import za.co.absa.atum.core.Atum +import za.co.absa.atum.core.{Atum, ControlType} import za.co.absa.atum.model.Checkpoint import za.co.absa.enceladus.utils.implicits.OptionImplicits._ import za.co.absa.enceladus.utils.validation.ValidationException @@ -69,14 +69,14 @@ object ControlInfoValidation { checkpoint <- checkpoints .find(c => c.name.equalsIgnoreCase(checkpointName) || c.workflowName.equalsIgnoreCase(checkpointName)) .toTry(new Exception(s"Missing $checkpointName checkpoint")) - measurement <- checkpoint.controls.find(m => m.controlType.equalsIgnoreCase(controlTypeRecordCount)) - .toTry(new Exception(s"$checkpointName checkpoint does not have a $controlTypeRecordCount control")) + measurement <- checkpoint.controls.find(m => ControlType.isControlMeasureTypeEqual(m.controlType, ControlType.Count.value)) + .toTry(new Exception(s"$checkpointName checkpoint does not have a ${ControlType.Count.value} control")) res <- Try { val rowCount = measurement.controlValue.toString.toLong if (rowCount >= 0) rowCount else throw new Exception(s"Negative value") }.recoverWith { case t: Throwable => - Failure(new Exception(s"Wrong $checkpointName $controlTypeRecordCount value: ${t.getMessage}")) + Failure(new Exception(s"Wrong $checkpointName ${ControlType.Count.value} value: ${t.getMessage}")) } } yield res diff --git a/spark-jobs/src/main/scala/za/co/absa/enceladus/common/config/S3Config.scala b/spark-jobs/src/main/scala/za/co/absa/enceladus/common/config/S3Config.scala new file mode 100644 index 000000000..9be6ea7f5 --- /dev/null +++ b/spark-jobs/src/main/scala/za/co/absa/enceladus/common/config/S3Config.scala @@ -0,0 +1,20 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package za.co.absa.enceladus.common.config + +import software.amazon.awssdk.regions.Region + +case class S3Config(region: Region, kmsKeyId: String) diff --git a/spark-jobs/src/main/scala/za/co/absa/enceladus/common/plugin/menas/MenasPlugin.scala b/spark-jobs/src/main/scala/za/co/absa/enceladus/common/plugin/menas/MenasPlugin.scala index b28219e03..418ed13d0 100644 --- a/spark-jobs/src/main/scala/za/co/absa/enceladus/common/plugin/menas/MenasPlugin.scala +++ b/spark-jobs/src/main/scala/za/co/absa/enceladus/common/plugin/menas/MenasPlugin.scala @@ -52,7 +52,7 @@ object MenasPlugin { isJobStageOnly, generateNewRun) listener = Option(eventListener) - //PluginManager.loadPlugin(eventListener) // TODO fix for s3 [ref issue #1416] + PluginManager.loadPlugin(eventListener) } def runUniqueId: Option[String] = listener.flatMap(_.runUniqueId) diff --git a/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/ConformanceExecution.scala b/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/ConformanceExecution.scala index d0177fdc8..5325eccae 100644 --- a/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/ConformanceExecution.scala +++ b/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/ConformanceExecution.scala @@ -20,11 +20,14 @@ import java.io.{PrintWriter, StringWriter} import org.apache.spark.sql.functions.{lit, to_date} import org.apache.spark.sql.{DataFrame, SparkSession} import za.co.absa.atum.AtumImplicits -import za.co.absa.atum.AtumImplicits._ +import za.co.absa.atum.AtumImplicits.{DataSetWrapper, SparkSessionWrapper} import za.co.absa.atum.core.Atum +import za.co.absa.atum.persistence.S3KmsSettings +import za.co.absa.atum.utils.S3Utils.StringS3LocationExt +import za.co.absa.enceladus.S3DefaultCredentialsProvider import za.co.absa.enceladus.common.Constants.{InfoDateColumn, InfoDateColumnString, InfoVersionColumn, ReportDateFormat} import za.co.absa.enceladus.common.RecordIdGeneration._ -import za.co.absa.enceladus.common.config.{JobConfigParser, PathConfig} +import za.co.absa.enceladus.common.config.{JobConfigParser, PathConfig, S3Config} import za.co.absa.enceladus.common.plugin.menas.MenasPlugin import za.co.absa.enceladus.common.{CommonJobExecution, Constants, RecordIdGeneration} import za.co.absa.enceladus.conformance.config.{ConformanceConfig, ConformanceConfigParser} @@ -34,7 +37,7 @@ import za.co.absa.enceladus.dao.MenasDAO import za.co.absa.enceladus.dao.auth.MenasCredentials import za.co.absa.enceladus.model.Dataset import za.co.absa.enceladus.standardization_conformance.config.StandardizationConformanceConfig -import za.co.absa.enceladus.utils.fs.FileSystemVersionUtils +import za.co.absa.enceladus.utils.fs.DistributedFsUtils import za.co.absa.enceladus.utils.implicits.DataFrameImplicits.DataFrameEnhancements import za.co.absa.enceladus.utils.modules.SourcePhase import za.co.absa.enceladus.utils.performance.PerformanceMetricTools @@ -43,37 +46,36 @@ import za.co.absa.enceladus.utils.schema.SchemaUtils import scala.util.control.NonFatal import scala.util.{Failure, Success, Try} -trait ConformanceExecution extends CommonJobExecution { +trait ConformanceExecution extends CommonJobExecution with S3DefaultCredentialsProvider { private val conformanceReader = new ConformancePropertiesProvider private val sourceId = SourcePhase.Conformance protected def prepareConformance[T](preparationResult: PreparationResult) (implicit dao: MenasDAO, cmd: ConformanceConfigParser[T], - fsUtils: FileSystemVersionUtils, + fsUtils: DistributedFsUtils, spark: 
SparkSession): Unit = { - //val stdDirSize = fsUtils.getDirectorySize(preparationResult.pathCfg.standardizationPath) - //preparationResult.performance.startMeasurement(stdDirSize) // TODO fix for s3 [ref issue #1416] + + val stdDirSize = fsUtils.getDirectorySize(preparationResult.pathCfg.standardizationPath) + preparationResult.performance.startMeasurement(stdDirSize) log.info(s"standardization path: ${preparationResult.pathCfg.standardizationPath}") log.info(s"publish path: ${preparationResult.pathCfg.publishPath}") - // Enable Control Framework - import za.co.absa.atum.AtumImplicits.SparkSessionWrapper - // reinitialize Control Framework in case of combined job if(cmd.isInstanceOf[StandardizationConformanceConfig]) { spark.disableControlMeasuresTracking() } - // InputPath is standardizationPath in the combined job - // TODO fix for s3 [ref issue #1416] -// spark.enableControlMeasuresTracking(s"${preparationResult.pathCfg.standardizationPath}/_INFO") -// .setControlMeasuresWorkflow(sourceId.toString) + val dataS3Location = preparationResult.pathCfg.standardizationPath.toS3Location(preparationResult.s3Config.region) + val infoS3Location = dataS3Location.copy(path = s"${dataS3Location.path}/_INFO") + + // Enable Control Framework + spark.enableControlMeasuresTrackingForS3(sourceS3Location = Some(infoS3Location), destinationS3Config = None) + .setControlMeasuresWorkflow(sourceId.toString) // Enable control framework performance optimization for pipeline-like jobs - // TODO fix for s3 [ref issue #1416] - //Atum.setAllowUnpersistOldDatasets(true) + Atum.setAllowUnpersistOldDatasets(true) // Enable Menas plugin for Control Framework MenasPlugin.enableMenas( @@ -92,8 +94,8 @@ trait ConformanceExecution extends CommonJobExecution { } } - override def validateOutputPath(fsUtils: FileSystemVersionUtils, pathConfig: PathConfig): Unit = { - validateIfPathAlreadyExists(fsUtils, pathConfig.publishPath) + override def validateOutputPath(s3Config: S3Config, pathConfig: PathConfig)(implicit fsUtils: DistributedFsUtils): Unit = { + validateIfPathAlreadyExists(s3Config, pathConfig.publishPath) } protected def readConformanceInputData(pathCfg: PathConfig)(implicit spark: SparkSession): DataFrame = { @@ -101,22 +103,23 @@ trait ConformanceExecution extends CommonJobExecution { } protected def conform[T](inputData: DataFrame, preparationResult: PreparationResult) - (implicit spark: SparkSession, cmd: ConformanceConfigParser[T], dao: MenasDAO): DataFrame = { + (implicit spark: SparkSession, cmd: ConformanceConfigParser[T], dao: MenasDAO, + fsUtils: DistributedFsUtils): DataFrame = { val recordIdGenerationStrategy = getRecordIdGenerationStrategyFromConfig(conf) implicit val featureSwitcher: FeatureSwitches = conformanceReader.readFeatureSwitches() Try { - // handleControlInfoValidation() // TODO fix for s3 [ref issue #1416] - DynamicInterpreter.interpret(preparationResult.dataset, inputData) + handleControlInfoValidation() + DynamicInterpreter().interpret(preparationResult.dataset, inputData) } match { case Failure(e: ValidationException) => - // AtumImplicits.SparkSessionWrapper(spark).setControlMeasurementError(sourceId.toString, e.getMessage, e.techDetails) // TODO fix for s3 [ref issue #1416] + AtumImplicits.SparkSessionWrapper(spark).setControlMeasurementError(sourceId.toString, e.getMessage, e.techDetails) throw e case Failure(NonFatal(e)) => val sw = new StringWriter e.printStackTrace(new PrintWriter(sw)) - // AtumImplicits.SparkSessionWrapper(spark).setControlMeasurementError(sourceId.toString, 
e.getMessage, sw.toString) // TODO fix for s3 [ref issue #1416] + AtumImplicits.SparkSessionWrapper(spark).setControlMeasurementError(sourceId.toString, e.getMessage, sw.toString) throw e case Success(conformedDF) => if (SchemaUtils.fieldExists(Constants.EnceladusRecordId, conformedDF.schema)) { @@ -133,56 +136,51 @@ trait ConformanceExecution extends CommonJobExecution { menasCredentials: MenasCredentials) (implicit spark: SparkSession, cmd: ConformanceConfigParser[T], - fsUtils: FileSystemVersionUtils): Unit = { + fsUtils: DistributedFsUtils): Unit = { val cmdLineArgs: String = args.mkString(" ") - // TODO fix for s3 [ref issue #1416] -// PerformanceMetricTools.addJobInfoToAtumMetadata( -// "conform", -// preparationResult.pathCfg.standardizationPath, -// preparationResult.pathCfg.publishPath, -// menasCredentials.username, cmdLineArgs -// ) + PerformanceMetricTools.addJobInfoToAtumMetadata( + "conform", + preparationResult.pathCfg.standardizationPath, + preparationResult.pathCfg.publishPath, + menasCredentials.username, cmdLineArgs + ) val withPartCols = result .withColumnIfDoesNotExist(InfoDateColumn, to_date(lit(cmd.reportDate), ReportDateFormat)) .withColumnIfDoesNotExist(InfoDateColumnString, lit(cmd.reportDate)) .withColumnIfDoesNotExist(InfoVersionColumn, lit(preparationResult.reportVersion)) - // TODO fix for s3 [ref issue #1416] - val recordCount = -1 -// val recordCount = result.lastCheckpointRowCount match { -// case None => withPartCols.count -// case Some(p) => p -// } + val recordCount: Long = result.lastCheckpointRowCount match { + case None => withPartCols.count + case Some(p) => p + } if (recordCount == 0) { handleEmptyOutput(SourcePhase.Conformance) } - // ensure the whole path but version exists - //fsUtils.createAllButLastSubDir(preparationResult.pathCfg.publishPath) // TODO fix for s3 [ref issue #1416] - withPartCols.write.parquet(preparationResult.pathCfg.publishPath) - // TODO fix for s3 [ref issue #1416] - //val publishDirSize = fsUtils.getDirectorySize(preparationResult.pathCfg.publishPath) - // preparationResult.performance.finishMeasurement(publishDirSize, recordCount) -// PerformanceMetricTools.addPerformanceMetricsToAtumMetadata( -// spark, -// "conform", -// preparationResult.pathCfg.standardizationPath, -// preparationResult.pathCfg.publishPath, -// menasCredentials.username, cmdLineArgs -// ) - - // TODO fix for s3 [ref issue #1416] - //withPartCols.writeInfoFile(preparationResult.pathCfg.publishPath) - //writePerformanceMetrics(preparationResult.performance, cmd) - - // TODO fix for s3 [ref issue #1416] -// if (conformanceReader.isAutocleanStdFolderEnabled()) { -// fsUtils.deleteDirectoryRecursively(preparationResult.pathCfg.standardizationPath) -// } + val publishDirSize = fsUtils.getDirectorySize(preparationResult.pathCfg.publishPath) + preparationResult.performance.finishMeasurement(publishDirSize, recordCount) + PerformanceMetricTools.addPerformanceMetricsToAtumMetadata( + spark, + "conform", + preparationResult.pathCfg.standardizationPath, + preparationResult.pathCfg.publishPath, + menasCredentials.username, cmdLineArgs + ) + + val infoFilePath = s"${preparationResult.pathCfg.publishPath}/_INFO" + val infoFileLocation = infoFilePath.toS3Location(preparationResult.s3Config.region) + log.info(s"infoFilePath = $infoFilePath, infoFileLocation = $infoFileLocation") + + withPartCols.writeInfoFileOnS3(infoFileLocation, S3KmsSettings(preparationResult.s3Config.kmsKeyId)) + writePerformanceMetrics(preparationResult.performance, cmd) + + if 
(conformanceReader.isAutocleanStdFolderEnabled()) { + fsUtils.deleteDirectoryRecursively(preparationResult.pathCfg.standardizationPath) + } log.info(s"$sourceId finished successfully") } } diff --git a/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/DynamicConformanceJob.scala b/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/DynamicConformanceJob.scala index 15d2a7dc5..b48092382 100644 --- a/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/DynamicConformanceJob.scala +++ b/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/DynamicConformanceJob.scala @@ -19,7 +19,7 @@ import org.apache.spark.sql.SparkSession import za.co.absa.enceladus.conformance.config.ConformanceConfig import za.co.absa.enceladus.dao.MenasDAO import za.co.absa.enceladus.dao.rest.RestDaoFactory -import za.co.absa.enceladus.utils.fs.FileSystemVersionUtils +import za.co.absa.enceladus.utils.fs.DistributedFsUtils import za.co.absa.enceladus.utils.modules.SourcePhase object DynamicConformanceJob extends ConformanceExecution { @@ -32,7 +32,7 @@ object DynamicConformanceJob extends ConformanceExecution { implicit val cmd: ConformanceConfig = ConformanceConfig.getFromArguments(args) implicit val spark: SparkSession = obtainSparkSession(jobName) // initialize spark - implicit val fsUtils: FileSystemVersionUtils = new FileSystemVersionUtils(spark.sparkContext.hadoopConfiguration) + implicit val fsUtils: DistributedFsUtils = getS3FsUtil val menasCredentials = cmd.menasCredentialsFactory.getInstance() implicit val dao: MenasDAO = RestDaoFactory.getInstance(menasCredentials, menasBaseUrls) diff --git a/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/HyperConformance.scala b/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/HyperConformance.scala index dd19e503d..3ae28f8f7 100644 --- a/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/HyperConformance.scala +++ b/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/HyperConformance.scala @@ -32,6 +32,7 @@ import za.co.absa.enceladus.dao.MenasDAO import za.co.absa.enceladus.dao.auth.{MenasCredentialsFactory, MenasKerberosCredentialsFactory, MenasPlainCredentialsFactory} import za.co.absa.enceladus.dao.rest.{MenasConnectionStringParser, RestDaoFactory} import za.co.absa.enceladus.model.Dataset +import za.co.absa.enceladus.utils.fs.HdfsUtils import za.co.absa.hyperdrive.ingestor.api.transformer.{StreamTransformer, StreamTransformerFactory} class HyperConformance (implicit cmd: ConformanceConfig, @@ -64,7 +65,10 @@ class HyperConformance (implicit cmd: ConformanceConfig, val infoDateColumn = infoDateFactory.getInfoDateColumn(rawDf) - val conformedDf = DynamicInterpreter.interpret(conformance, rawDf) + // using HDFS implementation until HyperConformance is S3-ready + implicit val fsUtils: HdfsUtils = new HdfsUtils(sparkSession.sparkContext.hadoopConfiguration) + + val conformedDf = DynamicInterpreter().interpret(conformance, rawDf) .withColumnIfDoesNotExist(InfoDateColumn, coalesce(infoDateColumn, current_date())) .withColumnIfDoesNotExist(InfoDateColumnString, coalesce(date_format(infoDateColumn,"yyyy-MM-dd"), lit(""))) .withColumnIfDoesNotExist(InfoVersionColumn, lit(reportVersion)) diff --git a/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/interpreter/DynamicInterpreter.scala b/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/interpreter/DynamicInterpreter.scala index deace3822..00d9baae2 100644 --- 
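Both entry points above now fix the filesystem helper once at job start-up (getS3FsUtil for the S3-backed jobs, a plain HdfsUtils while HyperConformance remains HDFS-only) and pass it around as the DistributedFsUtils trait. A small sketch of code written purely against that trait, using only operations that appear elsewhere in this patch (exists, deleteDirectoryRecursively); the helper name is invented for illustration:

    import za.co.absa.enceladus.utils.fs.DistributedFsUtils

    object OutputPathSketch {
      // Behaves the same whether the implicit in scope is an HdfsUtils or an S3FsUtils,
      // because only DistributedFsUtils methods are touched.
      def dropIfExists(path: String)(implicit fsUtils: DistributedFsUtils): Unit = {
        if (fsUtils.exists(path)) {
          fsUtils.deleteDirectoryRecursively(path)
        }
      }
    }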
a/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/interpreter/DynamicInterpreter.scala +++ b/spark-jobs/src/main/scala/za/co/absa/enceladus/conformance/interpreter/DynamicInterpreter.scala @@ -31,12 +31,12 @@ import za.co.absa.enceladus.model.conformanceRule.{ConformanceRule, _} import za.co.absa.enceladus.model.{Dataset => ConfDataset} import za.co.absa.enceladus.utils.error.ErrorMessage import za.co.absa.enceladus.utils.explode.ExplosionContext -import za.co.absa.enceladus.utils.fs.FileSystemVersionUtils +import za.co.absa.enceladus.utils.fs.DistributedFsUtils import za.co.absa.enceladus.utils.general.Algorithms import za.co.absa.enceladus.utils.schema.SchemaUtils import za.co.absa.enceladus.utils.udf.UDFLibrary -object DynamicInterpreter { +case class DynamicInterpreter(implicit fsUtils: DistributedFsUtils) { private val log = LoggerFactory.getLogger(this.getClass) /** @@ -55,11 +55,11 @@ object DynamicInterpreter { implicit val interpreterContext: InterpreterContext = InterpreterContext(inputDf.schema, conformance, featureSwitches, jobShortName, spark, dao, InterpreterContextArgs.fromConformanceConfig(progArgs)) - // applyCheckpoint(inputDf, "Start") // TODO fix for s3 [ref issue #1416] + applyCheckpoint(inputDf, "Start") val conformedDf = applyConformanceRules(ensureErrorColumnExists(inputDf)) - // applyCheckpoint(conformedDf, "End") // TODO fix for s3 [ref issue #1416] + applyCheckpoint(conformedDf, "End") logExecutionPlan(conformedDf) conformedDf @@ -264,7 +264,6 @@ object DynamicInterpreter { */ private def getMappingTableSizeMb(rule: MappingConformanceRule) (implicit ictx: InterpreterContext): Int = { - val fsUtils = new FileSystemVersionUtils(ictx.spark.sparkContext.hadoopConfiguration) val mappingTableDef = ictx.dao.getMappingTable(rule.mappingTable, rule.mappingTableVersion) val mappingTablePath = PartitioningUtils.getPartitionedPathName(mappingTableDef.hdfsPath, diff --git a/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization/StandardizationExecution.scala b/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization/StandardizationExecution.scala index beb9db4e4..0d0a57d57 100644 --- a/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization/StandardizationExecution.scala +++ b/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization/StandardizationExecution.scala @@ -22,8 +22,11 @@ import org.apache.spark.sql.types.{StructField, StructType} import org.apache.spark.sql.{Column, DataFrame, SparkSession} import za.co.absa.atum.AtumImplicits import za.co.absa.atum.core.Atum +import za.co.absa.atum.persistence.S3KmsSettings +import za.co.absa.enceladus.S3DefaultCredentialsProvider import za.co.absa.enceladus.common.RecordIdGeneration.getRecordIdGenerationStrategyFromConfig -import za.co.absa.enceladus.common.config.{JobConfigParser, PathConfig} +import za.co.absa.atum.utils.S3Utils.StringS3LocationExt +import za.co.absa.enceladus.common.config.{JobConfigParser, PathConfig, S3Config} import za.co.absa.enceladus.common.plugin.menas.MenasPlugin import za.co.absa.enceladus.common.{CommonJobExecution, Constants} import za.co.absa.enceladus.dao.MenasDAO @@ -32,7 +35,7 @@ import za.co.absa.enceladus.model.Dataset import za.co.absa.enceladus.standardization.config.{StandardizationConfig, StandardizationConfigParser} import za.co.absa.enceladus.standardization.interpreter.StandardizationInterpreter import za.co.absa.enceladus.standardization.interpreter.stages.PlainSchemaGenerator -import za.co.absa.enceladus.utils.fs.FileSystemVersionUtils +import 
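With DynamicInterpreter turned into a case class above, callers build an instance per use and the required DistributedFsUtils is picked up implicitly rather than constructed inside getMappingTableSizeMb. A usage sketch mirroring the updated test suites later in this patch; it assumes the same implicits (SparkSession, MenasDAO, ConformanceConfig, FeatureSwitches) that those suites already provide:

    import org.apache.spark.sql.{DataFrame, SparkSession}
    import za.co.absa.enceladus.conformance.config.ConformanceConfig
    import za.co.absa.enceladus.conformance.interpreter.{DynamicInterpreter, FeatureSwitches}
    import za.co.absa.enceladus.dao.MenasDAO
    import za.co.absa.enceladus.model.{Dataset => ConfDataset}
    import za.co.absa.enceladus.utils.fs.HdfsUtils

    object InterpreterCallSketch {
      // Before: DynamicInterpreter.interpret(conformanceDef, inputDf)
      // After:  DynamicInterpreter() captures an implicit DistributedFsUtils at construction.
      def conform(conformanceDef: ConfDataset, inputDf: DataFrame)
                 (implicit spark: SparkSession, dao: MenasDAO,
                  progArgs: ConformanceConfig, featureSwitches: FeatureSwitches): DataFrame = {
        implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)
        DynamicInterpreter().interpret(conformanceDef, inputDf)
      }
    }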
za.co.absa.enceladus.utils.fs.{DistributedFsUtils, HdfsUtils} import za.co.absa.enceladus.utils.modules.SourcePhase import za.co.absa.enceladus.utils.performance.PerformanceMetricTools import za.co.absa.enceladus.utils.schema.{MetadataKeys, SchemaUtils, SparkUtils} @@ -41,7 +44,7 @@ import za.co.absa.enceladus.utils.validation.ValidationException import scala.util.control.NonFatal -trait StandardizationExecution extends CommonJobExecution { +trait StandardizationExecution extends CommonJobExecution with S3DefaultCredentialsProvider { private val sourceId = SourcePhase.Standardization protected def prepareStandardization[T](args: Array[String], @@ -49,23 +52,30 @@ trait StandardizationExecution extends CommonJobExecution { preparationResult: PreparationResult) (implicit dao: MenasDAO, cmd: StandardizationConfigParser[T], - fsUtils: FileSystemVersionUtils, + fsUtils: DistributedFsUtils, spark: SparkSession): StructType = { - // val stdDirSize = fsUtils.getDirectorySize(preparationResult.pathCfg.rawPath) - // preparationResult.performance.startMeasurement(stdDirSize) // TODO fix for s3 [ref issue #1416] + val stdDirSize = fsUtils.getDirectorySize(preparationResult.pathCfg.rawPath) + preparationResult.performance.startMeasurement(stdDirSize) // Enable Control Framework - - // TODO fix for s3 [ref issue #1416] import za.co.absa.atum.AtumImplicits.SparkSessionWrapper -// spark.enableControlMeasuresTracking(s"${preparationResult.pathCfg.rawPath}/_INFO") -// .setControlMeasuresWorkflow(sourceId.toString) + + val inputDataS3Location = preparationResult.pathCfg.rawPath.toS3Location(preparationResult.s3Config.region) + val inputInfoS3Location = inputDataS3Location.copy(path = s"${inputDataS3Location.path}/_INFO") + + val outputDataS3Location = preparationResult.pathCfg.standardizationPath.toS3Location(preparationResult.s3Config.region) + val outputInfoS3Location = outputDataS3Location.copy(path = s"${outputDataS3Location.path}/_INFO") + val kmsSettings = S3KmsSettings(preparationResult.s3Config.kmsKeyId) + + spark.enableControlMeasuresTrackingForS3(sourceS3Location = Some(inputInfoS3Location), + destinationS3Config = Some(outputInfoS3Location, kmsSettings)) + .setControlMeasuresWorkflow(sourceId.toString) log.info(s"raw path: ${preparationResult.pathCfg.rawPath}") log.info(s"standardization path: ${preparationResult.pathCfg.standardizationPath}") // Enable control framework performance optimization for pipeline-like jobs - //Atum.setAllowUnpersistOldDatasets(true) // TODO fix for s3 [ref issue #1416] + Atum.setAllowUnpersistOldDatasets(true) // Enable Menas plugin for Control Framework MenasPlugin.enableMenas( @@ -76,19 +86,16 @@ trait StandardizationExecution extends CommonJobExecution { preparationResult.reportVersion) // Add report date and version (aka Enceladus info date and version) to Atum's metadata - // TODO fix for s3 [ref issue #1416] - //Atum.setAdditionalInfo(Constants.InfoDateColumn -> cmd.reportDate) - //Atum.setAdditionalInfo(Constants.InfoVersionColumn -> preparationResult.reportVersion.toString) + Atum.setAdditionalInfo(Constants.InfoDateColumn -> cmd.reportDate) + Atum.setAdditionalInfo(Constants.InfoVersionColumn -> preparationResult.reportVersion.toString) - // TODO fix for s3 [ref issue #1416] // Add the raw format of the input file(s) to Atum's metadata - //Atum.setAdditionalInfo("raw_format" -> cmd.rawFormat) + Atum.setAdditionalInfo("raw_format" -> cmd.rawFormat) - // TODO fix for s3 [ref issue #1416] -// PerformanceMetricTools.addJobInfoToAtumMetadata("std", -// 
preparationResult.pathCfg.rawPath, -// preparationResult.pathCfg.standardizationPath, -// menasCredentials.username, args.mkString(" ")) + PerformanceMetricTools.addJobInfoToAtumMetadata("std", + preparationResult.pathCfg.rawPath, + preparationResult.pathCfg.standardizationPath, + menasCredentials.username, args.mkString(" ")) dao.getSchema(preparationResult.dataset.schemaName, preparationResult.dataset.schemaVersion) } @@ -101,8 +108,8 @@ trait StandardizationExecution extends CommonJobExecution { } } - override def validateOutputPath(fsUtils: FileSystemVersionUtils, pathConfig: PathConfig): Unit = { - validateIfPathAlreadyExists(fsUtils: FileSystemVersionUtils, pathConfig.standardizationPath) + override def validateOutputPath(s3Config: S3Config, pathConfig: PathConfig)(implicit fsUtils: DistributedFsUtils): Unit = { + validateIfPathAlreadyExists(s3Config, pathConfig.standardizationPath) } protected def readStandardizationInputData[T](schema: StructType, @@ -110,7 +117,7 @@ trait StandardizationExecution extends CommonJobExecution { path: String, dataset: Dataset) (implicit spark: SparkSession, - fsUtils: FileSystemVersionUtils, + fsUtils: DistributedFsUtils, dao: MenasDAO): DataFrame = { val numberOfColumns = schema.fields.length val standardizationReader = new StandardizationPropertiesProvider() @@ -130,6 +137,8 @@ trait StandardizationExecution extends CommonJobExecution { private def getColumnNameOfCorruptRecord[R](schema: StructType, cmd: StandardizationConfigParser[R]) (implicit spark: SparkSession): Option[String] = { // SparkUtils.setUniqueColumnNameOfCorruptRecord is called even if result is not used to avoid conflict + + import AtumImplicits.DataSetWrapper val columnNameOfCorruptRecord = SparkUtils.setUniqueColumnNameOfCorruptRecord(spark, schema) if (cmd.rawFormat.equalsIgnoreCase("fixed-width") || cmd.failOnInputNotPerSchema) { None @@ -144,20 +153,18 @@ trait StandardizationExecution extends CommonJobExecution { val recordIdGenerationStrategy = getRecordIdGenerationStrategyFromConfig(conf) try { - //handleControlInfoValidation() // TODO fix for s3 [ref issue #1416] + handleControlInfoValidation() StandardizationInterpreter.standardize(inputData, schema, cmd.rawFormat, cmd.failOnInputNotPerSchema, recordIdGenerationStrategy) } catch { case e@ValidationException(msg, errors) => val errorDescription = s"$msg\nDetails: ${errors.mkString("\n")}" - // TODO fix for s3 [ref issue #1416] - //AtumImplicits.SparkSessionWrapper(spark).setControlMeasurementError("Schema Validation", errorDescription, "") + AtumImplicits.SparkSessionWrapper(spark).setControlMeasurementError("Schema Validation", errorDescription, "") throw e case NonFatal(e) if !e.isInstanceOf[ValidationException] => val sw = new StringWriter e.printStackTrace(new PrintWriter(sw)) - // TODO fix for s3 [ref issue #1416] - //AtumImplicits.SparkSessionWrapper(spark).setControlMeasurementError(sourceId.toString, e.getMessage, sw.toString) + AtumImplicits.SparkSessionWrapper(spark).setControlMeasurementError(sourceId.toString, e.getMessage, sw.toString) throw e } } @@ -169,21 +176,19 @@ trait StandardizationExecution extends CommonJobExecution { cmd: StandardizationConfigParser[T], menasCredentials: MenasCredentials) (implicit spark: SparkSession, - fsUtils: FileSystemVersionUtils): DataFrame = { + fsUtils: DistributedFsUtils): DataFrame = { import za.co.absa.atum.AtumImplicits._ val fieldRenames = SchemaUtils.getRenamesInSchema(schema) fieldRenames.foreach { case (destinationName, sourceName) => 
standardizedDF.registerColumnRename(sourceName, destinationName) } - // standardizedDF.setCheckpoint(s"$sourceId - End", persistInDatabase = false) // TODO fix for s3 [ref issue #1416] + standardizedDF.setCheckpoint(s"$sourceId - End", persistInDatabase = false) - // TODO fix for s3 [ref issue #1416] -// val recordCount = standardizedDF.lastCheckpointRowCount match { -// case None => standardizedDF.count -// case Some(p) => p -// } - val recordCount = standardizedDF.count() + val recordCount = standardizedDF.lastCheckpointRowCount match { + case None => standardizedDF.count + case Some(p) => p + } if (recordCount == 0) { handleEmptyOutput(sourceId) @@ -191,27 +196,31 @@ trait StandardizationExecution extends CommonJobExecution { log.info(s"Writing into standardized path ${preparationResult.pathCfg.standardizationPath}") standardizedDF.write.parquet(preparationResult.pathCfg.standardizationPath) + // Store performance metrics // (record count, directory sizes, elapsed time, etc. to _INFO file metadata and performance file) - // TODO fix for s3 [ref issue #1416] - // val stdDirSize = fsUtils.getDirectorySize(preparationResult.pathCfg.standardizationPath) - // preparationResult.performance.finishMeasurement(stdDirSize, recordCount) -// PerformanceMetricTools.addPerformanceMetricsToAtumMetadata( -// spark, -// "std", -// preparationResult.pathCfg.rawPath, -// preparationResult.pathCfg.standardizationPath, -// menasCredentials.username, -// args.mkString(" ") -// ) - - // TODO fix for s3 [ref issue #1416] - //cmd.rowTag.foreach(rowTag => Atum.setAdditionalInfo("xml_row_tag" -> rowTag)) - //cmd.csvDelimiter.foreach(delimiter => Atum.setAdditionalInfo("csv_delimiter" -> delimiter)) - - // standardizedDF.writeInfoFile(preparationResult.pathCfg.standardizationPath) // TODO fix for s3 [ref issue #1416] - //writePerformanceMetrics(preparationResult.performance, cmd) + val stdDirSize = fsUtils.getDirectorySize(preparationResult.pathCfg.standardizationPath) + preparationResult.performance.finishMeasurement(stdDirSize, recordCount) + + PerformanceMetricTools.addPerformanceMetricsToAtumMetadata( + spark, + "std", + preparationResult.pathCfg.rawPath, + preparationResult.pathCfg.standardizationPath, + menasCredentials.username, + args.mkString(" ") + ) + + cmd.rowTag.foreach(rowTag => Atum.setAdditionalInfo("xml_row_tag" -> rowTag)) + cmd.csvDelimiter.foreach(delimiter => Atum.setAdditionalInfo("csv_delimiter" -> delimiter)) + + val infoFilePath = s"${preparationResult.pathCfg.standardizationPath}/_INFO" + val infoFileLocation = infoFilePath.toS3Location(preparationResult.s3Config.region) + log.info(s"infoFilePath = $infoFilePath, infoFileLocation = $infoFileLocation") + + standardizedDF.writeInfoFileOnS3(infoFileLocation, S3KmsSettings(preparationResult.s3Config.kmsKeyId)) + writePerformanceMetrics(preparationResult.performance, cmd) log.info(s"$sourceId finished successfully") standardizedDF } @@ -219,34 +228,41 @@ trait StandardizationExecution extends CommonJobExecution { //scalastyle:off parameter.number private def ensureSplittable(df: DataFrame, path: String, schema: StructType) - (implicit spark: SparkSession, fsUtils: FileSystemVersionUtils) = { - // TODO fix for s3 [ref issue #1416] -// if (fsUtils.isNonSplittable(path)) { -// convertToSplittable(df, schema) -// } else { + (implicit spark: SparkSession, fsUtils: DistributedFsUtils): DataFrame = { + if (fsUtils.isNonSplittable(path)) { + convertToSplittable(df, schema) + } else { df -// } + } } private def convertToSplittable(df: DataFrame, 
schema: StructType) - (implicit spark: SparkSession, fsUtils: FileSystemVersionUtils) = { + (implicit spark: SparkSession, fsUtils: DistributedFsUtils): DataFrame = { log.warn("Dataset is stored in a non-splittable format. This can have a severe performance impact.") - val tempParquetDir = s"/tmp/nonsplittable-to-parquet-${UUID.randomUUID()}" - log.warn(s"Converting to Parquet in temporary dir: $tempParquetDir") - - // Handle renaming of source columns in case there are columns - // that will break because of issues in column names like spaces - df.select(schema.fields.map { field: StructField => - renameSourceColumn(df, field) - }: _*).write.parquet(tempParquetDir) - - fsUtils.deleteOnExit(tempParquetDir) - // Reload from temp parquet and reverse column renaming above - val dfTmp = spark.read.parquet(tempParquetDir) - dfTmp.select(schema.fields.map { field: StructField => - reverseRenameSourceColumn(dfTmp, field) - }: _*) + fsUtils match { + case utils: HdfsUtils => + val tempParquetDir = s"/tmp/nonsplittable-to-parquet-${UUID.randomUUID()}" + log.warn(s"Converting to Parquet in temporary dir: $tempParquetDir") + + // Handle renaming of source columns in case there are columns + // that will break because of issues in column names like spaces + df.select(schema.fields.map { field: StructField => + renameSourceColumn(df, field) + }: _*).write.parquet(tempParquetDir) + + utils.deleteOnExit(tempParquetDir) + // Reload from temp parquet and reverse column renaming above + val dfTmp = spark.read.parquet(tempParquetDir) + dfTmp.select(schema.fields.map { field: StructField => + reverseRenameSourceColumn(dfTmp, field) + }: _*) + + case utils => + log.warn(s"Splittability conversion only available for HDFS, leaving as is for ${utils.getClass.getName}") + df + } + } private def renameSourceColumn(df: DataFrame, field: StructField): Column = { diff --git a/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization/StandardizationJob.scala b/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization/StandardizationJob.scala index b9ff4d9ca..c123a5a1b 100644 --- a/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization/StandardizationJob.scala +++ b/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization/StandardizationJob.scala @@ -19,7 +19,7 @@ import org.apache.spark.sql.SparkSession import za.co.absa.enceladus.dao.MenasDAO import za.co.absa.enceladus.dao.rest.RestDaoFactory import za.co.absa.enceladus.standardization.config.StandardizationConfig -import za.co.absa.enceladus.utils.fs.FileSystemVersionUtils +import za.co.absa.enceladus.utils.fs.DistributedFsUtils import za.co.absa.enceladus.utils.modules.SourcePhase import za.co.absa.enceladus.utils.udf.UDFLibrary @@ -31,7 +31,8 @@ object StandardizationJob extends StandardizationExecution { implicit val cmd: StandardizationConfig = StandardizationConfig.getFromArguments(args) implicit val spark: SparkSession = obtainSparkSession(jobName) - implicit val fsUtils: FileSystemVersionUtils = new FileSystemVersionUtils(spark.sparkContext.hadoopConfiguration) + implicit val fsUtils: DistributedFsUtils = getS3FsUtil + implicit val udfLib: UDFLibrary = new UDFLibrary val menasCredentials = cmd.menasCredentialsFactory.getInstance() implicit val dao: MenasDAO = RestDaoFactory.getInstance(menasCredentials, menasBaseUrls) diff --git a/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization_conformance/StandardizationAndConformanceExecution.scala 
b/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization_conformance/StandardizationAndConformanceExecution.scala index 1a43e1396..1347fc6c2 100644 --- a/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization_conformance/StandardizationAndConformanceExecution.scala +++ b/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization_conformance/StandardizationAndConformanceExecution.scala @@ -16,12 +16,12 @@ package za.co.absa.enceladus.standardization_conformance import za.co.absa.enceladus.common.CommonJobExecution -import za.co.absa.enceladus.common.config.{JobConfigParser, PathConfig} +import za.co.absa.enceladus.common.config.{JobConfigParser, PathConfig, S3Config} import za.co.absa.enceladus.conformance.ConformanceExecution import za.co.absa.enceladus.model.Dataset import za.co.absa.enceladus.standardization.StandardizationExecution import za.co.absa.enceladus.standardization_conformance.config.StandardizationConformanceConfig -import za.co.absa.enceladus.utils.fs.FileSystemVersionUtils +import za.co.absa.enceladus.utils.fs.DistributedFsUtils trait StandardizationAndConformanceExecution extends StandardizationExecution with ConformanceExecution @@ -36,8 +36,8 @@ trait StandardizationAndConformanceExecution extends StandardizationExecution publishPath = publishPathOverride.getOrElse(defaultConfig.publishPath)) } - override def validateOutputPath(fsUtils: FileSystemVersionUtils, pathConfig: PathConfig): Unit = { - validateIfPathAlreadyExists(fsUtils, pathConfig.standardizationPath) - validateIfPathAlreadyExists(fsUtils, pathConfig.publishPath) + override def validateOutputPath(s3Config: S3Config, pathConfig: PathConfig)(implicit fsUtils: DistributedFsUtils): Unit = { + validateIfPathAlreadyExists(s3Config, pathConfig.standardizationPath) + validateIfPathAlreadyExists(s3Config, pathConfig.publishPath) } } diff --git a/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization_conformance/StandardizationAndConformanceJob.scala b/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization_conformance/StandardizationAndConformanceJob.scala index b0509ba26..6bb023d1c 100644 --- a/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization_conformance/StandardizationAndConformanceJob.scala +++ b/spark-jobs/src/main/scala/za/co/absa/enceladus/standardization_conformance/StandardizationAndConformanceJob.scala @@ -19,7 +19,7 @@ import org.apache.spark.sql.SparkSession import za.co.absa.enceladus.dao.MenasDAO import za.co.absa.enceladus.dao.rest.RestDaoFactory import za.co.absa.enceladus.standardization_conformance.config.StandardizationConformanceConfig -import za.co.absa.enceladus.utils.fs.FileSystemVersionUtils +import za.co.absa.enceladus.utils.fs.DistributedFsUtils import za.co.absa.enceladus.utils.modules.SourcePhase import za.co.absa.enceladus.utils.udf.UDFLibrary @@ -31,7 +31,7 @@ object StandardizationAndConformanceJob extends StandardizationAndConformanceExe implicit val cmd: StandardizationConformanceConfig = StandardizationConformanceConfig.getFromArguments(args) implicit val spark: SparkSession = obtainSparkSession(jobName) - implicit val fsUtils: FileSystemVersionUtils = new FileSystemVersionUtils(spark.sparkContext.hadoopConfiguration) + implicit val fsUtils: DistributedFsUtils = getS3FsUtil implicit val udfLib: UDFLibrary = new UDFLibrary val menasCredentials = cmd.menasCredentialsFactory.getInstance() implicit val dao: MenasDAO = RestDaoFactory.getInstance(menasCredentials, menasBaseUrls) diff --git 
a/spark-jobs/src/test/scala/za/co/absa/enceladus/common/ControlInfoValidationSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/common/ControlInfoValidationSuite.scala index 938ed2503..ed3c91b90 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/common/ControlInfoValidationSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/common/ControlInfoValidationSuite.scala @@ -15,39 +15,41 @@ package za.co.absa.enceladus.common -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite +import za.co.absa.atum.core.ControlType import za.co.absa.atum.model.{Checkpoint, Measurement} + import scala.util.Success -class ControlInfoValidationSuite extends FunSuite { +class ControlInfoValidationSuite extends AnyFunSuite { import za.co.absa.atum.core.Constants._ private val checkpoints1 = List( Checkpoint("raw", None, None, "", "", "", 0, List( - Measurement("", controlTypeAbsAggregatedTotal, "", 0), - Measurement("", controlTypeRecordCount, "", 11) + Measurement("", ControlType.AbsAggregatedTotal.value, "", 0), + Measurement("", ControlType.Count.value, "", 11) ) ), Checkpoint("source", None, None, "", "", "", 1, List( - Measurement("", controlTypeRecordCount, "", 3) + Measurement("", ControlType.Count.value, "", 3) ) ) ) private val checkpoints2 = List( Checkpoint("source", None, None, "", "", "", 1, List( - Measurement("", controlTypeDistinctCount, "", 1) + Measurement("", ControlType.DistinctCount.value, "", 1) ) ) ) private val checkpoints3 = List( Checkpoint("raw", None, None, "", "", "", 0, List( - Measurement("", controlTypeRecordCount, "", -3) + Measurement("", ControlType.Count.value, "", -3) ) ), Checkpoint("source", None, None, "", "", "", 1, List( - Measurement("", controlTypeRecordCount, "", "") + Measurement("", ControlType.Count.value, "", "") ) ) ) @@ -65,7 +67,7 @@ class ControlInfoValidationSuite extends FunSuite { val sourceResult = ControlInfoValidation.getCountFromGivenCheckpoint("source", checkpoints2) val rawError = "Missing raw checkpoint" - val sourceError = s"source checkpoint does not have a $controlTypeRecordCount control" + val sourceError = s"source checkpoint does not have a ${ControlType.Count.value} control" assert(rawResult.failed.get.getMessage == rawError) assert(sourceResult.failed.get.getMessage == sourceError) @@ -75,8 +77,8 @@ class ControlInfoValidationSuite extends FunSuite { val rawResult = ControlInfoValidation.getCountFromGivenCheckpoint("raw", checkpoints3) val sourceResult = ControlInfoValidation.getCountFromGivenCheckpoint("source", checkpoints3) - val rawError = s"Wrong raw $controlTypeRecordCount value: Negative value" - val sourceError = s"""Wrong source $controlTypeRecordCount value: For input string: \"\"""" + val rawError = s"Wrong raw ${ControlType.Count.value} value: Negative value" + val sourceError = s"""Wrong source ${ControlType.Count.value} value: For input string: \"\"""" assert(rawResult.failed.get.getMessage == rawError) assert(sourceResult.failed.get.getMessage == sourceError) diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/common/RecordIdGenerationSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/common/RecordIdGenerationSuite.scala index 8d23390f7..49791c522 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/common/RecordIdGenerationSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/common/RecordIdGenerationSuite.scala @@ -18,13 +18,14 @@ package za.co.absa.enceladus.common import java.util.UUID import com.typesafe.config.{Config, ConfigException, 
ConfigFactory, ConfigValueFactory} -import org.scalatest.{FlatSpec, Matchers} import za.co.absa.enceladus.common.RecordIdGenerationSuite.{SomeData, SomeDataWithId} import za.co.absa.enceladus.utils.testUtils.SparkTestBase import RecordIdGeneration._ import IdType._ +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers -class RecordIdGenerationSuite extends FlatSpec with Matchers with SparkTestBase { +class RecordIdGenerationSuite extends AnyFlatSpec with Matchers with SparkTestBase { import spark.implicits._ val data1 = Seq( diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/common/plugin/ControlMetricsPluginSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/common/plugin/ControlMetricsPluginSuite.scala index 3f6bcd749..d3567a544 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/common/plugin/ControlMetricsPluginSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/common/plugin/ControlMetricsPluginSuite.scala @@ -16,13 +16,13 @@ package za.co.absa.enceladus.common.plugin import com.typesafe.config.ConfigFactory -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.common.plugin.dummy.{DummyControlMetricsPlugin1, DummyControlMetricsPlugin2} import za.co.absa.enceladus.plugins.api.control.ControlMetricsPlugin import scala.collection.JavaConverters._ -class ControlMetricsPluginSuite extends FunSuite { +class ControlMetricsPluginSuite extends AnyFunSuite { test("Test the control plugin loader loads nothing if no class is specified") { val conf = ConfigFactory.parseMap(Map[String, String]().asJava) diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/common/plugin/PostProcessorPluginSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/common/plugin/PostProcessorPluginSuite.scala index ef2eeace8..b3d856126 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/common/plugin/PostProcessorPluginSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/common/plugin/PostProcessorPluginSuite.scala @@ -16,13 +16,13 @@ package za.co.absa.enceladus.common.plugin import com.typesafe.config.ConfigFactory -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.common.plugin.dummy.{DummyPostProcessor1, DummyPostProcessor2} import za.co.absa.enceladus.plugins.api.postprocessor.PostProcessor import scala.collection.JavaConverters._ -class PostProcessorPluginSuite extends FunSuite { +class PostProcessorPluginSuite extends AnyFunSuite { test("Test the postprocessor loader loads nothing if no class is specified") { val conf = ConfigFactory.parseMap(Map[String, String]().asJava) diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/common/version/SparkVersionGuardSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/common/version/SparkVersionGuardSuite.scala index 2d649a598..9678192d5 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/common/version/SparkVersionGuardSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/common/version/SparkVersionGuardSuite.scala @@ -15,16 +15,17 @@ package za.co.absa.enceladus.common.version -import org.mockito.ArgumentMatchers._ import org.mockito.Mockito -import org.scalatest.mockito.MockitoSugar -import org.scalatest.{Assertion, FlatSpec, Matchers} +import org.mockito.scalatest.MockitoSugar +import org.scalatest.Assertion +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers import org.slf4j.Logger import 
za.co.absa.commons.version.Version._ import scala.reflect.ClassTag -class SparkVersionGuardSuite extends FlatSpec with Matchers with MockitoSugar { +class SparkVersionGuardSuite extends AnyFlatSpec with Matchers with MockitoSugar { private def ensureThrowsWithMessageIncluding[T <: Throwable](messageSubstringToAppear: String)(fun: => scala.Any) (implicit ev: ClassTag[T]): Assertion = { diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/config/ConformanceParserSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/config/ConformanceParserSuite.scala index f2d42c5e5..04ef0ce3c 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/config/ConformanceParserSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/config/ConformanceParserSuite.scala @@ -17,13 +17,13 @@ package za.co.absa.enceladus.conformance.config import java.time.ZonedDateTime -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.conformance.ConformanceExecution import za.co.absa.enceladus.dao.auth.{MenasKerberosCredentials, MenasPlainCredentials} import za.co.absa.enceladus.model.Dataset import za.co.absa.enceladus.utils.testUtils.SparkTestBase -class ConformanceParserSuite extends FunSuite with SparkTestBase { +class ConformanceParserSuite extends AnyFunSuite with SparkTestBase { private val year = "2018" private val month = "12" diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/datasource/DatasourceSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/datasource/DatasourceSuite.scala index 81c436577..c9e7d77fd 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/datasource/DatasourceSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/datasource/DatasourceSuite.scala @@ -15,11 +15,11 @@ package za.co.absa.enceladus.conformance.datasource -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.conformance.samples.EmployeeConformance import za.co.absa.enceladus.utils.testUtils.SparkTestBase -class DatasourceSuite extends FunSuite with SparkTestBase { +class DatasourceSuite extends AnyFunSuite with SparkTestBase { test("Data Source loads all data needed for test sample") { diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/ArrayConformanceSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/ArrayConformanceSuite.scala index dcb6e46a6..f93d56426 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/ArrayConformanceSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/ArrayConformanceSuite.scala @@ -17,17 +17,20 @@ package za.co.absa.enceladus.conformance.interpreter import org.apache.spark.sql.functions._ import org.mockito.Mockito.{mock, when => mockWhen} -import org.scalatest.{BeforeAndAfterAll, FunSuite} +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.{BeforeAndAfterAll} import za.co.absa.enceladus.conformance.config.ConformanceConfig import za.co.absa.enceladus.conformance.datasource.DataSource import za.co.absa.enceladus.dao.MenasDAO import za.co.absa.enceladus.conformance.samples._ +import za.co.absa.enceladus.utils.fs.HdfsUtils import za.co.absa.enceladus.utils.testUtils.SparkTestBase -class ArrayConformanceSuite extends FunSuite with SparkTestBase with BeforeAndAfterAll { +class ArrayConformanceSuite extends AnyFunSuite with 
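These suites now take MockitoSugar from mockito-scala (the mockito-scala and mockito-scala-scalatest artifacts added to the parent pom) rather than the sugar formerly bundled with ScalaTest. A small self-contained example of the new mix-in, assuming the standard mockito-scala API (mock[T]) together with plain Mockito.verify, which SparkVersionGuardSuite still imports:

    import org.mockito.Mockito
    import org.mockito.scalatest.MockitoSugar
    import org.scalatest.flatspec.AnyFlatSpec
    import org.scalatest.matchers.should.Matchers
    import org.slf4j.Logger

    class MockitoSugarExampleSuite extends AnyFlatSpec with Matchers with MockitoSugar {

      "a guarded component" should "warn exactly once" in {
        val log = mock[Logger]                        // mock[T] comes from mockito-scala
        log.warn("Spark version is below the supported minimum")
        Mockito.verify(log).warn("Spark version is below the supported minimum")
      }
    }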
SparkTestBase with BeforeAndAfterAll { import spark.implicits._ // spark.enableControlFrameworkTracking() + implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration) implicit var dao: MenasDAO = _ implicit var progArgs: ConformanceConfig = _ @@ -58,7 +61,7 @@ class ArrayConformanceSuite extends FunSuite with SparkTestBase with BeforeAndAf .setControlFrameworkEnabled(enableCF) .setBroadcastStrategyMode(Never) - val conformedDf = DynamicInterpreter.interpret(ArraySamples.conformanceDef, + val conformedDf = DynamicInterpreter().interpret(ArraySamples.conformanceDef, df) val expected = ArraySamples.conformedData.toArray.sortBy(_.order).toList val conformed = conformedDf.as[ConformedOuter].collect().sortBy(_.order).toList @@ -83,7 +86,7 @@ class ArrayConformanceSuite extends FunSuite with SparkTestBase with BeforeAndAf .setControlFrameworkEnabled(enableCF) .setBroadcastStrategyMode(Never) - val conformedDf = DynamicInterpreter.interpret(NullArraySamples.mappingOnlyConformanceDef, + val conformedDf = DynamicInterpreter().interpret(NullArraySamples.mappingOnlyConformanceDef, df) val expected = NullArraySamples.conformedData.toArray.sortBy(_.order).toList @@ -113,7 +116,7 @@ class ArrayConformanceSuite extends FunSuite with SparkTestBase with BeforeAndAf .setControlFrameworkEnabled(enableCF) .setBroadcastStrategyMode(Never) - val conformedDf = DynamicInterpreter.interpret(EmtpyArraySamples.mappingOnlyConformanceDef, + val conformedDf = DynamicInterpreter().interpret(EmtpyArraySamples.mappingOnlyConformanceDef, df) val expected = EmtpyArraySamples.conformedData.toArray.sortBy(_.order).toList val conformed = conformedDf.as[OuterErr].collect().sortBy(_.order).toList diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/ChorusMockSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/ChorusMockSuite.scala index 2d4873474..9d5c94eca 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/ChorusMockSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/ChorusMockSuite.scala @@ -16,12 +16,13 @@ package za.co.absa.enceladus.conformance.interpreter import org.mockito.Mockito.{mock, when => mockWhen} -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.conformance.config.ConformanceConfig import za.co.absa.enceladus.conformance.datasource.DataSource import za.co.absa.enceladus.dao.MenasDAO import za.co.absa.enceladus.model.conformanceRule.MappingConformanceRule import za.co.absa.enceladus.model.{MappingTable, Dataset => ConfDataset} +import za.co.absa.enceladus.utils.fs.HdfsUtils import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase} case class MyMappingTable(id: Int, mappedAttr: MyMappingTableInner) @@ -29,7 +30,7 @@ case class MyMappingTableInner(description: String, name: String) case class MyData(id: Int, toJoin: Int) case class MyDataConfd(id: Int, toJoin: Int, confMapping: MyMappingTableInner) -class ChorusMockSuite extends FunSuite with SparkTestBase with LoggerTestBase { +class ChorusMockSuite extends AnyFunSuite with SparkTestBase with LoggerTestBase { def testChorusMockData(useExperimentalMappingRule: Boolean): Unit = { val d = Seq( @@ -71,7 +72,9 @@ class ChorusMockSuite extends FunSuite with SparkTestBase with LoggerTestBase { .setControlFrameworkEnabled(enableCF) .setBroadcastStrategyMode(Never) - val confd = DynamicInterpreter.interpret(conformanceDef, inputDf).repartition(2) 
+ implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration) + + val confd = DynamicInterpreter().interpret(conformanceDef, inputDf).repartition(2) logDataFrameContent(confd) diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/InterpreterSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/InterpreterSuite.scala index 47e09060b..fdc49e9b4 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/InterpreterSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/InterpreterSuite.scala @@ -16,7 +16,7 @@ package za.co.absa.enceladus.conformance.interpreter import org.mockito.Mockito.{mock, when => mockWhen} -import org.scalatest.{BeforeAndAfterAll, FunSuite} +import org.scalatest.{BeforeAndAfterAll} import za.co.absa.atum.model.ControlMeasure import za.co.absa.enceladus.conformance.config.ConformanceConfig import za.co.absa.enceladus.conformance.datasource.DataSource @@ -25,9 +25,10 @@ import za.co.absa.enceladus.conformance.samples._ import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase} import org.json4s._ import org.json4s.jackson._ -import za.co.absa.enceladus.utils.fs.FileReader +import org.scalatest.funsuite.AnyFunSuite +import za.co.absa.enceladus.utils.fs.{FileReader, HdfsUtils} -class InterpreterSuite extends FunSuite with SparkTestBase with BeforeAndAfterAll with LoggerTestBase { +class InterpreterSuite extends AnyFunSuite with SparkTestBase with BeforeAndAfterAll with LoggerTestBase { override def beforeAll(): Unit = { super.beforeAll @@ -55,6 +56,7 @@ class InterpreterSuite extends FunSuite with SparkTestBase with BeforeAndAfterAl val isCatalystWorkaroundEnabled = true import spark.implicits._ + implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration) val mappingTablePattern = "{0}/{1}/{2}" val dfs = DataSource.getDataFrame(EmployeeConformance.employeeDS.hdfsPath, "2017-11-01", mappingTablePattern) @@ -70,7 +72,7 @@ class InterpreterSuite extends FunSuite with SparkTestBase with BeforeAndAfterAl .setControlFrameworkEnabled(enableCF) .setBroadcastStrategyMode(Never) - val conformed = DynamicInterpreter.interpret(EmployeeConformance.employeeDS, dfs) + val conformed = DynamicInterpreter().interpret(EmployeeConformance.employeeDS, dfs) val data = conformed.as[ConformedEmployee].collect.sortBy(_.employee_id).toList val expected = EmployeeConformance.conformedEmployees.sortBy(_.employee_id).toList @@ -112,6 +114,7 @@ class InterpreterSuite extends FunSuite with SparkTestBase with BeforeAndAfterAl val isCatalystWorkaroundEnabled = true import spark.implicits._ + implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration) val mappingTablePattern = "{0}/{1}/{2}" val dfs = DataSource.getDataFrame(TradeConformance.tradeDS.hdfsPath, "2017-11-01", mappingTablePattern) @@ -127,7 +130,7 @@ class InterpreterSuite extends FunSuite with SparkTestBase with BeforeAndAfterAl .setControlFrameworkEnabled(enableCF) .setBroadcastStrategyMode(Never) - val conformed = DynamicInterpreter.interpret(TradeConformance.tradeDS, dfs).cache + val conformed = DynamicInterpreter().interpret(TradeConformance.tradeDS, dfs).cache val data = conformed.repartition(1).orderBy($"id").toJSON.collect.mkString("\n") @@ -168,23 +171,19 @@ class InterpreterSuite extends FunSuite with SparkTestBase with BeforeAndAfterAl }) } - // TODO fix for s3 [ref issue #1416] - ignore("End to end dynamic 
conformance test") { + test("End to end dynamic conformance test") { testEndToEndDynamicConformance(useExperimentalMappingRule = false) } - // TODO fix for s3 [ref issue #1416] - ignore("End to end dynamic conformance test (experimental optimized mapping rule)") { + test("End to end dynamic conformance test (experimental optimized mapping rule)") { testEndToEndDynamicConformance(useExperimentalMappingRule = true) } - // TODO fix for s3 [ref issue #1416] - ignore("End to end array dynamic conformance test") { + test("End to end array dynamic conformance test") { testEndToEndArrayConformance(useExperimentalMappingRule = false) } - // TODO fix for s3 [ref issue #1416] - ignore("End to end array dynamic conformance test (experimental optimized mapping rule)") { + test("End to end array dynamic conformance test (experimental optimized mapping rule)") { testEndToEndArrayConformance(useExperimentalMappingRule = true) } } diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/LiteralJoinMappingRuleTest.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/LiteralJoinMappingRuleTest.scala index fa78f5a87..5853d33e9 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/LiteralJoinMappingRuleTest.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/LiteralJoinMappingRuleTest.scala @@ -16,15 +16,16 @@ package za.co.absa.enceladus.conformance.interpreter import org.mockito.Mockito.{mock, when => mockWhen} -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.conformance.config.ConformanceConfig import za.co.absa.enceladus.conformance.datasource.DataSource import za.co.absa.enceladus.dao.MenasDAO import za.co.absa.enceladus.model.conformanceRule.{DropConformanceRule, LiteralConformanceRule, MappingConformanceRule} import za.co.absa.enceladus.model.{MappingTable, Dataset => ConfDataset} +import za.co.absa.enceladus.utils.fs.HdfsUtils import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase} -class LiteralJoinMappingRuleTest extends FunSuite with SparkTestBase with LoggerTestBase { +class LiteralJoinMappingRuleTest extends AnyFunSuite with SparkTestBase with LoggerTestBase { def testMappingRuleWithLiteral(useExperimentalMappingRule: Boolean): Unit = { @@ -55,9 +56,9 @@ class LiteralJoinMappingRuleTest extends FunSuite with SparkTestBase with Logger conformance = List( LiteralConformanceRule(order = 1, outputColumn = "country", controlCheckpoint = true, value = "CZ"), MappingConformanceRule(order = 2, controlCheckpoint = true, mappingTable = "countryMT", mappingTableVersion = 0, - attributeMappings = Map("countryCode" -> "country"), targetAttribute = "countryName", + attributeMappings = Map("countryCode" -> "country"), targetAttribute = "countryName", outputColumn = "conformedCountry", isNullSafe = true), - DropConformanceRule(order = 3, controlCheckpoint = false, outputColumn = "country") + DropConformanceRule(order = 3, controlCheckpoint = false, outputColumn = "country") ) ) @@ -67,7 +68,9 @@ class LiteralJoinMappingRuleTest extends FunSuite with SparkTestBase with Logger .setControlFrameworkEnabled(enableCF) .setBroadcastStrategyMode(Never) - val confd = DynamicInterpreter.interpret(conformanceDef, inputDf).repartition(2) + implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration) + + val confd = DynamicInterpreter().interpret(conformanceDef, inputDf).repartition(2) 
confd.write.mode("overwrite").parquet("_testOutput") val readAgain = spark.read.parquet("_testOutput") diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/NestedStructSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/NestedStructSuite.scala index b09154200..b044de35c 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/NestedStructSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/NestedStructSuite.scala @@ -15,8 +15,9 @@ package za.co.absa.enceladus.conformance.interpreter -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.conformance.interpreter.fixtures.NestedStructsFixture +import za.co.absa.enceladus.utils.fs.HdfsUtils import za.co.absa.enceladus.utils.testUtils.SparkTestBase /** @@ -24,7 +25,9 @@ import za.co.absa.enceladus.utils.testUtils.SparkTestBase * * Without applying a workaround any test in this suite makes Spark freeze. */ -class NestedStructSuite extends FunSuite with SparkTestBase with NestedStructsFixture { +class NestedStructSuite extends AnyFunSuite with SparkTestBase with NestedStructsFixture { + + implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration) test("Test Dynamic Conformance does not hang on many mixed conformance rules") { implicit val featureSwitches: FeatureSwitches = FeatureSwitches() @@ -32,7 +35,7 @@ class NestedStructSuite extends FunSuite with SparkTestBase with NestedStructsFi .setCatalystWorkaroundEnabled(true) .setControlFrameworkEnabled(false) - val conformed = DynamicInterpreter.interpret(nestedStructsDS, standardizedDf) + val conformed = DynamicInterpreter().interpret(nestedStructsDS, standardizedDf) assert(conformed.count() == 20) } @@ -43,7 +46,7 @@ class NestedStructSuite extends FunSuite with SparkTestBase with NestedStructsFi .setCatalystWorkaroundEnabled(true) .setControlFrameworkEnabled(false) - val conformed = DynamicInterpreter.interpret(nestedStructsUpperDS, standardizedDf) + val conformed = DynamicInterpreter().interpret(nestedStructsUpperDS, standardizedDf) assert(conformed.count() == 20) } @@ -54,7 +57,7 @@ class NestedStructSuite extends FunSuite with SparkTestBase with NestedStructsFi .setCatalystWorkaroundEnabled(true) .setControlFrameworkEnabled(false) - val conformed = DynamicInterpreter.interpret( nestedStructsNegationDS, standardizedDf) + val conformed = DynamicInterpreter().interpret( nestedStructsNegationDS, standardizedDf) assert(conformed.count() == 20) } @@ -65,7 +68,7 @@ class NestedStructSuite extends FunSuite with SparkTestBase with NestedStructsFi .setCatalystWorkaroundEnabled(true) .setControlFrameworkEnabled(false) - val conformed = DynamicInterpreter.interpret(nestedStructsCastingDS, standardizedDf) + val conformed = DynamicInterpreter().interpret(nestedStructsCastingDS, standardizedDf) assert(conformed.count() == 20) } diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/fixtures/StreamingFixture.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/fixtures/StreamingFixture.scala index fc426919e..48b18919b 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/fixtures/StreamingFixture.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/fixtures/StreamingFixture.scala @@ -19,9 +19,8 @@ import org.apache.commons.configuration2.Configuration import 
org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.{DataFrame, Row} -import org.mockito.Mockito.when -import org.scalatest.FunSuite -import org.scalatest.mockito.MockitoSugar +import org.scalatest.funsuite.AnyFunSuite +import org.mockito.scalatest.MockitoSugar import za.co.absa.enceladus.conformance.HyperConformance import za.co.absa.enceladus.conformance.HyperConformanceAttributes._ import za.co.absa.enceladus.conformance.config.ConformanceConfig @@ -31,7 +30,7 @@ import za.co.absa.enceladus.dao.MenasDAO import za.co.absa.enceladus.model.Dataset import za.co.absa.enceladus.utils.testUtils.SparkTestBase -trait StreamingFixture extends FunSuite with SparkTestBase with MockitoSugar { +trait StreamingFixture extends AnyFunSuite with SparkTestBase with MockitoSugar { implicit val menasBaseUrls: List[String] = List.empty implicit val cmd: ConformanceConfig = ConformanceConfig(reportVersion = Some(1)) @@ -51,6 +50,8 @@ trait StreamingFixture extends FunSuite with SparkTestBase with MockitoSugar { when(configStub.containsKey(menasUriKey)).thenReturn(true) when(configStub.getString(menasUriKey)).thenReturn("https://mymenas.org") when(configStub.containsKey(menasAuthKeytabKey)).thenReturn(true) + when(configStub.containsKey(menasCredentialsFileKey)).thenReturn(false) + when(configStub.getString(menasAuthKeytabKey)).thenReturn("key1") val memoryStream = new MemoryStream[Row](1, spark.sqlContext)(RowEncoder(input.schema)) val hyperConformance = HyperConformance(configStub).asInstanceOf[HyperConformance] diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/CastingRuleSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/CastingRuleSuite.scala index dbc56aba3..085fd7dbf 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/CastingRuleSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/CastingRuleSuite.scala @@ -17,16 +17,17 @@ package za.co.absa.enceladus.conformance.interpreter.rules import org.apache.spark.sql.types._ import org.mockito.Mockito.{mock, when => mockWhen} -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import org.slf4j.event.Level.ERROR import za.co.absa.enceladus.conformance.config.ConformanceConfig import za.co.absa.enceladus.conformance.interpreter.{DynamicInterpreter, FeatureSwitches, RuleValidators} import za.co.absa.enceladus.conformance.samples.CastingRuleSamples import za.co.absa.enceladus.dao.MenasDAO +import za.co.absa.enceladus.utils.fs.HdfsUtils import za.co.absa.enceladus.utils.general.JsonUtils import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase} -class CastingRuleSuite extends FunSuite with SparkTestBase with LoggerTestBase { +class CastingRuleSuite extends AnyFunSuite with SparkTestBase with LoggerTestBase { private val ruleName = "Casting rule" private val columnName = "dummy" @@ -51,7 +52,9 @@ class CastingRuleSuite extends FunSuite with SparkTestBase with LoggerTestBase { .setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled) .setControlFrameworkEnabled(enableCF) - val conformed = DynamicInterpreter.interpret(CastingRuleSamples.ordersDS, inputDf).cache + implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration) + + val conformed = DynamicInterpreter().interpret(CastingRuleSamples.ordersDS, inputDf).cache val conformedJSON = 
JsonUtils.prettySparkJSON(conformed.orderBy($"id").toJSON.collect) diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/CoalesceRuleSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/CoalesceRuleSuite.scala index db66524a3..e44c8d1b6 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/CoalesceRuleSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/CoalesceRuleSuite.scala @@ -16,7 +16,7 @@ package za.co.absa.enceladus.conformance.interpreter.rules import org.apache.spark.sql.DataFrame -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.utils.testUtils.SparkTestBase import CoalesceRuleSuite._ import za.co.absa.enceladus.conformance.samples.DeepArraySamples @@ -83,7 +83,7 @@ object CoalesceRuleSuite { ) } -class CoalesceRuleSuite extends FunSuite with SparkTestBase with TestRuleBehaviors { +class CoalesceRuleSuite extends AnyFunSuite with SparkTestBase with TestRuleBehaviors { test("Coalesce conformance rule on root level fields") { val inputDf: DataFrame = spark.createDataFrame(shopItems) diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/ConcatenationRuleSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/ConcatenationRuleSuite.scala index 79d89be60..eac2ed4f0 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/ConcatenationRuleSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/ConcatenationRuleSuite.scala @@ -16,13 +16,13 @@ package za.co.absa.enceladus.conformance.interpreter.rules import org.apache.spark.sql.DataFrame -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.conformance.samples.DeepArraySamples import za.co.absa.enceladus.model.Dataset import za.co.absa.enceladus.model.conformanceRule.{ConcatenationConformanceRule, UppercaseConformanceRule} import za.co.absa.enceladus.utils.testUtils.SparkTestBase -class ConcatenationRuleSuite extends FunSuite with SparkTestBase with TestRuleBehaviors { +class ConcatenationRuleSuite extends AnyFunSuite with SparkTestBase with TestRuleBehaviors { private val concatRule = ConcatenationConformanceRule(order = 1, outputColumn = "CombinedName", controlCheckpoint = false, Seq("name", "city", "address")) private val concatArrayRule = ConcatenationConformanceRule(order = 2, outputColumn = "rooms.CombinedLabel", diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/DropRuleSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/DropRuleSuite.scala index 081827522..c60fe6a16 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/DropRuleSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/DropRuleSuite.scala @@ -16,13 +16,13 @@ package za.co.absa.enceladus.conformance.interpreter.rules import org.apache.spark.sql.DataFrame -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.conformance.samples.DeepArraySamples import za.co.absa.enceladus.model.Dataset import za.co.absa.enceladus.model.conformanceRule.DropConformanceRule import za.co.absa.enceladus.utils.testUtils.SparkTestBase -class DropRuleSuite extends FunSuite with SparkTestBase with 
TestRuleBehaviors { +class DropRuleSuite extends AnyFunSuite with SparkTestBase with TestRuleBehaviors { // scalastyle:off line.size.limit private val dropRule = DropConformanceRule(order = 1, controlCheckpoint = false, outputColumn = "name" ) diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/FillNullsRuleSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/FillNullsRuleSuite.scala index f9dee622b..00c354eb8 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/FillNullsRuleSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/FillNullsRuleSuite.scala @@ -16,13 +16,13 @@ package za.co.absa.enceladus.conformance.interpreter.rules import org.apache.spark.sql.DataFrame -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.conformance.samples.DeepArraySamples import za.co.absa.enceladus.model.Dataset import za.co.absa.enceladus.model.conformanceRule.FillNullsConformanceRule import za.co.absa.enceladus.utils.testUtils.SparkTestBase -class FillNullsRuleSuite extends FunSuite with SparkTestBase with TestRuleBehaviors { +class FillNullsRuleSuite extends AnyFunSuite with SparkTestBase with TestRuleBehaviors { // scalastyle:off line.size.limit private val fillNullsRule = FillNullsConformanceRule( diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/LiteralRuleSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/LiteralRuleSuite.scala index 4ac42525e..b47ade6ff 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/LiteralRuleSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/LiteralRuleSuite.scala @@ -16,13 +16,13 @@ package za.co.absa.enceladus.conformance.interpreter.rules import org.apache.spark.sql.DataFrame -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.conformance.samples.DeepArraySamples import za.co.absa.enceladus.model.Dataset import za.co.absa.enceladus.model.conformanceRule.LiteralConformanceRule import za.co.absa.enceladus.utils.testUtils.SparkTestBase -class LiteralRuleSuite extends FunSuite with SparkTestBase with TestRuleBehaviors { +class LiteralRuleSuite extends AnyFunSuite with SparkTestBase with TestRuleBehaviors { // scalastyle:off line.size.limit private val literalRule = LiteralConformanceRule(order = 1, outputColumn = "System", controlCheckpoint = false, value = "FA") diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/MappingRuleBroadcastSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/MappingRuleBroadcastSuite.scala index b6fd23928..479cb7f32 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/MappingRuleBroadcastSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/MappingRuleBroadcastSuite.scala @@ -17,21 +17,25 @@ package za.co.absa.enceladus.conformance.interpreter.rules import org.apache.commons.io.IOUtils import org.apache.spark.sql.functions._ -import org.scalatest.{BeforeAndAfterAll, FunSuite} +import org.scalatest.BeforeAndAfterAll +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.conformance.interpreter.DynamicInterpreter import 
za.co.absa.enceladus.conformance.interpreter.rules.testcasefactories.NestedTestCaseFactory._ import za.co.absa.enceladus.conformance.interpreter.rules.testcasefactories.SimpleTestCaseFactory._ import za.co.absa.enceladus.conformance.interpreter.rules.testcasefactories.{NestedTestCaseFactory, SimpleTestCaseFactory} import za.co.absa.enceladus.utils.error.ErrorMessage +import za.co.absa.enceladus.utils.fs.HdfsUtils import za.co.absa.enceladus.utils.general.JsonUtils import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase} -class MappingRuleBroadcastSuite extends FunSuite with SparkTestBase with LoggerTestBase with BeforeAndAfterAll { +class MappingRuleBroadcastSuite extends AnyFunSuite with SparkTestBase with LoggerTestBase with BeforeAndAfterAll { import spark.implicits._ private val simpleTestCaseFactory = new SimpleTestCaseFactory() private val nestedTestCaseFactory = new NestedTestCaseFactory() + implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration) + override def beforeAll(): Unit = { super.beforeAll() simpleTestCaseFactory.createMappingTables() @@ -51,7 +55,7 @@ class MappingRuleBroadcastSuite extends FunSuite with SparkTestBase with LoggerT implicit val (inputDf, dataset, dao, progArgs, featureSwitches) = simpleTestCaseFactory.getTestCase(true, true, simpleMappingRule) - val dfOut = DynamicInterpreter.interpret(dataset, inputDf) + val dfOut = DynamicInterpreter().interpret(dataset, inputDf) .select($"id", $"int_num", $"long_num", $"str_val", $"errCol", $"conformedIntNum") .cache @@ -69,7 +73,7 @@ class MappingRuleBroadcastSuite extends FunSuite with SparkTestBase with LoggerT implicit val (inputDf, dataset, dao, progArgs, featureSwitches) = simpleTestCaseFactory.getTestCase(true, true, simpleMappingRuleWithDefaultValue) - val dfOut = DynamicInterpreter.interpret(dataset, inputDf) + val dfOut = DynamicInterpreter().interpret(dataset, inputDf) .select($"id", $"int_num", $"long_num", $"str_val", $"errCol", $"conformedIntNum") .cache @@ -87,7 +91,7 @@ class MappingRuleBroadcastSuite extends FunSuite with SparkTestBase with LoggerT implicit val (inputDf, dataset, dao, progArgs, featureSwitches) = nestedTestCaseFactory.getTestCase(true, true, nestedMappingRule1) - val dfOut = DynamicInterpreter.interpret(dataset, inputDf) + val dfOut = DynamicInterpreter().interpret(dataset, inputDf) .select($"id", $"key1", $"key2", $"struct1", $"struct2", $"array1", $"array2", $"errCol", $"conformedNum1") .cache @@ -105,7 +109,7 @@ class MappingRuleBroadcastSuite extends FunSuite with SparkTestBase with LoggerT implicit val (inputDf, dataset, dao, progArgs, featureSwitches) = nestedTestCaseFactory.getTestCase(true, true, nestedMappingRule2) - val dfOut = DynamicInterpreter.interpret(dataset, inputDf) + val dfOut = DynamicInterpreter().interpret(dataset, inputDf) .select($"id", $"key1", $"key2", $"struct1", $"struct2", $"array1", $"array2", $"errCol", $"conformedNum2") .cache @@ -123,7 +127,7 @@ class MappingRuleBroadcastSuite extends FunSuite with SparkTestBase with LoggerT implicit val (inputDf, dataset, dao, progArgs, featureSwitches) = nestedTestCaseFactory.getTestCase(true, true, nestedMappingRule3) - val dfOut = DynamicInterpreter.interpret(dataset, inputDf) + val dfOut = DynamicInterpreter().interpret(dataset, inputDf) .select($"id", $"key1", $"key2", $"struct1", $"struct2", $"array1", $"array2", $"conformedNum3", $"errCol") .cache @@ -141,7 +145,7 @@ class MappingRuleBroadcastSuite extends FunSuite with SparkTestBase with LoggerT implicit val 
(inputDf, dataset, dao, progArgs, featureSwitches) = nestedTestCaseFactory.getTestCase(true, true, arrayMappingRule1) - val dfOut = DynamicInterpreter.interpret(dataset, inputDf) + val dfOut = DynamicInterpreter().interpret(dataset, inputDf) .select($"id", $"key1", $"key2", $"struct1", $"struct2", $"array2", $"errCol", $"array1") .cache @@ -159,7 +163,7 @@ class MappingRuleBroadcastSuite extends FunSuite with SparkTestBase with LoggerT implicit val (inputDf, dataset, dao, progArgs, featureSwitches) = nestedTestCaseFactory.getTestCase(true, true, arrayMappingRule2) - val dfOut = DynamicInterpreter.interpret(dataset, inputDf) + val dfOut = DynamicInterpreter().interpret(dataset, inputDf) .select($"id", $"key1", $"key2", $"struct1", $"struct2", $"array1", $"array2", $"errCol") .cache @@ -177,7 +181,7 @@ class MappingRuleBroadcastSuite extends FunSuite with SparkTestBase with LoggerT implicit val (inputDf, dataset, dao, progArgs, featureSwitches) = nestedTestCaseFactory.getTestCase(true, true, arrayMappingRule3) - val dfOut = DynamicInterpreter.interpret(dataset, inputDf) + val dfOut = DynamicInterpreter().interpret(dataset, inputDf) .select($"id", $"key1", $"key2", $"struct1", $"struct2", $"array1", $"array2", $"errCol") .cache @@ -195,7 +199,7 @@ class MappingRuleBroadcastSuite extends FunSuite with SparkTestBase with LoggerT implicit val (inputDf, dataset, dao, progArgs, featureSwitches) = nestedTestCaseFactory.getTestCase(true, true, arrayMappingRule4) - val dfOut = DynamicInterpreter.interpret(dataset, inputDf) + val dfOut = DynamicInterpreter().interpret(dataset, inputDf) .select($"id", $"key1", $"key2", $"struct1", $"struct2", $"array1", $"array2", $"errCol") .cache @@ -213,7 +217,7 @@ class MappingRuleBroadcastSuite extends FunSuite with SparkTestBase with LoggerT implicit val (inputDf, dataset, dao, progArgs, featureSwitches) = nestedTestCaseFactory.getTestCase(true, true, arrayMappingRule5) - val dfOut = DynamicInterpreter.interpret(dataset, inputDf) + val dfOut = DynamicInterpreter().interpret(dataset, inputDf) .select($"id", $"key1", $"key2", $"struct1", $"struct2", $"array1", $"array2", $"errCol") .cache @@ -231,7 +235,7 @@ class MappingRuleBroadcastSuite extends FunSuite with SparkTestBase with LoggerT implicit val (inputDf, dataset, dao, progArgs, featureSwitches) = nestedTestCaseFactory.getTestCase(true, true, arrayMappingRule6) - val dfOut = DynamicInterpreter.interpret(dataset, inputDf) + val dfOut = DynamicInterpreter().interpret(dataset, inputDf) .select($"id", $"key1", $"key2", $"struct1", $"struct2", $"array1", $"array2", $"errCol") .cache @@ -251,7 +255,7 @@ class MappingRuleBroadcastSuite extends FunSuite with SparkTestBase with LoggerT val inputDf2 = inputDf.withColumn("errCol", array(typedLit(ErrorMessage("Initial", "000", "ErrMsg", "id", Seq(), Seq())))) - val dfOut = DynamicInterpreter.interpret(dataset, inputDf2) + val dfOut = DynamicInterpreter().interpret(dataset, inputDf2) .select($"id", $"key1", $"key2", $"struct1", $"struct2", $"array1", $"array2", $"errCol") .cache @@ -267,7 +271,7 @@ class MappingRuleBroadcastSuite extends FunSuite with SparkTestBase with LoggerT nestedTestCaseFactory.getTestCase(true, true, wrongMappingRule1) intercept[Exception] { - DynamicInterpreter.interpret(dataset, inputDf) + DynamicInterpreter().interpret(dataset, inputDf) } } diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/MappingRuleSuite.scala 
b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/MappingRuleSuite.scala index 23970b89a..1b92ebbe0 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/MappingRuleSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/MappingRuleSuite.scala @@ -16,15 +16,19 @@ package za.co.absa.enceladus.conformance.interpreter.rules import org.apache.spark.sql.AnalysisException -import org.scalatest.{BeforeAndAfterAll, FunSuite} +import org.scalatest.BeforeAndAfterAll +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.conformance.interpreter.DynamicInterpreter import za.co.absa.enceladus.conformance.interpreter.rules.testcasefactories.SimpleTestCaseFactory import za.co.absa.enceladus.conformance.interpreter.rules.testcasefactories.SimpleTestCaseFactory._ +import za.co.absa.enceladus.utils.fs.HdfsUtils import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase} -class MappingRuleSuite extends FunSuite with SparkTestBase with LoggerTestBase with BeforeAndAfterAll { +class MappingRuleSuite extends AnyFunSuite with SparkTestBase with LoggerTestBase with BeforeAndAfterAll { private val testCaseFactory = new SimpleTestCaseFactory() + implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration) + override def beforeAll(): Unit = { super.beforeAll() testCaseFactory.createMappingTables() @@ -40,7 +44,7 @@ class MappingRuleSuite extends FunSuite with SparkTestBase with LoggerTestBase w testCaseFactory.getTestCase(true, false, nonExistentTableMappingRule) val ex = intercept[AnalysisException] { - DynamicInterpreter.interpret(dataset, inputDf).cache + DynamicInterpreter().interpret(dataset, inputDf).cache } assert(ex.getMessage.contains("Path does not exist")) @@ -51,7 +55,7 @@ class MappingRuleSuite extends FunSuite with SparkTestBase with LoggerTestBase w testCaseFactory.getTestCase(false, false, nonExistentTableMappingRule) val ex = intercept[AnalysisException] { - DynamicInterpreter.interpret(dataset, inputDf).cache + DynamicInterpreter().interpret(dataset, inputDf).cache } assert(ex.getMessage.contains("Path does not exist")) @@ -62,7 +66,7 @@ class MappingRuleSuite extends FunSuite with SparkTestBase with LoggerTestBase w testCaseFactory.getTestCase(true, false, emptyTableMappingRule) val ex = intercept[RuntimeException] { - DynamicInterpreter.interpret(dataset, inputDf).cache + DynamicInterpreter().interpret(dataset, inputDf).cache } assert(ex.getMessage.contains("Unable to read the mapping table")) @@ -73,7 +77,7 @@ class MappingRuleSuite extends FunSuite with SparkTestBase with LoggerTestBase w testCaseFactory.getTestCase(false, false, emptyTableMappingRule) val ex = intercept[RuntimeException] { - DynamicInterpreter.interpret(dataset, inputDf).cache + DynamicInterpreter().interpret(dataset, inputDf).cache } assert(ex.getMessage.contains("Unable to read the mapping table")) diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/MappingRuleValidationSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/MappingRuleValidationSuite.scala index 14669a07f..a54121fc4 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/MappingRuleValidationSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/MappingRuleValidationSuite.scala @@ -15,13 +15,13 @@ package 
za.co.absa.enceladus.conformance.interpreter.rules -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.conformance.datasource.DataSource import za.co.absa.enceladus.conformance.samples.EmployeeConformance import za.co.absa.enceladus.model.conformanceRule.MappingConformanceRule import za.co.absa.enceladus.utils.testUtils.SparkTestBase -class MappingRuleValidationSuite extends FunSuite with SparkTestBase { +class MappingRuleValidationSuite extends AnyFunSuite with SparkTestBase { // scalastyle:off line.size.limit test("Mapping rule fields existence validation test") { diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/NegationRuleSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/NegationRuleSuite.scala index 515738963..78ea290bb 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/NegationRuleSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/NegationRuleSuite.scala @@ -17,16 +17,17 @@ package za.co.absa.enceladus.conformance.interpreter.rules import org.apache.spark.sql.Dataset import org.mockito.Mockito.{mock, when => mockWhen} -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import org.slf4j.event.Level.ERROR import za.co.absa.enceladus.conformance.config.ConformanceConfig import za.co.absa.enceladus.conformance.interpreter.{DynamicInterpreter, FeatureSwitches} import za.co.absa.enceladus.conformance.samples.NegationRuleSamples import za.co.absa.enceladus.dao.MenasDAO import za.co.absa.enceladus.model.{Dataset => ConfDataset} +import za.co.absa.enceladus.utils.fs.HdfsUtils import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase} -class NegationRuleSuite extends FunSuite with SparkTestBase with LoggerTestBase{ +class NegationRuleSuite extends AnyFunSuite with SparkTestBase with LoggerTestBase{ import spark.implicits._ @@ -119,7 +120,8 @@ class NegationRuleSuite extends FunSuite with SparkTestBase with LoggerTestBase{ .setExperimentalMappingRuleEnabled(experimentalMR) .setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled) .setControlFrameworkEnabled(enableCF) - val conformed = DynamicInterpreter.interpret(enceladusDataset, inputDf).cache + implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration) + val conformed = DynamicInterpreter().interpret(enceladusDataset, inputDf).cache val conformedJSON = conformed.toJSON.collect().mkString("\n") if (conformedJSON != expectedJSON) { logger.error("EXPECTED:") diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/RuleOptimizationSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/RuleOptimizationSuite.scala index 9a4dedfdb..2eb36d395 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/RuleOptimizationSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/RuleOptimizationSuite.scala @@ -16,12 +16,14 @@ package za.co.absa.enceladus.conformance.interpreter.rules import org.apache.spark.sql.types.{DataType, StructType} -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.conformance.interpreter.{DynamicInterpreter, FeatureSwitches, InterpreterContext, Never} import za.co.absa.enceladus.model.conformanceRule.{ConformanceRule, MappingConformanceRule} import 
za.co.absa.enceladus.conformance.samples.TradeConformance._ +import za.co.absa.enceladus.utils.fs.HdfsUtils +import za.co.absa.enceladus.utils.testUtils.SparkTestBase -class RuleOptimizationSuite extends FunSuite { +class RuleOptimizationSuite extends AnyFunSuite with SparkTestBase { private val schemaJson = """{ @@ -109,10 +111,12 @@ class RuleOptimizationSuite extends FunSuite { null, null) + implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration) + test("Test non-mapping rules are not grouped") { val rules: List[ConformanceRule] = List(litRule, upperRule, lit2Rule) - val actualInterpreters = DynamicInterpreter.getInterpreters(rules, schema) + val actualInterpreters = DynamicInterpreter().getInterpreters(rules, schema) assert(actualInterpreters.length == 3) assert(actualInterpreters.head.isInstanceOf[LiteralRuleInterpreter]) @@ -123,7 +127,7 @@ class RuleOptimizationSuite extends FunSuite { test("Test mapping rules having the same array are grouped") { val rules: List[ConformanceRule] = List(litRule, countryRule, productRule, lit2Rule) - val actualInterpreters = DynamicInterpreter.getInterpreters(rules, schema) + val actualInterpreters = DynamicInterpreter().getInterpreters(rules, schema) assert(actualInterpreters.length == 6) assert(actualInterpreters.head.isInstanceOf[LiteralRuleInterpreter]) @@ -137,7 +141,7 @@ class RuleOptimizationSuite extends FunSuite { test("Test single arrays in the beginning and at the end") { val rules: List[ConformanceRule] = List(countryRule, litRule, lit2Rule, productRule) - val actualInterpreters = DynamicInterpreter.getInterpreters(rules, schema) + val actualInterpreters = DynamicInterpreter().getInterpreters(rules, schema) assert(actualInterpreters.length == 4) assert(actualInterpreters.head.isInstanceOf[MappingRuleInterpreterGroupExplode]) @@ -149,7 +153,7 @@ class RuleOptimizationSuite extends FunSuite { test("Test several arrays in the beginning and at the end") { val rules: List[ConformanceRule] = List(countryRule, productRule, litRule, lit2Rule, productRule, countryRule) - val actualInterpreters = DynamicInterpreter.getInterpreters(rules, schema) + val actualInterpreters = DynamicInterpreter().getInterpreters(rules, schema) assert(actualInterpreters.length == 10) assert(actualInterpreters.head.isInstanceOf[ArrayExplodeInterpreter]) @@ -173,7 +177,7 @@ class RuleOptimizationSuite extends FunSuite { val rules: List[ConformanceRule] = List(countryRule, productRule, legIdRule, countryRule, legIdRule, countryRule, productRule, legIdRule, legIdRule) - val actualInterpreters = DynamicInterpreter.getInterpreters(rules, schema) + val actualInterpreters = DynamicInterpreter().getInterpreters(rules, schema) assert(actualInterpreters.length == 15) assert(actualInterpreters.head.isInstanceOf[ArrayExplodeInterpreter]) diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/RulesSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/RulesSuite.scala index 1bb0a9933..34d29ee5c 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/RulesSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/RulesSuite.scala @@ -18,7 +18,7 @@ package za.co.absa.enceladus.conformance.interpreter.rules import org.apache.spark.sql._ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import 
za.co.absa.enceladus.conformance.interpreter.{ExplosionState, InterpreterContextArgs} import za.co.absa.enceladus.conformance.samples.EmployeeConformance import za.co.absa.enceladus.dao.MenasDAO @@ -26,7 +26,7 @@ import za.co.absa.enceladus.model.conformanceRule.ConformanceRule import za.co.absa.enceladus.utils.testUtils.SparkTestBase -class RulesSuite extends FunSuite with SparkTestBase { +class RulesSuite extends AnyFunSuite with SparkTestBase { private val dummyInterpreter = new RuleInterpreter { override def conformanceRule: Option[ConformanceRule] = None diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/SingleColumnRuleSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/SingleColumnRuleSuite.scala index b54b4f308..9f2be7c95 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/SingleColumnRuleSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/SingleColumnRuleSuite.scala @@ -16,13 +16,13 @@ package za.co.absa.enceladus.conformance.interpreter.rules import org.apache.spark.sql.DataFrame -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.conformance.samples.DeepArraySamples import za.co.absa.enceladus.model.Dataset import za.co.absa.enceladus.model.conformanceRule.SingleColumnConformanceRule import za.co.absa.enceladus.utils.testUtils.SparkTestBase -class SingleColumnRuleSuite extends FunSuite with SparkTestBase with TestRuleBehaviors { +class SingleColumnRuleSuite extends AnyFunSuite with SparkTestBase with TestRuleBehaviors { // scalastyle:off line.size.limit private val singleColumnRule = SingleColumnConformanceRule(order = 1, controlCheckpoint = false, "conformedId", "id", "id2") diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/SparkSessionRuleSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/SparkSessionRuleSuite.scala index 5a0e08898..b03b11048 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/SparkSessionRuleSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/SparkSessionRuleSuite.scala @@ -15,13 +15,13 @@ package za.co.absa.enceladus.conformance.interpreter.rules -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.conformance.samples.DeepArraySamples import za.co.absa.enceladus.model.Dataset import za.co.absa.enceladus.model.conformanceRule.SparkSessionConfConformanceRule import za.co.absa.enceladus.utils.testUtils.SparkTestBase -class SparkSessionRuleSuite extends FunSuite with SparkTestBase with TestRuleBehaviors { +class SparkSessionRuleSuite extends AnyFunSuite with SparkTestBase with TestRuleBehaviors { // scalastyle:off line.size.limit private val sparkSessionRule = SparkSessionConfConformanceRule(order = 1, outputColumn = "TimeZone", controlCheckpoint = false, sparkConfKey = "spark.sql.session.timeZone") diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/TestRuleBehaviors.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/TestRuleBehaviors.scala index 4fd1bdb07..0263f7842 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/TestRuleBehaviors.scala +++ 
b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/TestRuleBehaviors.scala @@ -17,16 +17,17 @@ package za.co.absa.enceladus.conformance.interpreter.rules import org.apache.spark.sql.DataFrame import org.mockito.Mockito.{mock, when => mockWhen} -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import org.slf4j.event.Level._ import za.co.absa.enceladus.conformance.config.ConformanceConfig import za.co.absa.enceladus.conformance.interpreter.{DynamicInterpreter, FeatureSwitches} import za.co.absa.enceladus.dao.MenasDAO import za.co.absa.enceladus.model.Dataset +import za.co.absa.enceladus.utils.fs.HdfsUtils import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase} -trait TestRuleBehaviors extends FunSuite with SparkTestBase with LoggerTestBase { +trait TestRuleBehaviors extends AnyFunSuite with SparkTestBase with LoggerTestBase { def conformanceRuleShouldMatchExpected(inputDf: DataFrame, inputDataset: Dataset, expectedJSON: String) { implicit val dao: MenasDAO = mock(classOf[MenasDAO]) @@ -44,8 +45,9 @@ trait TestRuleBehaviors extends FunSuite with SparkTestBase with LoggerTestBase .setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled) .setControlFrameworkEnabled(enableCF) + implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration) - val conformed = DynamicInterpreter.interpret(inputDataset, inputDf) + val conformed = DynamicInterpreter().interpret(inputDataset, inputDf) val conformedJSON = conformed.orderBy($"id").toJSON.collect().mkString("\n") diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/UppercaseRuleSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/UppercaseRuleSuite.scala index 5f789eda0..77043aaeb 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/UppercaseRuleSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/UppercaseRuleSuite.scala @@ -16,13 +16,13 @@ package za.co.absa.enceladus.conformance.interpreter.rules import org.apache.spark.sql.DataFrame -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.conformance.samples.DeepArraySamples import za.co.absa.enceladus.model.Dataset import za.co.absa.enceladus.model.conformanceRule.UppercaseConformanceRule import za.co.absa.enceladus.utils.testUtils.SparkTestBase -class UppercaseRuleSuite extends FunSuite with SparkTestBase with TestRuleBehaviors { +class UppercaseRuleSuite extends AnyFunSuite with SparkTestBase with TestRuleBehaviors { // scalastyle:off line.size.limit private val uppercaseRule = UppercaseConformanceRule(order = 1, outputColumn = "ConformedName", controlCheckpoint = false, inputColumn = "name") diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/custom/CustomRuleSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/custom/CustomRuleSuite.scala index 50ba0cd2c..44067bd93 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/custom/CustomRuleSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/custom/CustomRuleSuite.scala @@ -18,7 +18,7 @@ package za.co.absa.enceladus.conformance.interpreter.rules.custom import org.apache.spark.sql._ import org.apache.spark.sql.functions._ import org.mockito.Mockito.mock -import org.scalatest.FunSuite +import 
org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.conformance.config.ConformanceConfig import za.co.absa.enceladus.conformance.interpreter.rules.RuleInterpreter import za.co.absa.enceladus.conformance.interpreter.{DynamicInterpreter, ExplosionState, FeatureSwitches, InterpreterContextArgs} @@ -26,6 +26,7 @@ import za.co.absa.enceladus.dao.MenasDAO import za.co.absa.enceladus.model.conformanceRule.ConformanceRule import za.co.absa.enceladus.model.{conformanceRule, Dataset => ConfDataset} import za.co.absa.enceladus.utils.error.ErrorMessage +import za.co.absa.enceladus.utils.fs.HdfsUtils import za.co.absa.enceladus.utils.testUtils.SparkTestBase case class MyCustomRule( @@ -56,7 +57,7 @@ case class MyCustomRuleInterpreter(rule: MyCustomRule) extends RuleInterpreter { case class Mine(id: Int) case class MineConfd(id: Int, myOutputCol: Double, errCol: Seq[ErrorMessage]) -class CustomRuleSuite extends FunSuite with SparkTestBase { +class CustomRuleSuite extends AnyFunSuite with SparkTestBase { import spark.implicits._ // we may WANT to enable control framework & spline here @@ -89,7 +90,9 @@ class CustomRuleSuite extends FunSuite with SparkTestBase { .setCatalystWorkaroundEnabled(isCatalystWorkaroundEnabled) .setControlFrameworkEnabled(enableCF) - val actualDf: DataFrame = DynamicInterpreter.interpret(conformanceDef, inputData) + implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration) + + val actualDf: DataFrame = DynamicInterpreter().interpret(conformanceDef, inputData) val actual: Seq[MineConfd] = actualDf.as[MineConfd].collect().toSeq diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/testcasefactories/NestedTestCaseFactory.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/testcasefactories/NestedTestCaseFactory.scala index fbf547f10..9fead4962 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/testcasefactories/NestedTestCaseFactory.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/testcasefactories/NestedTestCaseFactory.scala @@ -25,7 +25,7 @@ import za.co.absa.enceladus.dao.MenasDAO import za.co.absa.enceladus.model.conformanceRule.{ConformanceRule, MappingConformanceRule} import za.co.absa.enceladus.model.test.factories.{DatasetFactory, MappingTableFactory} import za.co.absa.enceladus.model.{Dataset, MappingTable} -import za.co.absa.enceladus.utils.fs.FileSystemVersionUtils +import za.co.absa.enceladus.utils.fs.{HdfsUtils, LocalFsUtils} /** @@ -214,8 +214,8 @@ class NestedTestCaseFactory(implicit spark: SparkSession) { import NestedTestCaseFactory._ private val fs = FileSystem.get(spark.sparkContext.hadoopConfiguration) - private val fsUtils = new FileSystemVersionUtils(spark.sparkContext.hadoopConfiguration) - private val tempDir = fsUtils.getLocalTemporaryDirectory("test_case_factory") + private val fsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration) + private val tempDir = LocalFsUtils.getLocalTemporaryDirectory("test_case_factory") /** * This method returns all objects necessary to run a dynamic conformance job. 
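The test-case factories above also illustrate how the former FileSystemVersionUtils responsibilities are now split: Hadoop-backed operations go through an HdfsUtils instance constructed from the Hadoop configuration, while purely local helpers become static calls on LocalFsUtils. A hedged sketch of that split follows; only the HdfsUtils constructor and LocalFsUtils.getLocalTemporaryDirectory come from the hunks above, and the class name and temporary-directory prefix are illustrative.

import org.apache.spark.sql.SparkSession
import za.co.absa.enceladus.utils.fs.{HdfsUtils, LocalFsUtils}

class FsUtilsSketch(implicit spark: SparkSession) {
  // Hadoop-backed helper: instance-based, built from the session's Hadoop configuration,
  // and typically placed in implicit scope for DynamicInterpreter().interpret.
  implicit val fsUtils: HdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)

  // Purely local helper: now a static call on the LocalFsUtils object.
  val tempDir = LocalFsUtils.getLocalTemporaryDirectory("fixture_sketch")
}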
diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/testcasefactories/SimpleTestCaseFactory.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/testcasefactories/SimpleTestCaseFactory.scala index 84f508c1b..a07cd9e71 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/testcasefactories/SimpleTestCaseFactory.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/interpreter/rules/testcasefactories/SimpleTestCaseFactory.scala @@ -25,7 +25,7 @@ import za.co.absa.enceladus.dao.MenasDAO import za.co.absa.enceladus.model.conformanceRule.{ConformanceRule, MappingConformanceRule} import za.co.absa.enceladus.model.test.factories.{DatasetFactory, MappingTableFactory} import za.co.absa.enceladus.model.{Dataset, DefaultValue, MappingTable} -import za.co.absa.enceladus.utils.fs.FileSystemVersionUtils +import za.co.absa.enceladus.utils.fs.{HdfsUtils, LocalFsUtils} object SimpleTestCaseFactory { @@ -124,8 +124,8 @@ class SimpleTestCaseFactory(implicit spark: SparkSession) { import spark.implicits._ private val fs = FileSystem.get(spark.sparkContext.hadoopConfiguration) - private val fsUtils = new FileSystemVersionUtils(spark.sparkContext.hadoopConfiguration) - private val tempDir = fsUtils.getLocalTemporaryDirectory("test_case_factory") + private val fsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration) + private val tempDir = LocalFsUtils.getLocalTemporaryDirectory("test_case_factory") /** * This method returns all objects necessary to run a dynamic conformance job. diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/streaming/HyperConformanceIntegrationSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/streaming/HyperConformanceIntegrationSuite.scala index dd3fa0114..2d41a62e1 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/streaming/HyperConformanceIntegrationSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/streaming/HyperConformanceIntegrationSuite.scala @@ -16,10 +16,10 @@ package za.co.absa.enceladus.conformance.streaming import org.apache.spark.sql.DataFrame -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.conformance.interpreter.fixtures.{NestedStructsFixture, StreamingFixture} -class HyperConformanceIntegrationSuite extends FunSuite with StreamingFixture with NestedStructsFixture { +class HyperConformanceIntegrationSuite extends AnyFunSuite with StreamingFixture with NestedStructsFixture { test("Test with catalyst workaround, literal factory") { implicit val infoDateFactory: InfoDateFactory = new InfoDateLiteralFactory("2020-05-23") diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/streaming/HyperConformanceSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/streaming/HyperConformanceSuite.scala index 43dc7f29b..f5cb3d89a 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/streaming/HyperConformanceSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/streaming/HyperConformanceSuite.scala @@ -17,7 +17,8 @@ package za.co.absa.enceladus.conformance.streaming import java.util.ServiceLoader -import org.scalatest.{FlatSpec, Matchers} +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers import za.co.absa.enceladus.conformance.HyperConformance import za.co.absa.hyperdrive.ingestor.api.{ComponentFactory, 
ComponentFactoryProvider} import za.co.absa.hyperdrive.ingestor.api.transformer.{StreamTransformerFactory, StreamTransformerFactoryProvider} @@ -30,7 +31,7 @@ import scala.reflect.ClassTag * It is based on: * https://github.com/AbsaOSS/hyperdrive/blob/v3.0.0/ingestor-default/src/test/scala/za/co/absa/hyperdrive/ingestor/implementation/TestServiceProviderConfiguration.scala */ -class HyperConformanceSuite extends FlatSpec with Matchers { +class HyperConformanceSuite extends AnyFlatSpec with Matchers { behavior of "Service Provider Interface (META-INF/services)" diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/streaming/InfoDateFactorySuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/streaming/InfoDateFactorySuite.scala index d02fe6663..e9cdbf7a3 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/streaming/InfoDateFactorySuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/conformance/streaming/InfoDateFactorySuite.scala @@ -16,12 +16,12 @@ package za.co.absa.enceladus.conformance.streaming import org.apache.commons.configuration2.Configuration -import org.mockito.Mockito._ -import org.scalatest.mockito.MockitoSugar -import org.scalatest.{Matchers, WordSpec} +import org.scalatest.matchers.should.Matchers +import org.mockito.scalatest.MockitoSugar +import org.scalatest.wordspec.AnyWordSpec import za.co.absa.enceladus.conformance.HyperConformanceAttributes._ -class InfoDateFactorySuite extends WordSpec with Matchers with MockitoSugar { +class InfoDateFactorySuite extends AnyWordSpec with Matchers with MockitoSugar { private val configStub: Configuration = mock[Configuration] "InfoDateFactory" should { diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationCobolAsciiSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationCobolAsciiSuite.scala index 2081ac3ef..240599ffb 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationCobolAsciiSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationCobolAsciiSuite.scala @@ -19,15 +19,16 @@ import java.nio.charset.StandardCharsets import org.apache.spark.sql.DataFrame import org.apache.spark.sql.types.{StringType, StructField, StructType} -import org.scalatest.mockito.MockitoSugar -import org.scalatest.{Outcome, fixture} +import org.mockito.scalatest.MockitoSugar +import org.scalatest.Outcome +import org.scalatest.funsuite.FixtureAnyFunSuite import za.co.absa.enceladus.dao.MenasDAO import za.co.absa.enceladus.model.Dataset import za.co.absa.enceladus.standardization.config.StandardizationConfig import za.co.absa.enceladus.standardization.fixtures.TempFileFixture import za.co.absa.enceladus.utils.testUtils.SparkTestBase -class StandardizationCobolAsciiSuite extends fixture.FunSuite with SparkTestBase with TempFileFixture with MockitoSugar { +class StandardizationCobolAsciiSuite extends FixtureAnyFunSuite with SparkTestBase with TempFileFixture with MockitoSugar { type FixtureParam = String diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationCobolEbcdicSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationCobolEbcdicSuite.scala index 59a3ec86c..ea0732a64 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationCobolEbcdicSuite.scala +++ 
b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationCobolEbcdicSuite.scala @@ -17,15 +17,16 @@ package za.co.absa.enceladus.standardization import org.apache.spark.sql.DataFrame import org.apache.spark.sql.types.{StringType, StructField, StructType} -import org.scalatest.mockito.MockitoSugar -import org.scalatest.{Outcome, fixture} +import org.scalatest.funsuite.FixtureAnyFunSuite +import org.mockito.scalatest.MockitoSugar +import org.scalatest.Outcome import za.co.absa.enceladus.dao.MenasDAO import za.co.absa.enceladus.model.Dataset import za.co.absa.enceladus.standardization.config.StandardizationConfig import za.co.absa.enceladus.standardization.fixtures.TempFileFixture import za.co.absa.enceladus.utils.testUtils.SparkTestBase -class StandardizationCobolEbcdicSuite extends fixture.FunSuite with SparkTestBase with TempFileFixture with MockitoSugar { +class StandardizationCobolEbcdicSuite extends FixtureAnyFunSuite with SparkTestBase with TempFileFixture with MockitoSugar { type FixtureParam = String diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationFixedWidthSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationFixedWidthSuite.scala index 44e51f04a..ab37ef413 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationFixedWidthSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationFixedWidthSuite.scala @@ -16,8 +16,8 @@ package za.co.absa.enceladus.standardization import org.apache.spark.sql.types.{DataType, StructType} -import org.scalatest.FunSuite -import org.scalatest.mockito.MockitoSugar +import org.scalatest.funsuite.AnyFunSuite +import org.mockito.scalatest.MockitoSugar import org.slf4j.{Logger, LoggerFactory} import za.co.absa.enceladus.dao.MenasDAO import za.co.absa.enceladus.model.Dataset @@ -29,7 +29,7 @@ import za.co.absa.enceladus.utils.testUtils.SparkTestBase import za.co.absa.enceladus.utils.implicits.DataFrameImplicits.DataFrameEnhancements import za.co.absa.enceladus.utils.udf.UDFLibrary -class StandardizationFixedWidthSuite extends FunSuite with SparkTestBase with MockitoSugar{ +class StandardizationFixedWidthSuite extends AnyFunSuite with SparkTestBase with MockitoSugar{ private implicit val udfLibrary:UDFLibrary = new UDFLibrary() private val log: Logger = LoggerFactory.getLogger(this.getClass) private val argsBase = ("--dataset-name Foo --dataset-version 1 --report-date 2020-06-22 --report-version 1 " + diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationJsonSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationJsonSuite.scala index 278076f22..33b0c86e5 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationJsonSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationJsonSuite.scala @@ -16,8 +16,8 @@ package za.co.absa.enceladus.standardization import org.apache.spark.sql.types.{DataType, StructType} -import org.scalatest.FunSuite -import org.scalatest.mockito.MockitoSugar +import org.scalatest.funsuite.AnyFunSuite +import org.mockito.scalatest.MockitoSugar import org.slf4j.Logger import za.co.absa.enceladus.dao.MenasDAO import za.co.absa.enceladus.model.Dataset @@ -29,7 +29,7 @@ import za.co.absa.enceladus.utils.testUtils.SparkTestBase import za.co.absa.enceladus.utils.implicits.DataFrameImplicits.DataFrameEnhancements import 
za.co.absa.enceladus.utils.udf.UDFLibrary -class StandardizationJsonSuite extends FunSuite with SparkTestBase with MockitoSugar{ +class StandardizationJsonSuite extends AnyFunSuite with SparkTestBase with MockitoSugar{ private implicit val udfLibrary:UDFLibrary = new UDFLibrary() private val standardizationReader = new StandardizationPropertiesProvider() diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationParquetSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationParquetSuite.scala index 0e758922e..adc1d6dea 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationParquetSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationParquetSuite.scala @@ -19,8 +19,9 @@ import java.util.UUID import org.apache.spark.sql.DataFrame import org.apache.spark.sql.types._ -import org.scalatest.mockito.MockitoSugar -import org.scalatest.{Outcome, fixture} +import org.scalatest.funsuite.FixtureAnyFunSuite +import org.mockito.scalatest.MockitoSugar +import org.scalatest.Outcome import org.slf4j.Logger import za.co.absa.enceladus.common.RecordIdGeneration.IdType import za.co.absa.enceladus.dao.MenasDAO @@ -33,7 +34,7 @@ import za.co.absa.enceladus.utils.schema.MetadataKeys import za.co.absa.enceladus.utils.testUtils.SparkTestBase import za.co.absa.enceladus.utils.udf.UDFLibrary -class StandardizationParquetSuite extends fixture.FunSuite with SparkTestBase with TempFileFixture with MockitoSugar { +class StandardizationParquetSuite extends FixtureAnyFunSuite with SparkTestBase with TempFileFixture with MockitoSugar { type FixtureParam = String diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationRerunSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationRerunSuite.scala index a57e50a70..b403e5f00 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationRerunSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/StandardizationRerunSuite.scala @@ -20,8 +20,9 @@ import java.nio.charset.StandardCharsets import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ -import org.scalatest.mockito.MockitoSugar -import org.scalatest.{Outcome, fixture} +import org.scalatest.funsuite.FixtureAnyFunSuite +import org.mockito.scalatest.MockitoSugar +import org.scalatest.Outcome import org.slf4j.Logger import za.co.absa.enceladus.dao.MenasDAO import za.co.absa.enceladus.model.Dataset @@ -33,7 +34,7 @@ import za.co.absa.enceladus.utils.testUtils.SparkTestBase import za.co.absa.enceladus.utils.udf.UDFLibrary import za.co.absa.enceladus.utils.validation.ValidationException -class StandardizationRerunSuite extends fixture.FunSuite with SparkTestBase with TempFileFixture with MockitoSugar { +class StandardizationRerunSuite extends FixtureAnyFunSuite with SparkTestBase with TempFileFixture with MockitoSugar { import za.co.absa.enceladus.utils.implicits.DataFrameImplicits.DataFrameEnhancements diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/config/StandardizationParserSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/config/StandardizationParserSuite.scala index 63e3e136d..ed20ba4c1 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/config/StandardizationParserSuite.scala +++ 
b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/config/StandardizationParserSuite.scala @@ -17,13 +17,13 @@ package za.co.absa.enceladus.standardization.config import java.time.ZonedDateTime -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.dao.auth.{MenasKerberosCredentials, MenasPlainCredentials} import za.co.absa.enceladus.model.Dataset import za.co.absa.enceladus.standardization.StandardizationExecution import za.co.absa.enceladus.utils.testUtils.SparkTestBase -class StandardizationParserSuite extends FunSuite with SparkTestBase { +class StandardizationParserSuite extends AnyFunSuite with SparkTestBase { private val year = "2018" private val month = "12" diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/csv/EnhancedStandardizationCsvSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/csv/EnhancedStandardizationCsvSuite.scala index 48397c8c4..f8d8a5a9a 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/csv/EnhancedStandardizationCsvSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/csv/EnhancedStandardizationCsvSuite.scala @@ -15,10 +15,11 @@ package za.co.absa.enceladus.standardization.csv -import org.scalatest.{Outcome, fixture} +import org.scalatest.funsuite.FixtureAnyFunSuite +import org.scalatest.Outcome import za.co.absa.enceladus.standardization.fixtures.CsvFileFixture -class EnhancedStandardizationCsvSuite extends fixture.FunSuite with CsvFileFixture { +class EnhancedStandardizationCsvSuite extends FixtureAnyFunSuite with CsvFileFixture { import za.co.absa.enceladus.utils.implicits.DataFrameImplicits.DataFrameEnhancements diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/csv/NoneValueStandardizationCsvSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/csv/NoneValueStandardizationCsvSuite.scala index 471dbd886..8addf882a 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/csv/NoneValueStandardizationCsvSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/csv/NoneValueStandardizationCsvSuite.scala @@ -1,9 +1,10 @@ package za.co.absa.enceladus.standardization.csv -import org.scalatest.{Outcome, fixture} +import org.scalatest.funsuite.FixtureAnyFunSuite +import org.scalatest.Outcome import za.co.absa.enceladus.standardization.fixtures.CsvFileFixture -class NoneValueStandardizationCsvSuite extends fixture.FunSuite with CsvFileFixture { +class NoneValueStandardizationCsvSuite extends FixtureAnyFunSuite with CsvFileFixture { import za.co.absa.enceladus.utils.implicits.DataFrameImplicits.DataFrameEnhancements // A field containing the delimiter with the escape has to be enclosed in specified quotes diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/csv/StandardizationCsvSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/csv/StandardizationCsvSuite.scala index 61efcfc76..9b12a5410 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/csv/StandardizationCsvSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/csv/StandardizationCsvSuite.scala @@ -16,10 +16,11 @@ package za.co.absa.enceladus.standardization.csv import org.apache.spark.SparkException -import org.scalatest.{Outcome, fixture} +import org.scalatest.funsuite.FixtureAnyFunSuite +import org.scalatest.Outcome import 
za.co.absa.enceladus.standardization.fixtures.CsvFileFixture -class StandardizationCsvSuite extends fixture.FunSuite with CsvFileFixture{ +class StandardizationCsvSuite extends FixtureAnyFunSuite with CsvFileFixture{ import za.co.absa.enceladus.utils.implicits.DataFrameImplicits.DataFrameEnhancements diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/fixtures/CsvFileFixture.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/fixtures/CsvFileFixture.scala index 2d59a6dad..3cef918c4 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/fixtures/CsvFileFixture.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/fixtures/CsvFileFixture.scala @@ -20,7 +20,7 @@ import java.nio.charset.{Charset, StandardCharsets} import org.apache.spark.sql.DataFrame import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType} -import org.scalatest.mockito.MockitoSugar +import org.mockito.scalatest.MockitoSugar import za.co.absa.enceladus.dao.MenasDAO import za.co.absa.enceladus.model.Dataset import za.co.absa.enceladus.standardization.StandardizationPropertiesProvider diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/CounterPartySuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/CounterPartySuite.scala index 3bcdf2e37..6ab624c36 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/CounterPartySuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/CounterPartySuite.scala @@ -16,7 +16,7 @@ package za.co.absa.enceladus.standardization.interpreter import org.apache.spark.sql.types._ -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.utils.error.ErrorMessage import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase} import za.co.absa.enceladus.utils.udf.UDFLibrary @@ -24,7 +24,7 @@ import za.co.absa.enceladus.utils.udf.UDFLibrary case class Root(ConformedParty: Party, errCol: Seq[ErrorMessage] = Seq.empty) case class Party(key: Integer, clientKeys1: Seq[String], clientKeys2: Seq[String]) -class CounterPartySuite extends FunSuite with SparkTestBase with LoggerTestBase { +class CounterPartySuite extends AnyFunSuite with SparkTestBase with LoggerTestBase { test("Mimic running standardization twice on counter party") { import spark.implicits._ diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/DateTimeSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/DateTimeSuite.scala index 95790c827..2accd11df 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/DateTimeSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/DateTimeSuite.scala @@ -19,7 +19,7 @@ import java.sql.{Date, Timestamp} import org.apache.spark.sql.types._ import org.apache.spark.sql.{DataFrame, Dataset, Row} -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.standardization.interpreter.stages.SchemaChecker import za.co.absa.enceladus.standardization.samples.TestSamples import za.co.absa.enceladus.utils.error.ErrorMessage @@ -30,7 +30,7 @@ import za.co.absa.enceladus.utils.validation.{SchemaValidator, ValidationError, import scala.io.Source -class DateTimeSuite extends FunSuite with SparkTestBase with LoggerTestBase{ +class DateTimeSuite 
extends AnyFunSuite with SparkTestBase with LoggerTestBase{ import spark.implicits._ lazy val data: DataFrame = spark.createDataFrame(TestSamples.dateSamples) diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/SampleDataSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/SampleDataSuite.scala index 83a768385..a514790ac 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/SampleDataSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/SampleDataSuite.scala @@ -16,13 +16,13 @@ package za.co.absa.enceladus.standardization.interpreter import org.apache.spark.sql.types.{DataType, StructType} -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.standardization.samples.{StdEmployee, TestSamples} import za.co.absa.enceladus.utils.fs.FileReader import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase} import za.co.absa.enceladus.utils.udf.UDFLibrary -class SampleDataSuite extends FunSuite with SparkTestBase with LoggerTestBase { +class SampleDataSuite extends AnyFunSuite with SparkTestBase with LoggerTestBase { test("Simple Example Test") { import spark.implicits._ diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreterSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreterSuite.scala index cd5317899..8367eecc9 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreterSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreterSuite.scala @@ -16,7 +16,7 @@ package za.co.absa.enceladus.standardization.interpreter import org.apache.spark.sql.types._ -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.standardization.interpreter.StandardizationInterpreterSuite._ import za.co.absa.enceladus.utils.error.ErrorMessage import za.co.absa.enceladus.utils.fs.FileReader @@ -24,7 +24,7 @@ import za.co.absa.enceladus.utils.general.JsonUtils import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase} import za.co.absa.enceladus.utils.udf.UDFLibrary -class StandardizationInterpreterSuite extends FunSuite with SparkTestBase with LoggerTestBase { +class StandardizationInterpreterSuite extends AnyFunSuite with SparkTestBase with LoggerTestBase { import spark.implicits._ private implicit val udfLib: UDFLibrary = new UDFLibrary diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_ArraySuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_ArraySuite.scala index 6da6c7e83..098ae2dc7 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_ArraySuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_ArraySuite.scala @@ -16,7 +16,8 @@ package za.co.absa.enceladus.standardization.interpreter import org.apache.spark.sql.types._ -import org.scalatest.{FunSuite, Matchers} +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers import za.co.absa.enceladus.common.error.ErrorMessageFactory import za.co.absa.enceladus.utils.general.JsonUtils import 
za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase} @@ -25,7 +26,7 @@ import za.co.absa.enceladus.utils.schema.MetadataKeys import za.co.absa.enceladus.utils.udf.UDFLibrary import za.co.absa.enceladus.utils.validation.ValidationException -class StandardizationInterpreter_ArraySuite extends FunSuite with SparkTestBase with LoggerTestBase with Matchers { +class StandardizationInterpreter_ArraySuite extends AnyFunSuite with SparkTestBase with LoggerTestBase with Matchers { import spark.implicits._ private implicit val udfLib: UDFLibrary = new UDFLibrary diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_BinarySuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_BinarySuite.scala index 256a220df..bd4a5856b 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_BinarySuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_BinarySuite.scala @@ -16,13 +16,14 @@ package za.co.absa.enceladus.standardization.interpreter import org.apache.spark.sql.types.{BinaryType, Metadata, MetadataBuilder, StructField, StructType} -import org.scalatest.{FunSuite, Matchers} +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers import za.co.absa.enceladus.utils.error.ErrorMessage import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase} import za.co.absa.enceladus.utils.udf.UDFLibrary import za.co.absa.enceladus.utils.validation.ValidationException -class StandardizationInterpreter_BinarySuite extends FunSuite with SparkTestBase with LoggerTestBase with Matchers { +class StandardizationInterpreter_BinarySuite extends AnyFunSuite with SparkTestBase with LoggerTestBase with Matchers { import spark.implicits._ diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_DateSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_DateSuite.scala index 66baab7fb..b93a45e58 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_DateSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_DateSuite.scala @@ -18,12 +18,12 @@ package za.co.absa.enceladus.standardization.interpreter import java.sql.Date import org.apache.spark.sql.types.{DateType, MetadataBuilder, StructField, StructType} -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.utils.error.ErrorMessage import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase} import za.co.absa.enceladus.utils.udf.UDFLibrary -class StandardizationInterpreter_DateSuite extends FunSuite with SparkTestBase with LoggerTestBase { +class StandardizationInterpreter_DateSuite extends AnyFunSuite with SparkTestBase with LoggerTestBase { import spark.implicits._ private implicit val udfLib: UDFLibrary = new UDFLibrary diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_DecimalSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_DecimalSuite.scala index d4431cf60..325f6389b 100644 --- 
a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_DecimalSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_DecimalSuite.scala @@ -19,13 +19,13 @@ import java.text.{DecimalFormat, NumberFormat} import java.util.Locale import org.apache.spark.sql.types._ -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.utils.error.ErrorMessage import za.co.absa.enceladus.utils.schema.MetadataKeys import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase} import za.co.absa.enceladus.utils.udf.UDFLibrary -class StandardizationInterpreter_DecimalSuite extends FunSuite with SparkTestBase with LoggerTestBase { +class StandardizationInterpreter_DecimalSuite extends AnyFunSuite with SparkTestBase with LoggerTestBase { import spark.implicits._ private implicit val udfLib: UDFLibrary = new UDFLibrary diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_FractionalSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_FractionalSuite.scala index 4a7458257..d1136fb5d 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_FractionalSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_FractionalSuite.scala @@ -16,13 +16,13 @@ package za.co.absa.enceladus.standardization.interpreter import org.apache.spark.sql.types._ -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.utils.error.ErrorMessage import za.co.absa.enceladus.utils.schema.MetadataKeys import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase} import za.co.absa.enceladus.utils.udf.UDFLibrary -class StandardizationInterpreter_FractionalSuite extends FunSuite with SparkTestBase with LoggerTestBase { +class StandardizationInterpreter_FractionalSuite extends AnyFunSuite with SparkTestBase with LoggerTestBase { import spark.implicits._ private implicit val udfLib: UDFLibrary = new UDFLibrary @@ -72,7 +72,7 @@ class StandardizationInterpreter_FractionalSuite extends FunSuite with SparkTest FractionalRow("02-Null", Option(0), None, Seq( ErrorMessage.stdNullErr("floatField"))), FractionalRow("03-Long", Option(9.223372E18F), Option(-9.223372036854776E18)), - FractionalRow("04-infinity", Option(0), None, Seq( + FractionalRow("04-infinity", Option(0), None, Seq( ErrorMessage.stdCastErr("floatField", "-Infinity"), ErrorMessage.stdCastErr("doubleField", "Infinity"))), FractionalRow("05-Really big", Option(0), None, Seq( @@ -132,7 +132,7 @@ class StandardizationInterpreter_FractionalSuite extends FunSuite with SparkTest FractionalRow("02-Null", Option(0), None, Seq( ErrorMessage.stdNullErr("floatField"))), FractionalRow("03-Long", Option(9.223372E18F), Option(-9.223372036854776E18)), - FractionalRow("04-Infinity", Option(0), None, Seq( + FractionalRow("04-Infinity", Option(0), None, Seq( ErrorMessage.stdCastErr("floatField", "-Infinity"), ErrorMessage.stdCastErr("doubleField", "Infinity"))), FractionalRow("05-Really big", Option(0), Option(reallyBig), Seq( diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_IntegralSuite.scala 
b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_IntegralSuite.scala index 629df32bd..b3001e5d2 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_IntegralSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_IntegralSuite.scala @@ -19,13 +19,13 @@ import java.text.{DecimalFormat, NumberFormat} import java.util.Locale import org.apache.spark.sql.types._ -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.utils.error.ErrorMessage import za.co.absa.enceladus.utils.schema.MetadataKeys import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase} import za.co.absa.enceladus.utils.udf.UDFLibrary -class StandardizationInterpreter_IntegralSuite extends FunSuite with SparkTestBase with LoggerTestBase{ +class StandardizationInterpreter_IntegralSuite extends AnyFunSuite with SparkTestBase with LoggerTestBase{ import spark.implicits._ diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_TimestampSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_TimestampSuite.scala index 713a25945..e006257db 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_TimestampSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StandardizationInterpreter_TimestampSuite.scala @@ -18,12 +18,12 @@ package za.co.absa.enceladus.standardization.interpreter import java.sql.Timestamp import org.apache.spark.sql.types.{MetadataBuilder, StructField, StructType, TimestampType} -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.utils.error.ErrorMessage import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase} import za.co.absa.enceladus.utils.udf.UDFLibrary -class StandardizationInterpreter_TimestampSuite extends FunSuite with SparkTestBase with LoggerTestBase { +class StandardizationInterpreter_TimestampSuite extends AnyFunSuite with SparkTestBase with LoggerTestBase { import spark.implicits._ private implicit val udfLib: UDFLibrary = new UDFLibrary diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StdInterpreterSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StdInterpreterSuite.scala index 8a4d2dcde..438d85aaf 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StdInterpreterSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/StdInterpreterSuite.scala @@ -19,7 +19,7 @@ import java.sql.{Date, Timestamp} import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.utils.error.ErrorMessage import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase} import za.co.absa.enceladus.utils.udf.UDFLibrary @@ -34,7 +34,7 @@ case class MyWrapperStd(counterparty: MyHolder, errCol: Seq[ErrorMessage]) case class Time(id: Int, date: String, timestamp: String) case class StdTime(id: Int, date: Date, timestamp: Timestamp, errCol: List[ErrorMessage]) -class StdInterpreterSuite extends FunSuite with SparkTestBase with LoggerTestBase { 
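// A minimal sketch of the migrated test style used throughout these hunks, assuming ScalaTest 3.1+
// and mockito-scala-scalatest on the test classpath: the suite traits moved into style-specific
// packages (AnyFunSuite, FixtureAnyFunSuite) and MockitoSugar now comes from org.mockito.scalatest.
// Greeter, MySuite and MyFixtureSuite are illustrative names, not classes from this code base.
import org.scalatest.Outcome
import org.scalatest.funsuite.{AnyFunSuite, FixtureAnyFunSuite}
import org.mockito.scalatest.MockitoSugar

trait Greeter { def greet(name: String): String }

class MySuite extends AnyFunSuite with MockitoSugar {
  test("mocking works with the mockito-scala flavour of MockitoSugar") {
    val greeter = mock[Greeter]
    when(greeter.greet("enceladus")).thenReturn("hello")
    assert(greeter.greet("enceladus") == "hello")
  }
}

class MyFixtureSuite extends FixtureAnyFunSuite {
  type FixtureParam = String

  // the fixture is passed to each test body, replacing the old fixture.FunSuite pattern
  override def withFixture(test: OneArgTest): Outcome = test("fixture value")

  test("fixture suites extend FixtureAnyFunSuite instead of fixture.FunSuite") { value =>
    assert(value == "fixture value")
  }
}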
+class StdInterpreterSuite extends AnyFunSuite with SparkTestBase with LoggerTestBase { import spark.implicits._ case class subCC(subFieldA: Integer, subFieldB: String) diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/stages/PlainSchemaGeneratorSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/stages/PlainSchemaGeneratorSuite.scala index d866e8dba..5b739cc84 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/stages/PlainSchemaGeneratorSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/stages/PlainSchemaGeneratorSuite.scala @@ -16,10 +16,10 @@ package za.co.absa.enceladus.standardization.interpreter.stages import org.apache.spark.sql.types._ -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.utils.testUtils.SparkTestBase -class PlainSchemaGeneratorSuite extends FunSuite with SparkTestBase { +class PlainSchemaGeneratorSuite extends AnyFunSuite with SparkTestBase { private val schema = StructType(Seq( StructField("a", IntegerType, nullable = false), StructField("b", IntegerType, nullable = false, new MetadataBuilder().putString("meta", "data").build), diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/stages/TypeParserSuite.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/stages/TypeParserSuite.scala index e36789830..f6f1c0120 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/stages/TypeParserSuite.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/stages/TypeParserSuite.scala @@ -16,7 +16,7 @@ package za.co.absa.enceladus.standardization.interpreter.stages import org.apache.spark.sql.types._ -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.utils.testUtils.SparkTestBase import za.co.absa.enceladus.utils.types.TypedStructField.TypedStructFieldTagged import za.co.absa.enceladus.utils.types.parsers.NumericParser @@ -25,7 +25,7 @@ import za.co.absa.enceladus.utils.udf.{UDFLibrary, UDFResult} import scala.util.Success -class TypeParserSuite extends FunSuite with SparkTestBase { +class TypeParserSuite extends AnyFunSuite with SparkTestBase { private implicit val udfLib: UDFLibrary = new UDFLibrary private implicit val defaults: Defaults = GlobalDefaults diff --git a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/stages/TypeParserSuiteTemplate.scala b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/stages/TypeParserSuiteTemplate.scala index 3c4ef410a..f940524ef 100644 --- a/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/stages/TypeParserSuiteTemplate.scala +++ b/spark-jobs/src/test/scala/za/co/absa/enceladus/standardization/interpreter/stages/TypeParserSuiteTemplate.scala @@ -20,7 +20,7 @@ import java.sql.{Date, Timestamp} import org.apache.log4j.{LogManager, Logger} import org.apache.spark.sql.types._ -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.standardization.interpreter.dataTypes.ParseOutput import za.co.absa.enceladus.standardization.interpreter.stages.TypeParserSuiteTemplate._ import za.co.absa.enceladus.utils.testUtils.SparkTestBase @@ -28,7 +28,7 @@ import za.co.absa.enceladus.utils.time.DateTimePattern import za.co.absa.enceladus.utils.types.{Defaults, 
GlobalDefaults, TypedStructField} import za.co.absa.enceladus.utils.udf.UDFLibrary -trait TypeParserSuiteTemplate extends FunSuite with SparkTestBase { +trait TypeParserSuiteTemplate extends AnyFunSuite with SparkTestBase { private implicit val udfLib: UDFLibrary = new UDFLibrary private implicit val defaults: Defaults = GlobalDefaults diff --git a/utils/src/main/scala/za/co/absa/enceladus/utils/fs/DistributedFsUtils.scala b/utils/src/main/scala/za/co/absa/enceladus/utils/fs/DistributedFsUtils.scala new file mode 100644 index 000000000..862491650 --- /dev/null +++ b/utils/src/main/scala/za/co/absa/enceladus/utils/fs/DistributedFsUtils.scala @@ -0,0 +1,67 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package za.co.absa.enceladus.utils.fs + +/** + * A set of functions to help with the date partitioning and version control + */ + +trait DistributedFsUtils { + + /** + * Check if a given path exists on the distributed Fs + */ + def exists(distPath: String): Boolean + + def read(distPath: String): String + + /** + * Returns distributed directory size in bytes + */ + def getDirectorySize(distPath: String): Long + + /** + * Returns distributed directory size in bytes, skipping hidden files and directories (starting from '_' or '.'). + * + * @param distPath A path to a directory or a file. 
+ * @return Directory size in bytes + */ + def getDirectorySizeNoHidden(distPath: String): Long + + /** + * Checks if the distributed-FS path contains non-splittable files + */ + def isNonSplittable(distPath: String): Boolean + + /** + * Deletes a distributed-FS directory and all its contents recursively + */ + def deleteDirectoryRecursively(distPath: String): Unit + + /** + * Finds the latest version given a publish folder on distributed-FS + * + * @param publishPath The distributed-FS path to the publish folder containing versions + * @param reportDate The string representation of the report date used to infer the latest version + * @return the latest version or 0 in case no versions exist + */ + def getLatestVersion(publishPath: String, reportDate: String): Int + +} + +object DistributedFsUtils { + val nonSplittableExtensions = List("gz") +} diff --git a/utils/src/main/scala/za/co/absa/enceladus/utils/fs/FileSystemVersionUtils.scala b/utils/src/main/scala/za/co/absa/enceladus/utils/fs/HdfsUtils.scala similarity index 73% rename from utils/src/main/scala/za/co/absa/enceladus/utils/fs/FileSystemVersionUtils.scala rename to utils/src/main/scala/za/co/absa/enceladus/utils/fs/HdfsUtils.scala index 37b99f221..dad506d11 100644 --- a/utils/src/main/scala/za/co/absa/enceladus/utils/fs/FileSystemVersionUtils.scala +++ b/utils/src/main/scala/za/co/absa/enceladus/utils/fs/HdfsUtils.scala @@ -31,17 +31,17 @@ import scala.util.Try * A set of functions to help with the date partitioning and version control */ -class FileSystemVersionUtils(conf: Configuration) { +class HdfsUtils(conf: Configuration) extends DistributedFsUtils { - private val log = LogManager.getLogger("enceladus.utils.fs") + private val log = LogManager.getLogger("enceladus.utils.fs.HdfsUtils") private val fs = FileSystem.get(conf) /** - * Split path URI by separating scheme+server and path part + * Split HDFS path URI by separating scheme+server and path part * Example: * hdfs://server:8020/user/data/input -> (hdfs://server:8020, /user/data/input) * /user/data/input -> ("", /user/data/input) */ - def splitUriPath(path: Path): (String, String) = { + private[fs] def splitUriPath(path: Path): (String, String) = { val uri = path.toUri val scheme = uri.getScheme val authority = uri.getAuthority @@ -73,39 +73,24 @@ class FileSystemVersionUtils(conf: Configuration) { }) } - /** - * Creates a temporary directory in the local filesystem. - * - * @param prefix A prefix to use for the temporary directory. - * @return A path to a temporary directory. 
- */ - def getLocalTemporaryDirectory(prefix: String): String = { - val tmpPath = Files.createTempDirectory(prefix) - tmpPath.toAbsolutePath.toString - } + /** * Check if a given path exists on HDFS */ - def hdfsExists(path: String): Boolean = { + override def exists(path: String): Boolean = { log.info(s"Cheking if $path exists") fs.exists(new Path(path)) } - /** - * Check if a given files exists on the local file system - */ - def localExists(path: String): Boolean = { - new File(path).exists() - } /** * Function which determines whether the file exists on HDFS or local file system * */ - def exists(path: String): Boolean = { + def existsLocallyOrDistributed(path: String): Boolean = { val local = try { - localExists(path) + LocalFsUtils.localExists(path) } catch { case e: IllegalArgumentException => false } @@ -114,7 +99,7 @@ class FileSystemVersionUtils(conf: Configuration) { true } else { val hdfs = try { - hdfsExists(path) + exists(path) } catch { case e: IllegalArgumentException => false case e: ConnectException => false @@ -136,12 +121,12 @@ class FileSystemVersionUtils(conf: Configuration) { * @return A path to a file in the local filesystem. */ @throws[FileNotFoundException] - def getLocalPathToFile(path: String): String = { - val absolutePath = replaceHome(path) - if (localExists(absolutePath)) { + def getLocalPathToFileOrCopyToLocal(path: String): String = { + val absolutePath = LocalFsUtils.replaceHome(path) + if (LocalFsUtils.localExists(absolutePath)) { absolutePath - } else if (hdfsExists(path)) { - hdfsFileToLocalTempFile(path) + } else if (exists(path)) { + copyDistributedFileToLocalTempFile(path) } else { throw new FileNotFoundException(s"File not found: $path.") } @@ -155,33 +140,23 @@ class FileSystemVersionUtils(conf: Configuration) { * @return The file's content. */ @throws[FileNotFoundException] - def getFileContent(path: String): String = { - val absolutePath = replaceHome(path) - if (localExists(absolutePath)) { - readLocalFile(absolutePath) - } else if (hdfsExists(path)) { - hdfsRead(path) + def getLocalOrDistributedFileContent(path: String): String = { + val absolutePath = LocalFsUtils.replaceHome(path) + if (LocalFsUtils.localExists(absolutePath)) { + LocalFsUtils.readLocalFile(absolutePath) + } else if (exists(path)) { + read(path) } else { throw new FileNotFoundException(s"File not found: $path.") } } - /** - * Reads a local file fully and returns its content. - * - * @param path A path to a file. - * @return The file's content. 
- */ - def readLocalFile(path: String): String = { - Files.readAllLines(Paths.get(path), StandardCharsets.UTF_8).toArray.mkString("\n") - } - /** * Read a file from HDFS and stores in local file system temp file * * @return The path of the local temp file */ - def hdfsFileToLocalTempFile(hdfsPath: String): String = { + def copyDistributedFileToLocalTempFile(hdfsPath: String): String = { val in = fs.open(new Path(hdfsPath)) val content = Array.fill(in.available())(0.toByte) in.readFully(content) @@ -189,10 +164,13 @@ class FileSystemVersionUtils(conf: Configuration) { tmpFile.deleteOnExit() FileUtils.writeByteArrayToFile(tmpFile, content) tmpFile.getAbsolutePath + + // why not use + // fs.copyToLocalFile(false, new Path(hdfsPath), new Path("someLocalName"), true) } - def hdfsRead(path: String): String = { - val in = fs.open(new Path(path)) + override def read(distPath: String): String = { + val in = fs.open(new Path(distPath)) val content = Array.fill(in.available())(0.toByte) in.readFully(content) new String(content, "UTF-8") @@ -242,13 +220,11 @@ class FileSystemVersionUtils(conf: Configuration) { } /** - * Checks if the path contains non-splittable files + * Checks if the HDFS path contains non-splittable files */ - def isNonSplittable(path: String): Boolean = { - val nonSplittableExtensions = List("gz") - + override def isNonSplittable(path: String): Boolean = { val files = getFilePaths(path) - files.exists(file => nonSplittableExtensions.exists(file.endsWith)) + files.exists(file => DistributedFsUtils.nonSplittableExtensions.exists(file.endsWith)) } /** @@ -256,13 +232,13 @@ class FileSystemVersionUtils(conf: Configuration) { * Example: * /path/to/dir -> ("path/to/dir/file1.extension", "path/to/dir/file2.extension") */ - def getFilePaths(path: String): Array[String] = { + private def getFilePaths(path: String): Array[String] = { val hdfsPath = new Path(path) fs.listStatus(hdfsPath).map(_.getPath.toString) } /** - * Deletes a directory and all its contents recursively + * Deletes a HDFS directory and all its contents recursively */ def deleteDirectoryRecursively(path: String): Unit = { log.info(s"Deleting '$path' recursively...") @@ -279,7 +255,7 @@ class FileSystemVersionUtils(conf: Configuration) { } /** - * Finds the latest version given a publish folder + * Finds the latest version given a publish folder on HDFS * * @param publishPath The HDFS path to the publish folder containing versions * @param reportDate The string representation of the report date used to infer the latest version @@ -299,18 +275,4 @@ class FileSystemVersionUtils(conf: Configuration) { } } - /** - * Replaces tilde ('~') with the home dir. - * - * @param path An input path. - * @return An absolute output path. - */ - def replaceHome(path: String): String = { - if (path.matches("^~.*")) { - //not using replaceFirst as it interprets the backslash in Windows path as escape character mangling the result - System.getProperty("user.home") + path.substring(1) - } else { - path - } - } } diff --git a/utils/src/main/scala/za/co/absa/enceladus/utils/fs/LocalFsUtils.scala b/utils/src/main/scala/za/co/absa/enceladus/utils/fs/LocalFsUtils.scala new file mode 100644 index 000000000..9c1bf11de --- /dev/null +++ b/utils/src/main/scala/za/co/absa/enceladus/utils/fs/LocalFsUtils.scala @@ -0,0 +1,75 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
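// Illustrative sketch of the renamed HdfsUtils helpers above working together with the new
// LocalFsUtils object defined just below. FsUtilsUsageSketch and localizeKeytab are made-up names,
// not code from this change set, and a SparkSession named `spark` is assumed to be in scope.
import org.apache.spark.sql.SparkSession
import za.co.absa.enceladus.utils.fs.{HdfsUtils, LocalFsUtils}

object FsUtilsUsageSketch {
  def localizeKeytab(pathArg: String)(implicit spark: SparkSession): Option[String] = {
    val hdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)
    // purely local concerns such as tilde expansion now live in LocalFsUtils
    val expanded = LocalFsUtils.replaceHome(pathArg)
    if (hdfsUtils.existsLocallyOrDistributed(expanded)) {
      // copies the file to a local temp file only when it is not already on the local file system
      Some(hdfsUtils.getLocalPathToFileOrCopyToLocal(expanded))
    } else {
      None
    }
  }
}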
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package za.co.absa.enceladus.utils.fs + +import java.io.File +import java.nio.charset.StandardCharsets +import java.nio.file.{Files, Paths} + +import org.apache.log4j.LogManager + +/** + * A set of functions to help with the date partitioning and version control + */ + +object LocalFsUtils { + + private val log = LogManager.getLogger("enceladus.utils.fs.LocalFsUtils") + + /** + * Creates a temporary directory in the local filesystem. + * + * @param prefix A prefix to use for the temporary directory. + * @return A path to a temporary directory. + */ + def getLocalTemporaryDirectory(prefix: String): String = { + val tmpPath = Files.createTempDirectory(prefix) + tmpPath.toAbsolutePath.toString + } + + + /** + * Check if a given files exists on the local file system + */ + def localExists(path: String): Boolean = { + new File(path).exists() + } + + /** + * Reads a local file fully and returns its content. + * + * @param path A path to a file. + * @return The file's content. + */ + def readLocalFile(path: String): String = { + Files.readAllLines(Paths.get(path), StandardCharsets.UTF_8).toArray.mkString("\n") + } + + /** + * Replaces tilde ('~') with the home dir. + * + * @param path An input path. + * @return An absolute output path. + */ + def replaceHome(path: String): String = { + if (path.matches("^~.*")) { + //not using replaceFirst as it interprets the backslash in Windows path as escape character mangling the result + System.getProperty("user.home") + path.substring(1) + } else { + path + } + } +} diff --git a/utils/src/main/scala/za/co/absa/enceladus/utils/fs/S3FsUtils.scala b/utils/src/main/scala/za/co/absa/enceladus/utils/fs/S3FsUtils.scala new file mode 100644 index 000000000..f41c9e980 --- /dev/null +++ b/utils/src/main/scala/za/co/absa/enceladus/utils/fs/S3FsUtils.scala @@ -0,0 +1,271 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package za.co.absa.enceladus.utils.fs + +import org.slf4j.{Logger, LoggerFactory} +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider +import software.amazon.awssdk.regions.Region +import software.amazon.awssdk.services.s3.S3Client +import software.amazon.awssdk.services.s3.model.{S3Location => _, _} +import za.co.absa.atum.persistence.{S3KmsSettings, S3Location} +import za.co.absa.atum.utils.S3Utils +import za.co.absa.atum.utils.S3Utils.StringS3LocationExt + +import scala.annotation.tailrec +import scala.collection.JavaConverters._ +import scala.util.{Failure, Success, Try} + +// kmsSettings: S3KmsSettings in not currently used, but would be necessary if any SDK calls needed to put data on S3 +case class S3FsUtils(region: Region, kmsSettings: S3KmsSettings)(implicit credentialsProvider: AwsCredentialsProvider) + extends DistributedFsUtils { + + protected val log: Logger = LoggerFactory.getLogger(this.getClass) + private[fs] val maxKeys = 1000 // overridable default + + val s3Client: S3Client = getS3Client + + /** + * Check if a given path exists on the distributed Fs + */ + override def exists(distPath: String): Boolean = { + val location = distPath.toS3Location(region) + + val headRequest = HeadObjectRequest + .builder().bucket(location.bucketName).key(location.path) + .build() + + // there seems to be no doesObjectExist method as of current version https://github.com/aws/aws-sdk-java-v2/issues/392 + Try { + s3Client.headObject(headRequest) + } match { + case Success(_) => + true + case Failure(_: NoSuchKeyException) => + false + case Failure(e) => throw e + } + } + + override def read(distPath: String): String = { + val location = distPath.toS3Location(region) + + val getRequest = GetObjectRequest + .builder().bucket(location.bucketName).key(location.path) + .build() + + val content = s3Client.getObjectAsBytes(getRequest).asUtf8String() + + content + } + + override def getDirectorySize(distPath: String): Long = getDirectorySize(distPath, _ => true) + + /** + * Returns distributed directory size in bytes + */ + private[fs] def getDirectorySize(distPath: String, keyNameFilter: String => Boolean): Long = { + + // setup accumulation + val location = distPath.toS3Location(region) + val initSize = 0L + + def accumulateSizeOp(previousTotalSize: Long, response: ListObjectsV2Response): Long = { + val objects = response.contents().asScala + val totalSize = objects + .filter(obj => keyNameFilter(obj.key)) + .foldLeft(0L) { (currentSize: Long, nextObject: S3Object) => currentSize + nextObject.size } + + previousTotalSize + totalSize + } + + listAndAccumulateRecursively(location, accumulateSizeOp, initSize) + } + + /** + * Hidden files = starting with `_` or `.` This method will return true for hidden keys. + * + * @param key path on s3 + * @return e.g. `/path/to/.hidden` => true, `/path/to/non-hidden` => false + */ + private[fs] def isKeyHidden(key: String): Boolean = { + val fn = key.split('/').last + + (fn.startsWith("_")) || (fn.startsWith(".")) + } + + /** + * Returns distributed directory size in bytes, skipping hidden files and directories (starting from '_' or '.'). + * + * @param distPath A path to a directory or a file. 
+ * @return Directory size in bytes + */ + override def getDirectorySizeNoHidden(distPath: String): Long = getDirectorySize(distPath, key => !isKeyHidden(key)) + + + private[fs] def isKeyNonSplittable(key: String): Boolean = { + val fn = key.split('/').last + + DistributedFsUtils.nonSplittableExtensions.exists(fn.endsWith) + } + + /** + * Checks if the distributed-FS path contains non-splittable files + */ + override def isNonSplittable(distPath: String): Boolean = { + // setup accumulation + val location = distPath.toS3Location(region) + val initFoundValue = false + // we want to break out of the recursion if a non-splittable is found, because it cannot ever be unfound. + val breakOutCase = Some(true) + + def accumulateFoundOp(previouslyFound: Boolean, response: ListObjectsV2Response): Boolean = { + val objects = response.contents().asScala + val nonSplittableFound = objects.exists(obj => isKeyNonSplittable(obj.key)) + + previouslyFound || nonSplittableFound // true if ever found + } + + listAndAccumulateRecursively(location, accumulateFoundOp, initFoundValue, breakOutCase) + } + + /** + * Deletes a distributed-FS directory and all its contents recursively + */ + override def deleteDirectoryRecursively(distPath: String): Unit = { + + // setup accumulation + val location = distPath.toS3Location(region) + + def accumulateSizeOp(acc: Unit, response: ListObjectsV2Response): Unit = { // side-effect, "accumulates" to unit + val objects = response.contents().asScala + if (objects.nonEmpty) { + deleteKeys(location.bucketName, objects.map(_.key)) + } + } + + listAndAccumulateRecursively(location, accumulateSizeOp, ()) + } + + private[fs] def deleteKeys(bucketName: String, keys: Seq[String]): Unit = { + require(keys.nonEmpty) + + val objIds = keys.map(k => ObjectIdentifier.builder().key(k).build()) + val request: DeleteObjectsRequest = DeleteObjectsRequest.builder().bucket(bucketName) + .delete(Delete.builder().objects(objIds.asJava).build()) + .build() + + val delResp: DeleteObjectsResponse = s3Client.deleteObjects(request) + + if (delResp.errors().size() > 0) { + log.warn(s"Errors while deleting (${delResp.errors.size}):\n ${delResp.errors.asScala.map(_.message()).mkString("\n")}") + } + } + + /** + * Finds the latest version given a publish folder on distributed-FS + * + * @param publishPath The distributed-FS path to the publish folder containing versions + * @param reportDate The string representation of the report date used to infer the latest version + * @return the latest version or 0 in case no versions exist + */ + override def getLatestVersion(publishPath: String, reportDate: String): Int = { + + // setup accumulation + val location = publishPath.toS3Location(region) + val initVersion = 0 + + // looking for $publishPath/enceladus_info_date=$reportDate\enceladus_info_version=$version + val prefix = s"${location.path}/enceladus_info_date=$reportDate/enceladus_info_version=" + val prefixedLocation = location.copy(path = prefix) + + def accumulateSizeOp(previousMaxVersion: Int, response: ListObjectsV2Response): Int = { + val objects = response.contents().asScala + + val existingVersions = objects + .map(_.key) + .flatMap { key => + assert(key.startsWith(prefix), s"Retrieved keys should start with $prefix, but precondition fails for $key") + val noPrefix = key.stripPrefix(prefix) + Try { + noPrefix.takeWhile(_.isDigit).toInt // may not hold valid int >= 1 + } match { + case Success(version) if version >= 1 => Some(version) + case _ => None + } + } + .toSet + + if (existingVersions.isEmpty) { 
+ previousMaxVersion + } else { + Math.max(previousMaxVersion, existingVersions.max) + } + } + + listAndAccumulateRecursively(prefixedLocation, accumulateSizeOp, initVersion) + } + + private[fs] def getS3Client: S3Client = S3Utils.getS3Client(region, credentialsProvider) + + /** + * General method to list and accumulate the objects info. Note, that the method strives to be memory-efficient - + * i.e. accumulate the current batch first and then load the next batch (instead of the naive "load all first, process later" + * + * @param location s3location - bucket & path are used + * @param accumulateOp operation to accumulate + * @param initialAccValue (initial/carry-over) accumulator value + * @param breakOut allows to break the recursion prematurely when the defined value equals the currently accumulated value. + * Default: None = no break out + * @tparam T accumulator value type + * @return accumulated value + */ + private def listAndAccumulateRecursively[T](location: S3Location, + accumulateOp: (T, ListObjectsV2Response) => T, + initialAccValue: T, + breakOut: Option[T] = None): T = { + + log.debug(s"listAndAccumulateRecursively($location, $accumulateOp, $initialAccValue, $breakOut)") + + @tailrec + def listAndAccumulateRecursivelyAcc(contToken: Option[String], acc: T): T = { + log.debug(s"listAndAccumulateRecursivelyAcc($contToken, $acc)") + + val listObjectsBuilder = ListObjectsV2Request.builder + .bucket(location.bucketName) + .prefix(location.path) + .maxKeys(maxKeys) + val listObjectsRequest = contToken.fold(listObjectsBuilder.build)(listObjectsBuilder.continuationToken(_).build) + + val response: ListObjectsV2Response = s3Client.listObjectsV2(listObjectsRequest) + val totalAccumulated: T = accumulateOp(acc, response) // result of previous with the currently accumulated together + + // the caller is able define a short-circuiting condition - at which no more processing is needed, hence we "break out" here + if (breakOut.contains(totalAccumulated)) { + log.debug(s"Breakout at totalAccumulated value $totalAccumulated") + totalAccumulated + } else if (!response.isTruncated) { + log.debug(s"Final recursion level reached.") + totalAccumulated + } else { + // need to recurse & accumulate deeper + listAndAccumulateRecursivelyAcc(Some(response.nextContinuationToken), totalAccumulated) + } + } + + // run the recursive call + listAndAccumulateRecursivelyAcc(contToken = None, acc = initialAccValue) + } +} diff --git a/utils/src/main/scala/za/co/absa/enceladus/utils/fs/example/S3FsUtilsTestJob.scala b/utils/src/main/scala/za/co/absa/enceladus/utils/fs/example/S3FsUtilsTestJob.scala new file mode 100644 index 000000000..d5dfe8b90 --- /dev/null +++ b/utils/src/main/scala/za/co/absa/enceladus/utils/fs/example/S3FsUtilsTestJob.scala @@ -0,0 +1,65 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
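// A small worked example of the key handling in getLatestVersion above, using hypothetical keys:
// the text after ".../enceladus_info_version=" up to the first non-digit is parsed as a version,
// versions below 1 are ignored, and 0 is returned when nothing matches.
object LatestVersionSketch {
  def main(args: Array[String]): Unit = {
    val prefix = "publish/enceladus_info_date=2020-08-06/enceladus_info_version="
    val keys = Seq(
      prefix + "1/part-00000.parquet",
      prefix + "2/_INFO",
      prefix + "2/part-00000.parquet"
    )
    val latest = keys
      .map(_.stripPrefix(prefix))
      .flatMap(rest => scala.util.Try(rest.takeWhile(_.isDigit).toInt).toOption.filter(_ >= 1))
      .foldLeft(0)(Math.max)
    println(latest) // prints 2
  }
}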
+ */ + +package za.co.absa.enceladus.utils.fs.example + +import org.slf4j.LoggerFactory +import software.amazon.awssdk.regions.Region +import za.co.absa.atum.persistence.S3KmsSettings +import za.co.absa.atum.utils.S3Utils +import za.co.absa.enceladus.utils.fs.S3FsUtils + +// open: remove this or create an integration test like it instead. +// The implementation is suited to being run locally with a "saml" credentials profile. +object S3FsUtilsTestJob { + + private val log = LoggerFactory.getLogger(this.getClass) + private val bucketName = "putYourBucketBucketNameHere" + + def main(args: Array[String]): Unit = { + val basePath = s"s3://$bucketName/exampleS3Path" + + // This example relies on a local credentials profile named "saml" with access to the S3 location defined below + implicit val samlCredentialsProvider = S3Utils.getLocalProfileCredentialsProvider("saml") + val kmsKeyId = System.getenv("TOOLING_KMS_KEY_ID") // load from an environment property in order not to disclose it here + log.info(s"kmsKeyId from env loaded = ${kmsKeyId.take(10)}...") + + val s3utils = new S3FsUtils(Region.EU_WEST_1, S3KmsSettings(kmsKeyId)) { + override val maxKeys = 5 // to test recursive listing/action + } + + log.info(s"dir size of $basePath is:" + s3utils.getDirectorySize(basePath)) + log.info(s"dir size (no hidden) of $basePath is:" + s3utils.getDirectorySizeNoHidden(basePath)) + + log.info(s"should exist:" + s3utils.exists(s"$basePath/1/2019/11/27/1/_INFO")) + log.info(s"should not exist:" + s3utils.exists(s"$basePath/1/2019/11/27/1/_INFObogus")) + + log.info("found version (1): " + + s3utils.getLatestVersion(s"s3://$bucketName/superhero/publish", "2020-08-06")) + + log.info("found no version (0): " + + s3utils.getLatestVersion(s"s3://$bucketName/aaa", "2020-08-06")) + + log.info(s"reading file content:" + s3utils.read(s"$basePath/1/2019/11/27/1/_INFO").take(50)) + + log.info(s"should find no gz-s:" + s3utils.isNonSplittable(s"s3://$bucketName/gz-list/nogz")) + log.info(s"should find some gz-s (and breakOut):" + + s3utils.isNonSplittable(s"s3://$bucketName/gz-list/somegz")) + + val deletePath = s"s3://$bucketName/delete" + log.info(s"deleting $deletePath: " + s3utils.deleteDirectoryRecursively(deletePath)) + } + +} diff --git a/utils/src/main/scala/za/co/absa/enceladus/utils/performance/PerformanceMetricTools.scala b/utils/src/main/scala/za/co/absa/enceladus/utils/performance/PerformanceMetricTools.scala index 469cd6cae..97aeed022 100644 --- a/utils/src/main/scala/za/co/absa/enceladus/utils/performance/PerformanceMetricTools.scala +++ b/utils/src/main/scala/za/co/absa/enceladus/utils/performance/PerformanceMetricTools.scala @@ -20,7 +20,7 @@ import org.apache.spark.sql.functions.{col, size, sum} import org.slf4j.{Logger, LoggerFactory} import za.co.absa.atum.core.Atum import za.co.absa.enceladus.utils.error.ErrorMessage -import za.co.absa.enceladus.utils.fs.FileSystemVersionUtils +import za.co.absa.enceladus.utils.fs.DistributedFsUtils import za.co.absa.enceladus.utils.general.ProjectMetadataTools import za.co.absa.enceladus.utils.schema.SchemaUtils @@ -45,14 +45,13 @@ outputPath: String, loginUserName: String, cmdLineArgs: String - (implicit spark: SparkSession): Unit = { + (implicit spark: SparkSession, fsUtils: DistributedFsUtils): Unit = { // Spark job configuration val sc = spark.sparkContext // The number of executors minus the driver val numberOfExecutors = sc.getExecutorMemoryStatus.keys.size - 1 - val fsUtils = new 
FileSystemVersionUtils(spark.sparkContext.hadoopConfiguration) // Directory sizes and size ratio val inputDirSize = fsUtils.getDirectorySize(inputPath) val inputDataSize = fsUtils.getDirectorySizeNoHidden(inputPath) @@ -96,12 +95,10 @@ object PerformanceMetricTools { outputPath: String, loginUserName: String, cmdLineArgs: String - ): Unit = { - val fsUtils = new FileSystemVersionUtils(spark.sparkContext.hadoopConfiguration) + )(implicit fsUtils: DistributedFsUtils): Unit = { // Directory sizes and size ratio val inputDirSize = fsUtils.getDirectorySize(inputPath) - val inputDataSize = fsUtils.getDirectorySizeNoHidden(inputPath) val outputDirSize = fsUtils.getDirectorySize(outputPath) val outputDataSize = fsUtils.getDirectorySizeNoHidden(outputPath) diff --git a/utils/src/main/scala/za/co/absa/enceladus/utils/testUtils/SparkJobRunnerMethods.scala b/utils/src/main/scala/za/co/absa/enceladus/utils/testUtils/SparkJobRunnerMethods.scala index abe8cbf64..158a4a728 100644 --- a/utils/src/main/scala/za/co/absa/enceladus/utils/testUtils/SparkJobRunnerMethods.scala +++ b/utils/src/main/scala/za/co/absa/enceladus/utils/testUtils/SparkJobRunnerMethods.scala @@ -15,14 +15,14 @@ package za.co.absa.enceladus.utils.testUtils -import org.scalatest.FunSuiteLike +import org.scalatest.funsuite.AnyFunSuiteLike import scala.language.reflectiveCalls import scala.reflect.ClassTag import scala.reflect.runtime.universe trait SparkJobRunnerMethods { - this: FunSuiteLike => + this: AnyFunSuiteLike => private def runSparkJob[T](implicit ct: ClassTag[T]): Unit = { type MainClass = {def main(args: Array[String]): Unit} diff --git a/utils/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker b/utils/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker new file mode 100644 index 000000000..1f0955d45 --- /dev/null +++ b/utils/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker @@ -0,0 +1 @@ +mock-maker-inline diff --git a/utils/src/test/scala/za/co/absa/enceladus/SchemaPathValidatorSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/SchemaPathValidatorSuite.scala index c5f7f5c2a..d93ce4afc 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/SchemaPathValidatorSuite.scala +++ b/utils/src/test/scala/za/co/absa/enceladus/SchemaPathValidatorSuite.scala @@ -16,13 +16,13 @@ package za.co.absa.enceladus import org.apache.spark.sql.types._ -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.utils.validation.{SchemaPathValidator, ValidationError, ValidationIssue, ValidationWarning} /** * A test suite for validation of schema path fields existence. 
*/ -class SchemaPathValidatorSuite extends FunSuite { +class SchemaPathValidatorSuite extends AnyFunSuite { private val schema = StructType( Array( diff --git a/utils/src/test/scala/za/co/absa/enceladus/SchemaValidationSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/SchemaValidationSuite.scala index 44000db9a..9fd61f0cc 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/SchemaValidationSuite.scala +++ b/utils/src/test/scala/za/co/absa/enceladus/SchemaValidationSuite.scala @@ -16,7 +16,7 @@ package za.co.absa.enceladus import org.apache.spark.sql.types._ -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.utils.testUtils.LoggerTestBase import za.co.absa.enceladus.utils.validation.SchemaValidator @@ -24,7 +24,7 @@ import za.co.absa.enceladus.utils.validation.SchemaValidator * A test suite for validation of scalar data types */ //noinspection ZeroIndexToHead -class SchemaValidationSuite extends FunSuite with LoggerTestBase{ +class SchemaValidationSuite extends AnyFunSuite with LoggerTestBase{ test("Scalar types should be validated") { val schema = StructType( diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/ExplosionSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/ExplosionSuite.scala index e3c3188ec..7d65b0b58 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/utils/ExplosionSuite.scala +++ b/utils/src/test/scala/za/co/absa/enceladus/utils/ExplosionSuite.scala @@ -17,7 +17,7 @@ package za.co.absa.enceladus.utils import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions._ -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import org.slf4j.LoggerFactory import za.co.absa.spark.hats.Extensions._ import za.co.absa.enceladus.utils.explode.ExplodeTools @@ -25,7 +25,7 @@ import za.co.absa.enceladus.utils.general.JsonUtils import za.co.absa.enceladus.utils.schema.SchemaUtils import za.co.absa.enceladus.utils.testUtils.SparkTestBase -class ExplosionSuite extends FunSuite with SparkTestBase { +class ExplosionSuite extends AnyFunSuite with SparkTestBase { private val logger = LoggerFactory.getLogger(this.getClass) diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/broadcast/BroadcastUtilsSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/broadcast/BroadcastUtilsSuite.scala index a5f5e0d08..10ffbb094 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/utils/broadcast/BroadcastUtilsSuite.scala +++ b/utils/src/test/scala/za/co/absa/enceladus/utils/broadcast/BroadcastUtilsSuite.scala @@ -17,13 +17,13 @@ package za.co.absa.enceladus.utils.broadcast import org.apache.spark.sql.functions._ import org.apache.spark.sql.{DataFrame, Row} -import org.scalatest.WordSpec +import org.scalatest.wordspec.AnyWordSpec import za.co.absa.enceladus.utils.error.Mapping import za.co.absa.enceladus.utils.testUtils.{LoggerTestBase, SparkTestBase} import scala.collection.mutable -class BroadcastUtilsSuite extends WordSpec with SparkTestBase with LoggerTestBase { +class BroadcastUtilsSuite extends AnyWordSpec with SparkTestBase with LoggerTestBase { import spark.implicits._ diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/broadcast/LocalMappingTableSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/broadcast/LocalMappingTableSuite.scala index b71fbd917..4de7ae9a9 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/utils/broadcast/LocalMappingTableSuite.scala +++ b/utils/src/test/scala/za/co/absa/enceladus/utils/broadcast/LocalMappingTableSuite.scala 
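// Sketch of how callers can satisfy the implicit DistributedFsUtils parameter introduced on
// PerformanceMetricTools earlier in this diff: construct one concrete implementation (HdfsUtils here,
// S3FsUtils would fit the same slot) and let it flow in implicitly. MetricsWiringSketch and the
// path arguments are illustrative only, and a SparkSession `spark` is assumed to be in scope.
import org.apache.spark.sql.SparkSession
import za.co.absa.enceladus.utils.fs.{DistributedFsUtils, HdfsUtils}

object MetricsWiringSketch {
  def directorySizes(inputPath: String, outputPath: String)(implicit spark: SparkSession): (Long, Long) = {
    implicit val fsUtils: DistributedFsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)
    // Any PerformanceMetricTools call made in this scope now resolves fsUtils implicitly;
    // the same kind of size queries it performs internally are shown here directly:
    (fsUtils.getDirectorySize(inputPath), fsUtils.getDirectorySizeNoHidden(outputPath))
  }
}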
@@ -17,11 +17,11 @@ package za.co.absa.enceladus.utils.broadcast import org.apache.spark.sql.Row import org.apache.spark.sql.types.{NumericType, StringType, StructType} -import org.scalatest.WordSpec +import org.scalatest.wordspec.AnyWordSpec import za.co.absa.enceladus.utils.general.JsonUtils import za.co.absa.enceladus.utils.testUtils.SparkTestBase -class LocalMappingTableSuite extends WordSpec with SparkTestBase { +class LocalMappingTableSuite extends AnyWordSpec with SparkTestBase { import spark.implicits._ diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/config/ConfigUtilsSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/config/ConfigUtilsSuite.scala index 2cbe22413..aaa50e799 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/utils/config/ConfigUtilsSuite.scala +++ b/utils/src/test/scala/za/co/absa/enceladus/utils/config/ConfigUtilsSuite.scala @@ -16,11 +16,13 @@ package za.co.absa.enceladus.utils.config import com.typesafe.config.ConfigFactory -import org.scalatest.{FlatSpec, Matchers} +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers import za.co.absa.enceladus.utils.config.ConfigUtils.ConfigImplicits + import scala.collection.JavaConverters._ -class ConfigUtilsSuite extends FlatSpec with Matchers { +class ConfigUtilsSuite extends AnyFlatSpec with Matchers { val conf = ConfigFactory.parseMap(Map( "some.string.key" -> "string1", diff --git a/utils/src/test/scala/za/co/absa/enceladus/FsUtilsSpec.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/fs/HdfsUtilsSpec.scala similarity index 63% rename from utils/src/test/scala/za/co/absa/enceladus/FsUtilsSpec.scala rename to utils/src/test/scala/za/co/absa/enceladus/utils/fs/HdfsUtilsSpec.scala index 1df5c103d..5f7b39e8c 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/FsUtilsSpec.scala +++ b/utils/src/test/scala/za/co/absa/enceladus/utils/fs/HdfsUtilsSpec.scala @@ -13,39 +13,39 @@ * limitations under the License. 
*/ -package za.co.absa.enceladus +package za.co.absa.enceladus.utils.fs import java.io.FileNotFoundException import org.apache.hadoop.fs.Path -import org.scalatest.{Matchers, WordSpec} -import za.co.absa.enceladus.utils.fs.FileSystemVersionUtils +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec import za.co.absa.enceladus.utils.testUtils.SparkTestBase /** * Unit tests for File system utils */ -class FsUtilsSpec extends WordSpec with Matchers with SparkTestBase { - val fsUtils = new FileSystemVersionUtils(spark.sparkContext.hadoopConfiguration) +class HdfsUtilsSpec extends AnyWordSpec with Matchers with SparkTestBase { + val hdfsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration) "splitUriPath" should { "split URI and path" in { val path = new Path("hdfs://some-host:8020/user/data/input") - val (prefix, rawPath) = fsUtils.splitUriPath(path) + val (prefix, rawPath) = hdfsUtils.splitUriPath(path) prefix shouldEqual "hdfs://some-host:8020" rawPath shouldEqual "/user/data/input" } "not split a path without URI prefix" in { val path = new Path("/projects/coreconformance/publish/dataset") - val (prefix, rawPath) = fsUtils.splitUriPath(path) + val (prefix, rawPath) = hdfsUtils.splitUriPath(path) prefix shouldEqual "" rawPath shouldEqual "/projects/coreconformance/publish/dataset" } "not split relative path" in { val path = new Path("data/input") - val (prefix, rawPath) = fsUtils.splitUriPath(path) + val (prefix, rawPath) = hdfsUtils.splitUriPath(path) prefix shouldEqual "" rawPath shouldEqual "data/input" } @@ -54,27 +54,27 @@ class FsUtilsSpec extends WordSpec with Matchers with SparkTestBase { "getDirectorySize" should { "throw an exception if the specified path does not exist" in { intercept[FileNotFoundException] { - fsUtils.getDirectorySize("src/test/resources/test_data/not_exist") + hdfsUtils.getDirectorySize("src/test/resources/test_data/not_exist") } } "return the file size if a single file is specified" in { - val dirSize = fsUtils.getDirectorySize("src/test/resources/test_data/test_dir/dummy.txt") + val dirSize = hdfsUtils.getDirectorySize("src/test/resources/test_data/test_dir/dummy.txt") assert(dirSize == 20L) } "return the file size if a single hidden file is specified" in { - val dirSize = fsUtils.getDirectorySize("src/test/resources/test_data/test_dir/_hidden_dummy.txt") + val dirSize = hdfsUtils.getDirectorySize("src/test/resources/test_data/test_dir/_hidden_dummy.txt") assert(dirSize == 27L) } "return the size of all files in a directory" in { - val dirSize = fsUtils.getDirectorySize("src/test/resources/test_data/test_dir") + val dirSize = hdfsUtils.getDirectorySize("src/test/resources/test_data/test_dir") assert(dirSize == 47L) } "return the size of all files recursively" in { - val dirSize = fsUtils.getDirectorySize("src/test/resources/test_data/test_dir2") + val dirSize = hdfsUtils.getDirectorySize("src/test/resources/test_data/test_dir2") assert(dirSize == 87L) } } @@ -82,32 +82,32 @@ class FsUtilsSpec extends WordSpec with Matchers with SparkTestBase { "getDirectorySizeNoHidden" should { "throw an exception if the specified path does not exist" in { intercept[FileNotFoundException] { - fsUtils.getDirectorySizeNoHidden("src/test/resources/test_data/not_exist") + hdfsUtils.getDirectorySizeNoHidden("src/test/resources/test_data/not_exist") } } "return the file size if a single file is specified" in { - val dirSize = fsUtils.getDirectorySizeNoHidden("src/test/resources/test_data/test_dir/dummy.txt") + val dirSize = 
hdfsUtils.getDirectorySizeNoHidden("src/test/resources/test_data/test_dir/dummy.txt") assert(dirSize == 20L) } "return the file size if a single hidden file is specified" in { - val dirSize = fsUtils.getDirectorySizeNoHidden("src/test/resources/test_data/test_dir/_hidden_dummy.txt") + val dirSize = hdfsUtils.getDirectorySizeNoHidden("src/test/resources/test_data/test_dir/_hidden_dummy.txt") assert(dirSize == 27L) } "return the size of all non-hidden files in a directory" in { - val dirSize = fsUtils.getDirectorySizeNoHidden("src/test/resources/test_data/test_dir") + val dirSize = hdfsUtils.getDirectorySizeNoHidden("src/test/resources/test_data/test_dir") assert(dirSize == 20L) } "return the size of all non-hidden files recursively along non-hidden paths" in { - val dirSize = fsUtils.getDirectorySizeNoHidden("src/test/resources/test_data/test_dir2") + val dirSize = hdfsUtils.getDirectorySizeNoHidden("src/test/resources/test_data/test_dir2") assert(dirSize == 40L) } "return the size of all non-hidden files if a hidden directory is specified explicitly" in { - val dirSize = fsUtils.getDirectorySizeNoHidden("src/test/resources/test_data/test_dir2/_inner_dir") + val dirSize = hdfsUtils.getDirectorySizeNoHidden("src/test/resources/test_data/test_dir2/_inner_dir") assert(dirSize == 20L) } } diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/fs/S3FsUtilsSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/fs/S3FsUtilsSuite.scala new file mode 100644 index 000000000..6d558c79d --- /dev/null +++ b/utils/src/test/scala/za/co/absa/enceladus/utils/fs/S3FsUtilsSuite.scala @@ -0,0 +1,394 @@ +/* + * Copyright 2018 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package za.co.absa.enceladus.utils.fs + +import org.mockito.captor.{ArgCaptor, Captor} +import org.mockito.scalatest.IdiomaticMockito +import org.mockito.{ArgumentMatchers, Mockito} +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers +import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider +import software.amazon.awssdk.core.ResponseBytes +import software.amazon.awssdk.regions.Region +import software.amazon.awssdk.services.s3.S3Client +import software.amazon.awssdk.services.s3.model._ +import za.co.absa.atum.persistence.S3KmsSettings + +import scala.collection.JavaConverters._ + +class S3FsUtilsSuite extends AnyFlatSpec with IdiomaticMockito with Matchers { + + val kmsSettings = S3KmsSettings("testingKeyId123") + val region = Region.EU_WEST_2 + + implicit val credentialsProvider = DefaultCredentialsProvider.create() + + // common fixture for all tests + def fixture = new { + val mockedS3Client = mock[S3Client] + val mockedS3FsUtils = new S3FsUtils(region, kmsSettings) { + override def getS3Client: S3Client = mockedS3Client + + override val maxKeys = 3 // to test recursion for listing + } + } + + "S3FsUtilsTest" should "detect existing file" in { + val f = fixture + val path = "s3://bucket1/path/to/existing.file" + + // mock S3 response for exist + val mockedResponse: HeadObjectResponse = mock[HeadObjectResponse] + Mockito.when(f.mockedS3Client.headObject(any[HeadObjectRequest])).thenReturn(mockedResponse) + + val existResult = f.mockedS3FsUtils.exists(path) + + // verify request content + val requestCaptor: Captor[HeadObjectRequest] = ArgCaptor[HeadObjectRequest] + Mockito.verify(f.mockedS3Client).headObject(requestCaptor.capture) + val capturedGetRequest = requestCaptor.value + + capturedGetRequest.bucket shouldBe "bucket1" + capturedGetRequest.key shouldBe "path/to/existing.file" + + // verify returned value + existResult shouldBe true + } + + it should "detect non-existing file" in { + val f = fixture + val path = "s3://bucket1b/path/to/non-existing.file" + + // mock S3 response for exist + Mockito.when(f.mockedS3Client.headObject(any[HeadObjectRequest])) + .thenThrow(NoSuchKeyException.builder.message("the file does not exist!").build()) + + val existResult = f.mockedS3FsUtils.exists(path) + + // verify request content + val requestCaptor: Captor[HeadObjectRequest] = ArgCaptor[HeadObjectRequest] + Mockito.verify(f.mockedS3Client).headObject(requestCaptor.capture) + val capturedGetRequest = requestCaptor.value + + capturedGetRequest.bucket shouldBe "bucket1b" + capturedGetRequest.key shouldBe "path/to/non-existing.file" + + // verify returned value + existResult shouldBe false + } + + it should "read data from S3 path" in { + val f = fixture + val path = "s3://bucket2/path/to/read.file" + val mockedFileContent = "This is the file content on S3" + + val mockedResponseWithContent: ResponseBytes[GetObjectResponse] = mock[ResponseBytes[GetObjectResponse]] + + // mock S3 response + Mockito.when(f.mockedS3Client.getObjectAsBytes(ArgumentMatchers.any[GetObjectRequest])).thenReturn(mockedResponseWithContent) + Mockito.when(mockedResponseWithContent.asUtf8String()).thenReturn(mockedFileContent) + + val readingResult = f.mockedS3FsUtils.read(path) + + // verify request content + val requestCaptor: Captor[GetObjectRequest] = ArgCaptor[GetObjectRequest] + Mockito.verify(f.mockedS3Client).getObjectAsBytes(requestCaptor.capture) + val capturedGetRequest = requestCaptor.value + + capturedGetRequest.bucket shouldBe "bucket2" +
capturedGetRequest.key shouldBe "path/to/read.file" + + // verify returned value + readingResult shouldBe mockedFileContent + } + + private case class MockedObjectDef(path: String, size: Long = 0L) { + def toObject: S3Object = S3Object.builder().key(path).size(size).build + } + + private val mockedObjects1 = Seq( + MockedObjectDef("/dir/to/size/.hidden_file1.abc", 1L), + MockedObjectDef("/dir/to/size/_hidden.file2.abc", 2L), + MockedObjectDef("/dir/to/size/regular-file3.abc", 4L) + ).map(_.toObject) + + private val mockedObjects2 = Seq( + MockedObjectDef("/dir/to/size/.hidden_file10.abc", 10L), + MockedObjectDef("/dir/to/size/_hidden.file20.abc", 20L), + MockedObjectDef("/dir/to/size/regular-file30.gz", 40L) + ).map(_.toObject) + + it should "get dir size - simple (no filtering, no pagination)" in { + val f = fixture + val path = "s3://bucket3/dir/to/size" + + val mockedListResponse: ListObjectsV2Response = ListObjectsV2Response.builder() + .isTruncated(false) + .contents(mockedObjects1.asJava) + .build + + // mock S3 response + Mockito.when(f.mockedS3Client.listObjectsV2(ArgumentMatchers.any[ListObjectsV2Request])).thenReturn(mockedListResponse) + val dirSizeResult = f.mockedS3FsUtils.getDirectorySize(path) + + // verify request content + val requestCaptor: Captor[ListObjectsV2Request] = ArgCaptor[ListObjectsV2Request] + Mockito.verify(f.mockedS3Client).listObjectsV2(requestCaptor.capture) + val capturedListRequest = requestCaptor.value + + capturedListRequest.bucket shouldBe "bucket3" + capturedListRequest.prefix shouldBe "dir/to/size" + capturedListRequest.continuationToken shouldBe null + + // verify returned value + dirSizeResult shouldBe 7L + } + + { + val (f1, f2) = (fixture, fixture) + Seq( + (f1, "all files", (f1.mockedS3FsUtils.getDirectorySize(_)): String => Long, 77L), + (f2, "only non-hidden", (f2.mockedS3FsUtils.getDirectorySizeNoHidden(_)): String => Long, 44L) + ) + }.foreach { case (f, testCaseName, getSizeOp, expectedSize) => + + it should s"get dir size for $testCaseName - with pagination listing" in { + val path = "s3://bucket3b/dir/to/size" + + val mockedListResponses: Seq[ListObjectsV2Response] = Seq( + ListObjectsV2Response.builder().isTruncated(true).nextContinuationToken("token1") + .contents(mockedObjects1.asJava).build, + ListObjectsV2Response.builder().isTruncated(false) + .contents(mockedObjects2.asJava).build + ) + + // mock S3 responses + Mockito.when(f.mockedS3Client.listObjectsV2(ArgumentMatchers.any[ListObjectsV2Request])) + .thenReturn(mockedListResponses(0)) + .thenReturn(mockedListResponses(1)) + val dirSizeResult = getSizeOp(path) + + // verify request content + val requestCaptor: Captor[ListObjectsV2Request] = ArgCaptor[ListObjectsV2Request] + Mockito.verify(f.mockedS3Client, Mockito.times(2)).listObjectsV2(requestCaptor.capture) + val capturedListRequests = requestCaptor.values + + // bucket & path should always be the same + capturedListRequests.foreach(_.bucket shouldBe "bucket3b") + capturedListRequests.foreach(_.prefix shouldBe "dir/to/size") + + // when truncated, the continuationToken was passed along to the next request to resume correctly + capturedListRequests.map(_.continuationToken) shouldBe List(null, "token1") + + // verify returned value + dirSizeResult shouldBe expectedSize + } + } + + Seq( + ("non-splittable", mockedObjects2, true), + ("splittable", mockedObjects1, false) + ).foreach { case (testCaseName, mockedObjects, expectedNonSplitability) => + it should s"find the file list be $testCaseName (simple case, no pagination)" in { + 
val f = fixture + val path = "s3://bucket4/dir/to/split" + + val mockedListResponse: ListObjectsV2Response = ListObjectsV2Response.builder() + .isTruncated(false) + .contents(mockedObjects.asJava) + .build + + // mock S3 response + Mockito.when(f.mockedS3Client.listObjectsV2(ArgumentMatchers.any[ListObjectsV2Request])).thenReturn(mockedListResponse) + val isNonSplittableResult = f.mockedS3FsUtils.isNonSplittable(path) + + // verify request content + val requestCaptor: Captor[ListObjectsV2Request] = ArgCaptor[ListObjectsV2Request] + Mockito.verify(f.mockedS3Client).listObjectsV2(requestCaptor.capture) + val capturedListRequest = requestCaptor.value + + capturedListRequest.bucket shouldBe "bucket4" + capturedListRequest.prefix shouldBe "dir/to/split" + capturedListRequest.continuationToken shouldBe null + + // verify returned value + isNonSplittableResult shouldBe expectedNonSplitability + } + } + + it should s"find the file list be non-splittable with breakOut" in { + val f = fixture + val path = "s3://bucket4b/dir/to/split" + + val mockedListResponses: Seq[ListObjectsV2Response] = Seq( + ListObjectsV2Response.builder().isTruncated(true).nextContinuationToken("token1") + .contents(mockedObjects1.asJava).build, + ListObjectsV2Response.builder().isTruncated(true).nextContinuationToken("token2") + .contents(mockedObjects2.asJava).build + ) + + // mock S3 responses: pretend that there could be a third response with objects, but it should not be reached + // because a non-splittable file was already found and the breakOut should prevent further processing + Mockito.when(f.mockedS3Client.listObjectsV2(ArgumentMatchers.any[ListObjectsV2Request])) + .thenReturn(mockedListResponses(0)) + .thenReturn(mockedListResponses(1)) + .thenThrow(new IllegalStateException("Unwanted state - breakOut for non-splittability does not work")) + val isNonSplittableResult = f.mockedS3FsUtils.isNonSplittable(path) + + // verify request content + val requestCaptor: Captor[ListObjectsV2Request] = ArgCaptor[ListObjectsV2Request] + Mockito.verify(f.mockedS3Client, Mockito.times(2)).listObjectsV2(requestCaptor.capture) + val capturedListRequests = requestCaptor.values + + // bucket & path should always be the same + capturedListRequests.foreach(_.bucket shouldBe "bucket4b") + capturedListRequests.foreach(_.prefix shouldBe "dir/to/split") + + // when truncated, the continuationToken was passed along to the next request to resume correctly + capturedListRequests.map(_.continuationToken) shouldBe List(null, "token1") + + // verify returned value + isNonSplittableResult shouldBe true + } + + it should s"delete files - with pagination listing" in { + val f = fixture + val path = "s3://bucket5/dir/to/delete" + + // mock S3 list responses + val mockedListResponses: Seq[ListObjectsV2Response] = Seq( + ListObjectsV2Response.builder().isTruncated(true).nextContinuationToken("token1") + .contents(mockedObjects1.asJava).build, + ListObjectsV2Response.builder().isTruncated(false) + .contents(mockedObjects2.asJava).build + ) + Mockito.when(f.mockedS3Client.listObjectsV2(ArgumentMatchers.any[ListObjectsV2Request])) + .thenReturn(mockedListResponses(0)) + .thenReturn(mockedListResponses(1)) + + // mock delete responses + val mockedDeleteResponse = mock[DeleteObjectsResponse] + Mockito.when(f.mockedS3Client.deleteObjects(ArgumentMatchers.any[DeleteObjectsRequest])) + .thenReturn(mockedDeleteResponse) + Mockito.when(mockedDeleteResponse.errors).thenReturn(List.empty[S3Error].asJava) + + f.mockedS3FsUtils.deleteDirectoryRecursively(path) + +
// verify list request contents + val listRequestCaptor: Captor[ListObjectsV2Request] = ArgCaptor[ListObjectsV2Request] + Mockito.verify(f.mockedS3Client, Mockito.times(2)).listObjectsV2(listRequestCaptor.capture) + val capturedListRequests = listRequestCaptor.values + + // bucket & path should always be the same + capturedListRequests.foreach(_.bucket shouldBe "bucket5") + capturedListRequests.foreach(_.prefix shouldBe "dir/to/delete") + + // when truncated, the continuationToken was passed along to the next request to resume correctly + capturedListRequests.map(_.continuationToken) shouldBe List(null, "token1") + + // verify delete requests made + val deleteRequestCaptor: Captor[DeleteObjectsRequest] = ArgCaptor[DeleteObjectsRequest] + Mockito.verify(f.mockedS3Client, Mockito.times(2)).deleteObjects(deleteRequestCaptor.capture) + val capturedDeleteRequests = deleteRequestCaptor.values + + capturedDeleteRequests.foreach(_.bucket shouldBe "bucket5") + // the requests should hold the paths listed + val deletedKeysRequested = capturedDeleteRequests.flatMap(_.delete.objects.asScala.map(_.key)) + deletedKeysRequested should contain theSameElementsInOrderAs (mockedObjects1 ++ mockedObjects2).map(_.key) + } + + private val unrelatedVersionObjects = Seq( + MockedObjectDef("publish/path/enceladus_info_date=2020-02-22/enceladus_info_version=aaaa/unrelated.file"), + MockedObjectDef("publish/path/enceladus_info_date=2020-02-22/enceladus_info_version=-6/unrelated.file") + ).map(_.toObject) + + Seq( + ("unrelated objects", unrelatedVersionObjects), + ("no objects", List.empty[S3Object]) + ).foreach { case (testCaseName, mockedObjects) => + it should s"find the latest version (simple case of $testCaseName - no recursion) to be 0" in { + val f = fixture + val path = "s3://bucket6/publish/path" + val reportDate = "2020-02-22" + + // mock S3 list response + val mockedListResponse = ListObjectsV2Response.builder().isTruncated(false) + .contents(mockedObjects.asJava).build + Mockito.when(f.mockedS3Client.listObjectsV2(ArgumentMatchers.any[ListObjectsV2Request])) + .thenReturn(mockedListResponse) + + val latestVersion = f.mockedS3FsUtils.getLatestVersion(path, reportDate) + + // verify request content + val requestCaptor: Captor[ListObjectsV2Request] = ArgCaptor[ListObjectsV2Request] + Mockito.verify(f.mockedS3Client).listObjectsV2(requestCaptor.capture) + val capturedListRequests = requestCaptor.value + + // bucket & path should always be the same + capturedListRequests.bucket shouldBe "bucket6" + capturedListRequests.prefix shouldBe "publish/path/enceladus_info_date=2020-02-22/enceladus_info_version=" + + // verify returned value + latestVersion shouldBe 0 + } + } + + it should s"find the latest version (with recursion)" in { + val f = fixture + val path = "s3://bucket6b/publish/path" + val reportDate = "2020-02-22" + + // mock S3 list responses + val mockedObjectForVersionLookoup1 = Seq( + MockedObjectDef("publish/path/enceladus_info_date=2020-02-22/enceladus_info_version=1/file.abc"), + MockedObjectDef("publish/path/enceladus_info_date=2020-02-22/enceladus_info_version=2/file2.abc"), + MockedObjectDef("publish/path/enceladus_info_date=2020-02-22/enceladus_info_version=BOGUS/bogus.file") + ).map(_.toObject) + + val mockedObjectForVersionLookoup2 = Seq( + MockedObjectDef("publish/path/enceladus_info_date=2020-02-22/enceladus_info_version=4/file.abc"), + MockedObjectDef("publish/path/enceladus_info_date=2020-02-22/enceladus_info_version=6/.hidden.abc") // hidden = no problem + ).map(_.toObject) + + val
mockedListResponses: Seq[ListObjectsV2Response] = Seq( + ListObjectsV2Response.builder().isTruncated(true).nextContinuationToken("token1") + .contents(mockedObjectForVersionLookoup1.asJava).build, + ListObjectsV2Response.builder().isTruncated(false) + .contents(mockedObjectForVersionLookoup2.asJava).build + ) + + Mockito.when(f.mockedS3Client.listObjectsV2(ArgumentMatchers.any[ListObjectsV2Request])) + .thenReturn(mockedListResponses(0)) + .thenReturn(mockedListResponses(1)) + val latestVersion = f.mockedS3FsUtils.getLatestVersion(path, reportDate) + + // verify request content + val requestCaptor: Captor[ListObjectsV2Request] = ArgCaptor[ListObjectsV2Request] + Mockito.verify(f.mockedS3Client, Mockito.times(2)).listObjectsV2(requestCaptor.capture) + val capturedListRequests = requestCaptor.values + + // bucket & path should always be the same + capturedListRequests.foreach(_.bucket shouldBe "bucket6b") + capturedListRequests.foreach(_.prefix shouldBe "publish/path/enceladus_info_date=2020-02-22/enceladus_info_version=") + + // when truncated, the continuationToken was passed along to the next request to resume correctly + capturedListRequests.map(_.continuationToken) shouldBe List(null, "token1") + + // verify returned value + latestVersion shouldBe 6 + } + +} diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/general/AlgorithmsSpec.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/general/AlgorithmsSpec.scala index 24b550a64..ba3a066fb 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/utils/general/AlgorithmsSpec.scala +++ b/utils/src/test/scala/za/co/absa/enceladus/utils/general/AlgorithmsSpec.scala @@ -15,9 +15,9 @@ package za.co.absa.enceladus.utils.general -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite -class AlgorithmsSpec extends FunSuite { +class AlgorithmsSpec extends AnyFunSuite { case class Person(firstName: String, lastName: String) private val people = Seq(Person("Andrew", "Mikels"), Person("Andrew", "Gross"), diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/general/ConfigReaderSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/general/ConfigReaderSuite.scala index e771f273e..31e96a5b7 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/utils/general/ConfigReaderSuite.scala +++ b/utils/src/test/scala/za/co/absa/enceladus/utils/general/ConfigReaderSuite.scala @@ -16,10 +16,10 @@ package za.co.absa.enceladus.utils.general import com.typesafe.config.ConfigFactory -import org.scalatest.WordSpec +import org.scalatest.wordspec.AnyWordSpec import za.co.absa.enceladus.utils.config.ConfigReader -class ConfigReaderSuite extends WordSpec { +class ConfigReaderSuite extends AnyWordSpec { private val config = ConfigFactory.parseString( """ |top = default diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/general/JsonUtilsSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/general/JsonUtilsSuite.scala index c4a38e318..5204f398a 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/utils/general/JsonUtilsSuite.scala +++ b/utils/src/test/scala/za/co/absa/enceladus/utils/general/JsonUtilsSuite.scala @@ -15,10 +15,10 @@ package za.co.absa.enceladus.utils.general -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.utils.testUtils.SparkTestBase -class JsonUtilsSuite extends FunSuite with SparkTestBase { +class JsonUtilsSuite extends AnyFunSuite with SparkTestBase { test("Test JSON pretty formatting from a JSON string") { val inputJson = 
"""[{"id":1,"items":[{"itemid":100,"subitems":[{"elems":[{"numbers":["1","2","3b","4","5c","6"]}],"code":100}]}]}]""" val expected = """[ { diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/general/SectionSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/general/SectionSuite.scala index 15c97adbb..d1b863502 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/utils/general/SectionSuite.scala +++ b/utils/src/test/scala/za/co/absa/enceladus/utils/general/SectionSuite.scala @@ -17,11 +17,11 @@ package za.co.absa.enceladus.utils.general import java.security.InvalidParameterException -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import scala.util.{Failure, Try} -class SectionSuite extends FunSuite { +class SectionSuite extends AnyFunSuite { private def checkSectionRemoveExtractInject( section: Section, diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/implicits/DataFrameImplicitsSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/implicits/DataFrameImplicitsSuite.scala index 0e89fe223..9b944fe35 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/utils/implicits/DataFrameImplicitsSuite.scala +++ b/utils/src/test/scala/za/co/absa/enceladus/utils/implicits/DataFrameImplicitsSuite.scala @@ -15,11 +15,11 @@ package za.co.absa.enceladus.utils.implicits -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.utils.implicits.DataFrameImplicits.DataFrameEnhancements import za.co.absa.enceladus.utils.testUtils.SparkTestBase -class DataFrameImplicitsSuite extends FunSuite with SparkTestBase { +class DataFrameImplicitsSuite extends AnyFunSuite with SparkTestBase { import spark.implicits._ private val columnName = "data" diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/implicits/StringImplicitsSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/implicits/StringImplicitsSuite.scala index 9e3456433..199533d21 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/utils/implicits/StringImplicitsSuite.scala +++ b/utils/src/test/scala/za/co/absa/enceladus/utils/implicits/StringImplicitsSuite.scala @@ -17,10 +17,11 @@ package za.co.absa.enceladus.utils.implicits import java.security.InvalidParameterException -import org.scalatest.{FunSuite, Matchers} +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers import za.co.absa.enceladus.utils.implicits.StringImplicits.StringEnhancements -class StringImplicitsSuite extends FunSuite with Matchers { +class StringImplicitsSuite extends AnyFunSuite with Matchers { test("StringEnhancements.replaceChars - empty replacements") { val s = "supercalifragilisticexpialidocious" assert(s.replaceChars(Map.empty) == s) diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/schema/SchemaUtilsSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/schema/SchemaUtilsSuite.scala index a99b7c28d..bd41f998e 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/utils/schema/SchemaUtilsSuite.scala +++ b/utils/src/test/scala/za/co/absa/enceladus/utils/schema/SchemaUtilsSuite.scala @@ -15,11 +15,12 @@ package za.co.absa.enceladus.utils.schema -import org.scalatest.{FunSuite, Matchers} import org.apache.spark.sql.types._ +import org.scalatest.funsuite.AnyFunSuite +import org.scalatest.matchers.should.Matchers import za.co.absa.enceladus.utils.schema.SchemaUtils._ -class SchemaUtilsSuite extends FunSuite with Matchers { +class SchemaUtilsSuite extends AnyFunSuite with Matchers { // scalastyle:off 
magic.number private val schema = StructType(Seq( diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/schema/SparkUtilsSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/schema/SparkUtilsSuite.scala index 01db97643..a5b82b03c 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/utils/schema/SparkUtilsSuite.scala +++ b/utils/src/test/scala/za/co/absa/enceladus/utils/schema/SparkUtilsSuite.scala @@ -18,10 +18,10 @@ package za.co.absa.enceladus.utils.schema import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.{BooleanType, LongType, StructField, StructType} -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.utils.testUtils.SparkTestBase -class SparkUtilsSuite extends FunSuite with SparkTestBase { +class SparkUtilsSuite extends AnyFunSuite with SparkTestBase { import za.co.absa.enceladus.utils.implicits.DataFrameImplicits.DataFrameEnhancements diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/time/DateTimePatternSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/time/DateTimePatternSuite.scala index 9e06ae20f..c72752a4c 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/utils/time/DateTimePatternSuite.scala +++ b/utils/src/test/scala/za/co/absa/enceladus/utils/time/DateTimePatternSuite.scala @@ -18,10 +18,10 @@ package za.co.absa.enceladus.utils.time import java.security.InvalidParameterException import org.apache.spark.sql.types._ -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.utils.general.Section -class DateTimePatternSuite extends FunSuite { +class DateTimePatternSuite extends AnyFunSuite { test("Pattern for timestamp") { val pattern: String = "yyyy~mm~dd_HH.mm.ss" diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/transformations/ArrayTransformationsSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/transformations/ArrayTransformationsSuite.scala index 7cd1552bd..2384238da 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/utils/transformations/ArrayTransformationsSuite.scala +++ b/utils/src/test/scala/za/co/absa/enceladus/utils/transformations/ArrayTransformationsSuite.scala @@ -15,7 +15,7 @@ package za.co.absa.enceladus.utils.transformations -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.utils.testUtils.SparkTestBase import scala.util.Random import org.apache.spark.sql.functions._ @@ -36,7 +36,7 @@ case class MyC2(something: Int, somethingByTwo: Int) case class Nested2Levels(a: List[List[Option[Int]]]) case class Nested1Level(a: List[Option[Int]]) -class ArrayTransformationsSuite extends FunSuite with SparkTestBase { +class ArrayTransformationsSuite extends AnyFunSuite with SparkTestBase { private val inputData = (0 to 10).toList.map(x => (x, Random.shuffle((0 until x).toList))) private val inputDataOrig = OuterStruct(-1, null) :: inputData.map({ case (x, vals) => OuterStruct(x, vals.map(InnerStruct(_))) }) diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/types/DefaultsSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/types/DefaultsSuite.scala index f6667e03c..e4b5887fb 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/utils/types/DefaultsSuite.scala +++ b/utils/src/test/scala/za/co/absa/enceladus/utils/types/DefaultsSuite.scala @@ -19,11 +19,11 @@ import java.sql.{Date, Timestamp} import java.util.TimeZone import org.apache.spark.sql.types._ -import 
org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import scala.util.Success -class DefaultsSuite extends FunSuite { +class DefaultsSuite extends AnyFunSuite { TimeZone.setDefault(TimeZone.getTimeZone("UTC")) test("ByteType") { diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/types/TypedStructFieldSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/types/TypedStructFieldSuite.scala index 70837e625..8eacbd7d6 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/utils/types/TypedStructFieldSuite.scala +++ b/utils/src/test/scala/za/co/absa/enceladus/utils/types/TypedStructFieldSuite.scala @@ -18,14 +18,14 @@ package za.co.absa.enceladus.utils.types import java.text.ParseException import org.apache.spark.sql.types._ -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.utils.schema.MetadataKeys import za.co.absa.enceladus.utils.types.TypedStructField._ import za.co.absa.enceladus.utils.validation.{ValidationError, ValidationIssue, ValidationWarning} import scala.util.{Failure, Success, Try} -class TypedStructFieldSuite extends FunSuite { +class TypedStructFieldSuite extends AnyFunSuite { private implicit val defaults: Defaults = GlobalDefaults private val fieldName = "test_field" private def createField(dataType: DataType, diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/DateTimeParserSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/DateTimeParserSuite.scala index e375a1056..005286ad5 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/DateTimeParserSuite.scala +++ b/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/DateTimeParserSuite.scala @@ -18,12 +18,12 @@ package za.co.absa.enceladus.utils.types.parsers import java.sql.{Date, Timestamp} import java.text.{ParseException, SimpleDateFormat} -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.utils.time.TimeZoneNormalizer case class TestInputRow(id: Int, stringField: String) -class DateTimeParserSuite extends FunSuite{ +class DateTimeParserSuite extends AnyFunSuite{ TimeZoneNormalizer.normalizeJVMTimeZone() test("EnceladusDateParser class epoch") { diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/DecimalParserSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/DecimalParserSuite.scala index 1020985d0..785ff1d76 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/DecimalParserSuite.scala +++ b/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/DecimalParserSuite.scala @@ -15,13 +15,13 @@ package za.co.absa.enceladus.utils.types.parsers -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.utils.numeric.{DecimalSymbols, NumericPattern} import za.co.absa.enceladus.utils.types.GlobalDefaults import scala.util.Success -class DecimalParserSuite extends FunSuite { +class DecimalParserSuite extends AnyFunSuite { test("No pattern, no limitations") { val decimalSymbols: DecimalSymbols = GlobalDefaults.getDecimalSymbols val pattern = NumericPattern(decimalSymbols) diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/FractionalParserSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/FractionalParserSuite.scala index 1d4238991..95b482860 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/FractionalParserSuite.scala +++ 
b/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/FractionalParserSuite.scala @@ -15,13 +15,13 @@ package za.co.absa.enceladus.utils.types.parsers -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.utils.numeric.{DecimalSymbols, NumericPattern} import za.co.absa.enceladus.utils.types.GlobalDefaults import scala.util.Success -class FractionalParserSuite extends FunSuite { +class FractionalParserSuite extends AnyFunSuite { private val reallyBigNumberString = "12345678901234567890123456789012345678901234567890123456789012345678901234567890" + "12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890" + "12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890" + diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/IntegralParser_PatternIntegralParserSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/IntegralParser_PatternIntegralParserSuite.scala index 16b80c4d0..7dfda5b70 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/IntegralParser_PatternIntegralParserSuite.scala +++ b/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/IntegralParser_PatternIntegralParserSuite.scala @@ -15,12 +15,12 @@ package za.co.absa.enceladus.utils.types.parsers -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.utils.numeric.{DecimalSymbols, NumericPattern} import za.co.absa.enceladus.utils.types.GlobalDefaults import scala.util.Success -class IntegralParser_PatternIntegralParserSuite extends FunSuite { +class IntegralParser_PatternIntegralParserSuite extends AnyFunSuite { test("No pattern, no limitations") { val decimalSymbols: DecimalSymbols = GlobalDefaults.getDecimalSymbols val pattern = NumericPattern(decimalSymbols) diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/IntegralParser_RadixIntegralParserSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/IntegralParser_RadixIntegralParserSuite.scala index a9318f583..b5125a6c3 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/IntegralParser_RadixIntegralParserSuite.scala +++ b/utils/src/test/scala/za/co/absa/enceladus/utils/types/parsers/IntegralParser_RadixIntegralParserSuite.scala @@ -15,14 +15,14 @@ package za.co.absa.enceladus.utils.types.parsers -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.utils.numeric.Radix import za.co.absa.enceladus.utils.numeric.Radix.RadixFormatException import za.co.absa.enceladus.utils.types.parsers.NumericParser.NumericParserException import scala.util.Success -class IntegralParser_RadixIntegralParserSuite extends FunSuite { +class IntegralParser_RadixIntegralParserSuite extends AnyFunSuite { test("base 10 parsing succeeds") { val parser = IntegralParser.ofRadix(Radix(10)) diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/udf/UDFBuilderSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/udf/UDFBuilderSuite.scala index b371dcc1b..38e59d725 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/utils/udf/UDFBuilderSuite.scala +++ b/utils/src/test/scala/za/co/absa/enceladus/utils/udf/UDFBuilderSuite.scala @@ -19,14 +19,14 @@ import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, import org.apache.spark.sql.expressions.UserDefinedFunction import 
org.apache.spark.sql.types.{DecimalType, DoubleType, LongType, MetadataBuilder, ShortType, StructField} -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.utils.schema.MetadataKeys import za.co.absa.enceladus.utils.types.TypedStructField._ import za.co.absa.enceladus.utils.types.parsers.{DecimalParser, FractionalParser} import za.co.absa.enceladus.utils.types.parsers.IntegralParser.{PatternIntegralParser, RadixIntegralParser} import za.co.absa.enceladus.utils.types.{Defaults, GlobalDefaults, TypedStructField} -class UDFBuilderSuite extends FunSuite { +class UDFBuilderSuite extends AnyFunSuite { private implicit val defaults: Defaults = GlobalDefaults test("Serialization and deserialization of stringUdfViaNumericParser (FractionalParser)") { diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/BinaryValidatorSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/BinaryValidatorSuite.scala index cbce9badc..58c77f995 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/BinaryValidatorSuite.scala +++ b/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/BinaryValidatorSuite.scala @@ -16,12 +16,12 @@ package za.co.absa.enceladus.utils.validation.field import org.apache.spark.sql.types.{BinaryType, MetadataBuilder, StructField} -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.utils.schema.MetadataKeys import za.co.absa.enceladus.utils.types.{Defaults, GlobalDefaults, TypedStructField} import za.co.absa.enceladus.utils.validation.{ValidationError, ValidationWarning} -class BinaryValidatorSuite extends FunSuite { +class BinaryValidatorSuite extends AnyFunSuite { private implicit val defaults: Defaults = GlobalDefaults private def field(defaultValue: Option[String] = None, encoding: Option[String] = None, nullable: Boolean = true): TypedStructField = { diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/DateFieldValidatorSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/DateFieldValidatorSuite.scala index 4ebb95342..a1bbb5004 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/DateFieldValidatorSuite.scala +++ b/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/DateFieldValidatorSuite.scala @@ -16,13 +16,13 @@ package za.co.absa.enceladus.utils.validation.field import org.apache.spark.sql.types.{DateType, MetadataBuilder, StructField} -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.utils.schema.MetadataKeys import za.co.absa.enceladus.utils.time.TimeZoneNormalizer import za.co.absa.enceladus.utils.types.{Defaults, GlobalDefaults, TypedStructField} import za.co.absa.enceladus.utils.validation.{ValidationError, ValidationIssue, ValidationWarning} -class DateFieldValidatorSuite extends FunSuite { +class DateFieldValidatorSuite extends AnyFunSuite { TimeZoneNormalizer.normalizeJVMTimeZone() private implicit val defaults: Defaults = GlobalDefaults diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/FractionalFieldValidatorSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/FractionalFieldValidatorSuite.scala index 95ed0fa00..ef26cf009 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/FractionalFieldValidatorSuite.scala +++ 
b/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/FractionalFieldValidatorSuite.scala @@ -16,13 +16,13 @@ package za.co.absa.enceladus.utils.validation.field import org.apache.spark.sql.types.{DataType, DoubleType, FloatType, MetadataBuilder, StructField} -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.utils.schema.MetadataKeys import za.co.absa.enceladus.utils.types.TypedStructField.FractionalTypeStructField import za.co.absa.enceladus.utils.types.{Defaults, GlobalDefaults, TypedStructField} import za.co.absa.enceladus.utils.validation.ValidationError -class FractionalFieldValidatorSuite extends FunSuite { +class FractionalFieldValidatorSuite extends AnyFunSuite { private implicit val defaults: Defaults = GlobalDefaults private def field(dataType: DataType, metadataBuilder: MetadataBuilder): FractionalTypeStructField[_] = { diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/IntegralFieldValidatorSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/IntegralFieldValidatorSuite.scala index fc0ce0a4d..83746b6cf 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/IntegralFieldValidatorSuite.scala +++ b/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/IntegralFieldValidatorSuite.scala @@ -16,14 +16,14 @@ package za.co.absa.enceladus.utils.validation.field import org.apache.spark.sql.types.{ByteType, DataType, IntegerType, LongType, MetadataBuilder, ShortType, StructField} -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.utils.numeric.Radix import za.co.absa.enceladus.utils.schema.MetadataKeys import za.co.absa.enceladus.utils.types.TypedStructField.IntegralTypeStructField import za.co.absa.enceladus.utils.types.{Defaults, GlobalDefaults, TypedStructField} import za.co.absa.enceladus.utils.validation.{ValidationError, ValidationWarning} -class IntegralFieldValidatorSuite extends FunSuite { +class IntegralFieldValidatorSuite extends AnyFunSuite { private implicit val defaults: Defaults = GlobalDefaults private def field(dataType: DataType, metadataBuilder: MetadataBuilder): IntegralTypeStructField[_] = { diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/NumericFieldValidatorSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/NumericFieldValidatorSuite.scala index 8cce6cda0..d93947c18 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/NumericFieldValidatorSuite.scala +++ b/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/NumericFieldValidatorSuite.scala @@ -16,13 +16,13 @@ package za.co.absa.enceladus.utils.validation.field import org.apache.spark.sql.types.{DataType, DecimalType, MetadataBuilder, StructField} -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.utils.schema.MetadataKeys import za.co.absa.enceladus.utils.types.{Defaults, GlobalDefaults, TypedStructField} import za.co.absa.enceladus.utils.types.TypedStructField.NumericTypeStructField import za.co.absa.enceladus.utils.validation.ValidationError -class NumericFieldValidatorSuite extends FunSuite { +class NumericFieldValidatorSuite extends AnyFunSuite { private implicit val defaults: Defaults = GlobalDefaults private def field(metadataBuilder: MetadataBuilder): NumericTypeStructField[_] = { diff --git 
a/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/ScalarFieldValidatorSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/ScalarFieldValidatorSuite.scala index 1531b2ed6..815c43b22 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/ScalarFieldValidatorSuite.scala +++ b/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/ScalarFieldValidatorSuite.scala @@ -16,12 +16,12 @@ package za.co.absa.enceladus.utils.validation.field import org.apache.spark.sql.types.{MetadataBuilder, StringType, StructField} -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.utils.schema.MetadataKeys import za.co.absa.enceladus.utils.types.{Defaults, GlobalDefaults, TypedStructField} import za.co.absa.enceladus.utils.validation.ValidationError -class ScalarFieldValidatorSuite extends FunSuite { +class ScalarFieldValidatorSuite extends AnyFunSuite { private implicit val defaults: Defaults = GlobalDefaults diff --git a/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/TimestampFieldValidatorSuite.scala b/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/TimestampFieldValidatorSuite.scala index 2ef9218a2..fa98563f6 100644 --- a/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/TimestampFieldValidatorSuite.scala +++ b/utils/src/test/scala/za/co/absa/enceladus/utils/validation/field/TimestampFieldValidatorSuite.scala @@ -16,13 +16,13 @@ package za.co.absa.enceladus.utils.validation.field import org.apache.spark.sql.types.{MetadataBuilder, StructField, TimestampType} -import org.scalatest.FunSuite +import org.scalatest.funsuite.AnyFunSuite import za.co.absa.enceladus.utils.schema.MetadataKeys import za.co.absa.enceladus.utils.time.TimeZoneNormalizer import za.co.absa.enceladus.utils.types.{Defaults, GlobalDefaults, TypedStructField} import za.co.absa.enceladus.utils.validation.{ValidationError, ValidationIssue, ValidationWarning} -class TimestampFieldValidatorSuite extends FunSuite { +class TimestampFieldValidatorSuite extends AnyFunSuite { TimeZoneNormalizer.normalizeJVMTimeZone() private implicit val defaults: Defaults = GlobalDefaults
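
For context on the PerformanceMetricTools hunk near the top of this section: the method no longer constructs a FileSystemVersionUtils itself but receives the file-system utilities as an implicit DistributedFsUtils parameter. A minimal caller-side sketch follows; it assumes that HdfsUtils is an implementation of DistributedFsUtils (the trait hierarchy is not shown in this diff) and the object and method names are illustrative only, not part of the patch.

import org.apache.spark.sql.SparkSession
import za.co.absa.enceladus.utils.fs.{DistributedFsUtils, HdfsUtils}

object PerformanceMetricsCallerSketch {
  // Illustrative caller: supplies the implicit DistributedFsUtils that the reworked
  // PerformanceMetricTools methods now expect.
  // Assumption: HdfsUtils implements DistributedFsUtils; on S3 an S3FsUtils instance
  // would be supplied instead.
  def measure(inputPath: String, outputPath: String)(implicit spark: SparkSession): Unit = {
    implicit val fsUtils: DistributedFsUtils = new HdfsUtils(spark.sparkContext.hadoopConfiguration)

    // the same directory-size calls that the patched method makes internally
    val inputDirSize = fsUtils.getDirectorySize(inputPath)
    val outputDataSize = fsUtils.getDirectorySizeNoHidden(outputPath)
    println(s"input: $inputDirSize bytes, output (non-hidden only): $outputDataSize bytes")
  }
}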
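
A recurring assertion in the new S3FsUtilsSuite is that whenever a ListObjectsV2Response comes back truncated, the follow-up request carries the nextContinuationToken of the previous page (capturedListRequests.map(_.continuationToken) shouldBe List(null, "token1")). The sketch below shows that pagination pattern against the AWS SDK v2 API the suite mocks. It is only an illustration of the pattern under test, not the actual S3FsUtils implementation (whose source is not part of this diff); the object name, the directorySize signature and the simplified bucket/prefix handling are assumptions.

import scala.annotation.tailrec
import scala.collection.JavaConverters._
import software.amazon.awssdk.services.s3.S3Client
import software.amazon.awssdk.services.s3.model.{ListObjectsV2Request, ListObjectsV2Response}

object S3PaginationSketch {
  // Sums object sizes under `prefix` in `bucket`, following continuation tokens page by page.
  def directorySize(s3Client: S3Client, bucket: String, prefix: String): Long = {
    @tailrec
    def loop(continuationToken: Option[String], acc: Long): Long = {
      val builder = ListObjectsV2Request.builder().bucket(bucket).prefix(prefix)
      val request = continuationToken.map(token => builder.continuationToken(token)).getOrElse(builder).build()

      val response: ListObjectsV2Response = s3Client.listObjectsV2(request)
      val pageSize = response.contents.asScala.map(_.size.longValue()).sum

      // When the listing is truncated, resume from the token returned by the previous page;
      // this is the hand-over the suite verifies via the captured continuation tokens.
      if (response.isTruncated) loop(Option(response.nextContinuationToken), acc + pageSize)
      else acc + pageSize
    }
    loop(None, 0L)
  }
}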