Skip to content

Commit

Permalink
#1461 FillNulls - move match
Browse files Browse the repository at this point in the history
  • Loading branch information
Zejnilovic committed Jul 28, 2020
1 parent 283325e commit 9d2f738
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ import org.apache.spark.sql.functions._
import za.co.absa.enceladus.conformance.config.ConformanceConfig
import za.co.absa.enceladus.utils.schema.SchemaUtils

import scala.util.{Failure, Success}

/** Companion object of the FillNulls rule interpreter; holds the rule's name constant. */
object FillNullsRuleInterpreter {
// Name of the Fill Nulls conformance rule as a compile-time string constant.
final val ruleName = "Fill Nulls Rule"
}
Expand All @@ -42,8 +44,16 @@ case class FillNullsRuleInterpreter(rule: FillNullsConformanceRule) extends Rule
rule.outputColumn
)

val sourceDataType = SchemaUtils.getFieldType(rule.inputColumn, df.schema).get
val default: Column = simpleLiteralCast(rule.value, sourceDataType)
val dataType = SchemaUtils.getFieldType(rule.inputColumn, df.schema).get
// Column used to replace nulls: the configured rule value cast to the input column's data type.
// If the cast fails, the raw string literal is used instead and a warning is logged.
val default: Column = simpleLiteralCast(rule.value, dataType) match {
  case Success(value) => value
  case Failure(_) =>
    // Line breaks of the multi-line template are replaced with a single space (not removed),
    // otherwise the sentences run together in the log ("...to DateTypefor FillNulls...").
    log.warn(
      s"""Unable to cast literal ${rule.value} to $dataType
         |for FillNulls conformance rule number ${rule.order}.
         |Using string as a fallback.""".stripMargin.replaceAll("[\\r\\n]+", " "))
    lit(rule.value)
}

if (rule.outputColumn.contains('.')) {
conformNestedField(df, default)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import za.co.absa.enceladus.dao.MenasDAO
import za.co.absa.enceladus.model.conformanceRule.ConformanceRule
import za.co.absa.enceladus.utils.transformations.ArrayTransformations

import scala.util.{Failure, Success, Try}
import scala.util.Try

trait RuleInterpreter {

Expand Down Expand Up @@ -86,7 +86,7 @@ trait RuleInterpreter {
* @param dataType DataType the value is to be cast to
* @return A Try holding the Column representation of the newly cast value, or a Failure if the cast throws
*/
def simpleLiteralCast(input: String, dataType: DataType): Column = {
def simpleLiteralCast(input: String, dataType: DataType): Try[Column] = {
Try({
dataType match {
case _: ByteType =>
Expand All @@ -112,12 +112,7 @@ trait RuleInterpreter {
case _ =>
lit(input)
}
}) match {
case Success(value) => value
case Failure(_) =>
log.warn(s"Unable to cast literal $input to $dataType")
lit(input)
}
})
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,17 @@ class FillNullsRuleSuite extends FunSuite with SparkTestBase with TestRuleBehavi
value = "Gshj1"
)

// FillNulls rule #2 for the date column: reads `date`, writes `date2`,
// with "1900-05-05" configured as the replacement value.
private val literalDateRule = FillNullsConformanceRule(
  inputColumn = "date",
  outputColumn = "date2",
  value = "1900-05-05",
  order = 2,
  controlCheckpoint = false
)

private val literalRulesList1 = List(literalRule)
private val literalRulesList2 = List(literalRule, literalArrayRule)
private val literalRulesList3 = List(literalRule, literalArrayRule, literalDateRule)

private val literalOrdersDS1 = Dataset(
name = "Orders Conformance",
Expand All @@ -53,32 +62,45 @@ class FillNullsRuleSuite extends FunSuite with SparkTestBase with TestRuleBehavi
)

private val literalOrdersDS2 = literalOrdersDS1.copy(conformance = literalRulesList2)
private val literalOrdersDS3 = literalOrdersDS2.copy(conformance = literalRulesList3)

private val conformedLiteralOrdersJSON1: String =
"""{"id":1,"name":"First Order","items":[{"itemid":"ar229","qty":10,"price":5.1,"payments":[{"payid":"pid10","amount":51.0}]},{"itemid":"2891k","qty":100,"price":1.1,"payments":[{"payid":"zk20","amount":100.0}]},{"itemid":"31239","qty":2,"price":55.2,"payments":[]}],"errCol":[],"nameNoNull":"First Order"}
|{"id":2,"items":[{"itemid":"AkuYdg","qty":100,"price":10.0,"payments":[{"payid":"d101","amount":10.0},{"payid":"d102","amount":20.0}]},{"itemid":"jUa1k0","qty":2,"price":55.2,"payments":[]}],"errCol":[],"nameNoNull":"NoNullValue"}
"""{"id":1,"name":"First Order","date":"2025-11-15","items":[{"itemid":"ar229","qty":10,"price":5.1,"payments":[{"payid":"pid10","amount":51.0}]},{"itemid":"2891k","qty":100,"price":1.1,"payments":[{"payid":"zk20","amount":100.0}]},{"itemid":"31239","qty":2,"price":55.2,"payments":[]}],"errCol":[],"nameNoNull":"First Order"}
|{"id":2,"date":"2019-03-12","items":[{"itemid":"AkuYdg","qty":100,"price":10.0,"payments":[{"payid":"d101","amount":10.0},{"payid":"d102","amount":20.0}]},{"itemid":"jUa1k0","qty":2,"price":55.2,"payments":[]}],"errCol":[],"nameNoNull":"NoNullValue"}
|{"id":3,"name":"Third Order","items":[{"qty":10,"price":10000.0,"payments":[{"payid":"pid10","amount":2000.0},{"payid":"pid10","amount":5000.0}]},{"itemid":"Jdha2","qty":100,"price":45.0,"payments":[{"payid":"zk20","amount":150.0},{"payid":"pid10","amount":2000.0}]}],"errCol":[],"nameNoNull":"Third Order"}
|{"id":4,"name":"Fourth Order","items":[{"itemid":"dLda1","qty":10,"price":5.1,"payments":[{"payid":"pid10","amount":10.0}]},{"itemid":"d2dhJ","qty":100,"price":1.1,"payments":[{"payid":"zk20","amount":15.0}]},{"itemid":"Mska0","qty":2,"price":55.2,"payments":[]},{"itemid":"Gdal1","qty":20,"price":5.2,"payments":[]},{"itemid":"dakl1","qty":99,"price":1.2,"payments":[]}],"errCol":[],"nameNoNull":"Fourth Order"}
|{"id":5,"name":"Fifths order","items":[{"itemid":"hdUs1J","qty":50,"price":0.2,"payments":[{"payid":"pid10","amount":10.0},{"payid":"pid10","amount":11.0},{"payid":"pid10","amount":12.0}]}],"errCol":[],"nameNoNull":"Fifths order"}"""
|{"id":4,"name":"Fourth Order","date":"2005-01-02","items":[{"itemid":"dLda1","qty":10,"price":5.1,"payments":[{"payid":"pid10","amount":10.0}]},{"itemid":"d2dhJ","qty":100,"price":1.1,"payments":[{"payid":"zk20","amount":15.0}]},{"itemid":"Mska0","qty":2,"price":55.2,"payments":[]},{"itemid":"Gdal1","qty":20,"price":5.2,"payments":[]},{"itemid":"dakl1","qty":99,"price":1.2,"payments":[]}],"errCol":[],"nameNoNull":"Fourth Order"}
|{"id":5,"name":"Fifths order","date":"2009-05-21","items":[{"itemid":"hdUs1J","qty":50,"price":0.2,"payments":[{"payid":"pid10","amount":10.0},{"payid":"pid10","amount":11.0},{"payid":"pid10","amount":12.0}]}],"errCol":[],"nameNoNull":"Fifths order"}"""
.stripMargin.replace("\r\n", "\n")

private val conformedLiteralOrdersJSON2: String =
"""{"id":1,"name":"First Order","items":[{"itemid":"ar229","qty":10,"price":5.1,"payments":[{"payid":"pid10","amount":51.0}],"itemid2":"ar229"},{"itemid":"2891k","qty":100,"price":1.1,"payments":[{"payid":"zk20","amount":100.0}],"itemid2":"2891k"},{"itemid":"31239","qty":2,"price":55.2,"payments":[],"itemid2":"31239"}],"errCol":[],"nameNoNull":"First Order"}
|{"id":2,"items":[{"itemid":"AkuYdg","qty":100,"price":10.0,"payments":[{"payid":"d101","amount":10.0},{"payid":"d102","amount":20.0}],"itemid2":"AkuYdg"},{"itemid":"jUa1k0","qty":2,"price":55.2,"payments":[],"itemid2":"jUa1k0"}],"errCol":[],"nameNoNull":"NoNullValue"}
"""{"id":1,"name":"First Order","date":"2025-11-15","items":[{"itemid":"ar229","qty":10,"price":5.1,"payments":[{"payid":"pid10","amount":51.0}],"itemid2":"ar229"},{"itemid":"2891k","qty":100,"price":1.1,"payments":[{"payid":"zk20","amount":100.0}],"itemid2":"2891k"},{"itemid":"31239","qty":2,"price":55.2,"payments":[],"itemid2":"31239"}],"errCol":[],"nameNoNull":"First Order"}
|{"id":2,"date":"2019-03-12","items":[{"itemid":"AkuYdg","qty":100,"price":10.0,"payments":[{"payid":"d101","amount":10.0},{"payid":"d102","amount":20.0}],"itemid2":"AkuYdg"},{"itemid":"jUa1k0","qty":2,"price":55.2,"payments":[],"itemid2":"jUa1k0"}],"errCol":[],"nameNoNull":"NoNullValue"}
|{"id":3,"name":"Third Order","items":[{"qty":10,"price":10000.0,"payments":[{"payid":"pid10","amount":2000.0},{"payid":"pid10","amount":5000.0}],"itemid2":"Gshj1"},{"itemid":"Jdha2","qty":100,"price":45.0,"payments":[{"payid":"zk20","amount":150.0},{"payid":"pid10","amount":2000.0}],"itemid2":"Jdha2"}],"errCol":[],"nameNoNull":"Third Order"}
|{"id":4,"name":"Fourth Order","items":[{"itemid":"dLda1","qty":10,"price":5.1,"payments":[{"payid":"pid10","amount":10.0}],"itemid2":"dLda1"},{"itemid":"d2dhJ","qty":100,"price":1.1,"payments":[{"payid":"zk20","amount":15.0}],"itemid2":"d2dhJ"},{"itemid":"Mska0","qty":2,"price":55.2,"payments":[],"itemid2":"Mska0"},{"itemid":"Gdal1","qty":20,"price":5.2,"payments":[],"itemid2":"Gdal1"},{"itemid":"dakl1","qty":99,"price":1.2,"payments":[],"itemid2":"dakl1"}],"errCol":[],"nameNoNull":"Fourth Order"}
|{"id":5,"name":"Fifths order","items":[{"itemid":"hdUs1J","qty":50,"price":0.2,"payments":[{"payid":"pid10","amount":10.0},{"payid":"pid10","amount":11.0},{"payid":"pid10","amount":12.0}],"itemid2":"hdUs1J"}],"errCol":[],"nameNoNull":"Fifths order"}"""
|{"id":4,"name":"Fourth Order","date":"2005-01-02","items":[{"itemid":"dLda1","qty":10,"price":5.1,"payments":[{"payid":"pid10","amount":10.0}],"itemid2":"dLda1"},{"itemid":"d2dhJ","qty":100,"price":1.1,"payments":[{"payid":"zk20","amount":15.0}],"itemid2":"d2dhJ"},{"itemid":"Mska0","qty":2,"price":55.2,"payments":[],"itemid2":"Mska0"},{"itemid":"Gdal1","qty":20,"price":5.2,"payments":[],"itemid2":"Gdal1"},{"itemid":"dakl1","qty":99,"price":1.2,"payments":[],"itemid2":"dakl1"}],"errCol":[],"nameNoNull":"Fourth Order"}
|{"id":5,"name":"Fifths order","date":"2009-05-21","items":[{"itemid":"hdUs1J","qty":50,"price":0.2,"payments":[{"payid":"pid10","amount":10.0},{"payid":"pid10","amount":11.0},{"payid":"pid10","amount":12.0}],"itemid2":"hdUs1J"}],"errCol":[],"nameNoNull":"Fifths order"}"""
.stripMargin.replace("\r\n", "\n")

private val conformedLiteralOrdersJSON3: String =
"""{"id":1,"name":"First Order","date":"2025-11-15","items":[{"itemid":"ar229","qty":10,"price":5.1,"payments":[{"payid":"pid10","amount":51.0}],"itemid2":"ar229"},{"itemid":"2891k","qty":100,"price":1.1,"payments":[{"payid":"zk20","amount":100.0}],"itemid2":"2891k"},{"itemid":"31239","qty":2,"price":55.2,"payments":[],"itemid2":"31239"}],"errCol":[],"nameNoNull":"First Order","date2":"2025-11-15"}
|{"id":2,"date":"2019-03-12","items":[{"itemid":"AkuYdg","qty":100,"price":10.0,"payments":[{"payid":"d101","amount":10.0},{"payid":"d102","amount":20.0}],"itemid2":"AkuYdg"},{"itemid":"jUa1k0","qty":2,"price":55.2,"payments":[],"itemid2":"jUa1k0"}],"errCol":[],"nameNoNull":"NoNullValue","date2":"2019-03-12"}
|{"id":3,"name":"Third Order","items":[{"qty":10,"price":10000.0,"payments":[{"payid":"pid10","amount":2000.0},{"payid":"pid10","amount":5000.0}],"itemid2":"Gshj1"},{"itemid":"Jdha2","qty":100,"price":45.0,"payments":[{"payid":"zk20","amount":150.0},{"payid":"pid10","amount":2000.0}],"itemid2":"Jdha2"}],"errCol":[],"nameNoNull":"Third Order","date2":"1900-05-05"}
|{"id":4,"name":"Fourth Order","date":"2005-01-02","items":[{"itemid":"dLda1","qty":10,"price":5.1,"payments":[{"payid":"pid10","amount":10.0}],"itemid2":"dLda1"},{"itemid":"d2dhJ","qty":100,"price":1.1,"payments":[{"payid":"zk20","amount":15.0}],"itemid2":"d2dhJ"},{"itemid":"Mska0","qty":2,"price":55.2,"payments":[],"itemid2":"Mska0"},{"itemid":"Gdal1","qty":20,"price":5.2,"payments":[],"itemid2":"Gdal1"},{"itemid":"dakl1","qty":99,"price":1.2,"payments":[],"itemid2":"dakl1"}],"errCol":[],"nameNoNull":"Fourth Order","date2":"2005-01-02"}
|{"id":5,"name":"Fifths order","date":"2009-05-21","items":[{"itemid":"hdUs1J","qty":50,"price":0.2,"payments":[{"payid":"pid10","amount":10.0},{"payid":"pid10","amount":11.0},{"payid":"pid10","amount":12.0}],"itemid2":"hdUs1J"}],"errCol":[],"nameNoNull":"Fifths order","date2":"2009-05-21"}"""
.stripMargin.replace("\r\n", "\n")

private val inputDf: DataFrame = spark.createDataFrame(DeepArraySamples.ordersDataWithNulls)

test("Literal conformance rule test 1") {
test("FillNulls conformance rule test 1") {
conformanceRuleShouldMatchExpected(inputDf, literalOrdersDS1, conformedLiteralOrdersJSON1)
}

test("Literal conformance rule test 2") {
test("FillNulls conformance rule test 2") {
conformanceRuleShouldMatchExpected(inputDf, literalOrdersDS2, conformedLiteralOrdersJSON2)
}

// Runs the dataset whose rule list also contains the date rule (literalRulesList3)
// against the shared input and compares with the third expected JSON fixture.
test("FillNulls conformance rule test 3") {
conformanceRuleShouldMatchExpected(inputDf, literalOrdersDS3, conformedLiteralOrdersJSON3)
}

// scalastyle:on line.size.limit
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

package za.co.absa.enceladus.conformance.samples

import java.sql.Date

import org.apache.spark.sql.types._

object DeepArraySamples {
Expand All @@ -26,6 +28,8 @@ object DeepArraySamples {

/**
  * Sample order record; the element type of `ordersData`.
  *
  * @param id    Order identifier
  * @param name  Order name
  * @param items Items belonging to the order
  */
case class Order(
  id: Long,
  name: String,
  items: Seq[OrderItem]
)

/**
  * Sample order record that additionally carries a date; the element type of
  * `ordersDataWithNulls`, where some records have a null `name` or a null `date`.
  *
  * @param id    Order identifier
  * @param name  Order name (null in some sample records)
  * @param date  Order date as `java.sql.Date` (null in some sample records)
  * @param items Items belonging to the order
  */
case class OrderWithDate(
  id: Long,
  name: String,
  date: Date,
  items: Seq[OrderItem]
)

val ordersData: Seq[Order] = Seq[Order](
Order(1L, "First Order", Seq[OrderItem](
OrderItem("ar229", 10, 5.1, Seq(Payment("pid10", 51.0))),
Expand All @@ -52,28 +56,28 @@ object DeepArraySamples {
))
)

val ordersDataWithNulls: Seq[Order] = Seq[Order](
Order(1L, "First Order", Seq[OrderItem](
val ordersDataWithNulls: Seq[OrderWithDate] = Seq[OrderWithDate](
OrderWithDate(1L, "First Order", Date.valueOf("2025-11-15"), Seq[OrderItem](
OrderItem("ar229", 10, 5.1, Seq(Payment("pid10", 51.0))),
OrderItem("2891k", 100, 1.1, Seq(Payment("zk20", 100.0))),
OrderItem("31239", 2, 55.2, Nil)
)),
Order(2L, null, Seq[OrderItem](
OrderWithDate(2L, null, Date.valueOf("2019-03-12"), Seq[OrderItem](
OrderItem("AkuYdg", 100, 10, Seq(Payment("d101", 10.0), Payment("d102", 20.0))),
OrderItem("jUa1k0", 2, 55.2, Nil)
)),
Order(3L, "Third Order", Seq[OrderItem](
OrderWithDate(3L, "Third Order", null, Seq[OrderItem](
OrderItem(null, 10, 10000, Seq(Payment("pid10", 2000.0), Payment("pid10", 5000.0))),
OrderItem("Jdha2", 100, 45, Seq(Payment("zk20", 150.0), Payment("pid10", 2000.0)))
)),
Order(4L, "Fourth Order", Seq[OrderItem](
OrderWithDate(4L, "Fourth Order", Date.valueOf("2005-01-02"), Seq[OrderItem](
OrderItem("dLda1", 10, 5.1, Seq(Payment("pid10", 10.0))),
OrderItem("d2dhJ", 100, 1.1, Seq(Payment("zk20", 15.0))),
OrderItem("Mska0", 2, 55.2, Nil),
OrderItem("Gdal1", 20, 5.2, Nil),
OrderItem("dakl1", 99, 1.2, Nil)
)),
Order(5L, "Fifths order", Seq[OrderItem](
OrderWithDate(5L, "Fifths order", Date.valueOf("2009-05-21"), Seq[OrderItem](
OrderItem("hdUs1J", 50, 0.2, Seq(Payment("pid10", 10.0), Payment("pid10", 11.0), Payment("pid10", 12.0)))
))
)
Expand Down

0 comments on commit 9d2f738

Please sign in to comment.