Feature/1371 Combined Standardization Conformance Job #1392
@@ -55,7 +55,12 @@ trait ConformanceExecution extends CommonJobExecution {
     // Enable Control Framework
     import za.co.absa.atum.AtumImplicits.SparkSessionWrapper

-    spark.enableControlMeasuresTracking(s"${preparationResult.pathCfg.inputPath}/_INFO")
+    // reinitialize Control Framework in case of combined job
+    val standardizationPath = preparationResult.pathCfg.standardizationPath
+    standardizationPath.foreach(_ => spark.disableControlMeasuresTracking())
+
+    val inputPath = standardizationPath.getOrElse(preparationResult.pathCfg.inputPath)
+    spark.enableControlMeasuresTracking(s"$inputPath/_INFO")
       .setControlMeasuresWorkflow(sourceId.toString)

     // Enable control framework performance optimization for pipeline-like jobs
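For a combined standardization-and-conformance run, Atum tracking is already active when the conformance half starts, so it is torn down and re-pointed at the standardization output. A minimal hedged sketch of that control flow; `TrackingSession` and `PathConfig` here are simplified stand-ins, not the real Atum/Enceladus types:

```scala
// Hypothetical sketch of the reinitialization logic in the hunk above.
object ControlFrameworkInitSketch {

  // Stand-in for the Atum-wrapped SparkSession
  // (enableControlMeasuresTracking / disableControlMeasuresTracking in the real code).
  trait TrackingSession {
    def disable(): Unit
    def enable(infoFilePath: String): Unit
  }

  // Simplified path config: standardizationPath is Some(...) only for combined jobs.
  final case class PathConfig(inputPath: String,
                              outputPath: String,
                              standardizationPath: Option[String])

  def initControlFramework(pathCfg: PathConfig, tracking: TrackingSession): Unit = {
    // Combined job: standardization has already tracked measures, so reset first.
    pathCfg.standardizationPath.foreach(_ => tracking.disable())

    // Conformance reads the standardization output when present, else the raw input.
    val inputPath = pathCfg.standardizationPath.getOrElse(pathCfg.inputPath)
    tracking.enable(s"$inputPath/_INFO")
  }
}
```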
@@ -74,8 +79,8 @@ trait ConformanceExecution extends CommonJobExecution {
     spark.read.parquet(pathCfg.inputPath)
   }

-  protected def conform(inputData: DataFrame, preparationResult: PreparationResult)
-                       (implicit spark: SparkSession, cmd: ConformanceConfig, dao: MenasDAO): DataFrame = {
+  protected def conform[T](inputData: DataFrame, preparationResult: PreparationResult)
+                          (implicit spark: SparkSession, cmd: ConformanceParser[T], dao: MenasDAO): DataFrame = {
     val recordIdGenerationStrategy = getRecordIdGenerationStrategyFromConfig(conf)

     implicit val featureSwitcher: FeatureSwitches = conformanceReader.readFeatureSwitches()
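Generalizing `conform` from the concrete `ConformanceConfig` to `ConformanceParser[T]` lets the standalone conformance job and the combined standardization+conformance job share one implementation. A hedged sketch of the shape such a shared abstraction could take; the member set is inferred from the `InterpreterContextArgs` extractors later in this PR, not from the real trait:

```scala
import org.apache.spark.storage.StorageLevel

// Hypothetical shape of the shared config trait; T is the concrete config type,
// mirroring the diff's ConformanceParser[T]. (Reviewers below suggest renaming it
// ConformanceConfigParser, since it carries rather than parses conformance.)
trait ConformanceParser[T] {
  def datasetName: String
  def reportDate: String
  def persistStorageLevel: Option[StorageLevel]
}

// Simplified stand-ins for the two job configs; the real case classes carry more fields.
final case class ConformanceConfig(datasetName: String,
                                   reportDate: String = "",
                                   persistStorageLevel: Option[StorageLevel] = None)
  extends ConformanceParser[ConformanceConfig]

final case class StdConformanceConfig(datasetName: String,
                                      reportDate: String = "",
                                      persistStorageLevel: Option[StorageLevel] = None)
  extends ConformanceParser[StdConformanceConfig]
```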
@@ -101,18 +106,19 @@ trait ConformanceExecution extends CommonJobExecution {
     }
   }

-  protected def processConformanceResult(args: Array[String],
+  protected def processConformanceResult[T](args: Array[String],
                                          result: DataFrame,
                                          preparationResult: PreparationResult,
                                          menasCredentials: MenasCredentials)
                                         (implicit spark: SparkSession,
-                                          cmd: ConformanceConfig,
+                                          cmd: ConformanceParser[T],
                                           fsUtils: FileSystemVersionUtils): Unit = {
     val cmdLineArgs: String = args.mkString(" ")

+    val standardizationPath = preparationResult.pathCfg.standardizationPath.getOrElse(preparationResult.pathCfg.inputPath)
**Review comment:** For any reader of the code not fully familiar with the structure of the code, this would be rather confusing... (similar as in …)
+
     PerformanceMetricTools.addJobInfoToAtumMetadata(
       "conform",
-      preparationResult.pathCfg.inputPath,
+      standardizationPath,
       preparationResult.pathCfg.outputPath,
       menasCredentials.username, cmdLineArgs
     )
@@ -140,7 +146,7 @@ trait ConformanceExecution extends CommonJobExecution {
     PerformanceMetricTools.addPerformanceMetricsToAtumMetadata(
       spark,
       "conform",
-      preparationResult.pathCfg.inputPath,
+      standardizationPath,
       preparationResult.pathCfg.outputPath,
       menasCredentials.username, cmdLineArgs
     )
@@ -149,7 +155,7 @@ trait ConformanceExecution extends CommonJobExecution {
     writePerformanceMetrics(preparationResult.performance, cmd)

     if (conformanceReader.isAutocleanStdFolderEnabled()) {
-      fsUtils.deleteDirectoryRecursively(preparationResult.pathCfg.inputPath)
+      fsUtils.deleteDirectoryRecursively(standardizationPath)
     }
     log.info(s"$sourceId finished successfully")
   }
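Taken together, the hunks above redirect every metrics and cleanup call that previously used `pathCfg.inputPath` to the standardization output whenever one exists. A small REPL-style illustration of the fallback, reusing the simplified `PathConfig` stand-in from the earlier sketch (the paths are made up):

```scala
final case class PathConfig(inputPath: String,
                            outputPath: String,
                            standardizationPath: Option[String])

// Combined job: an intermediate standardization folder exists and is the target.
val combined = PathConfig("/raw/mydata", "/publish/mydata", Some("/tmp/std/mydata"))
assert(combined.standardizationPath.getOrElse(combined.inputPath) == "/tmp/std/mydata")

// Standalone conformance: the input path already is the standardization output.
val standalone = PathConfig("/std/mydata", "/publish/mydata", None)
assert(standalone.standardizationPath.getOrElse(standalone.inputPath) == "/std/mydata")
```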
@@ -17,17 +17,48 @@ package za.co.absa.enceladus.conformance.interpreter

 import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.types.StructType
-import za.co.absa.enceladus.conformance.config.ConformanceConfig
+import org.apache.spark.storage.StorageLevel
+import za.co.absa.enceladus.conformance.config.{ConformanceConfig, ConformanceParser}
 import za.co.absa.enceladus.dao.MenasDAO
 import za.co.absa.enceladus.model.{Dataset => ConfDataset}
+import za.co.absa.enceladus.standardization_conformance.config.StdConformanceConfig

 /** Holds everything that is needed in between dynamic conformance interpreter stages */
-case class InterpreterContext (
-  schema: StructType,
-  conformance: ConfDataset,
-  featureSwitches: FeatureSwitches,
-  jobShortName: String,
-  spark: SparkSession,
-  dao: MenasDAO,
-  progArgs: ConformanceConfig
-)
+case class InterpreterContextArgs(datasetName: String,
+                                  reportDate: String = "",
**Review comment:** misaligned
+                                  persistStorageLevel: Option[StorageLevel] = None
+                                 )
+
+object InterpreterContextArgs {
+  def fromConformanceConfig[T](conformanceConfig: ConformanceParser[T]): InterpreterContextArgs = {
**Review comment:** `ConformanceParser` is really a misleading class name. It's not parsing Conformance, it's carrying (maybe parsing) Conformance configuration.

**Reply:** Then `ConformanceConfigParser`?

**Reply:** I like that. If that's considered too long, I don't have a problem with …
+    conformanceConfig match {
+      case ConformanceConfigInstanceInterpreter(interpreterContextArgs) => interpreterContextArgs
+      case StdConformanceConfigInstanceInterpreter(interpreterContextArgs) => interpreterContextArgs
+      case _ => throw new Exception("")
+    }
+  }
+}
+
+object ConformanceConfigInstanceInterpreter {
+  def unapply(conformanceInstance: ConformanceConfig): Option[InterpreterContextArgs] =
+    Some(InterpreterContextArgs(conformanceInstance.datasetName: String, conformanceInstance.reportDate: String,
+      conformanceInstance.persistStorageLevel: Option[StorageLevel]))
+}
+
+object StdConformanceConfigInstanceInterpreter {
+  def unapply(conformanceInstance: StdConformanceConfig): Option[InterpreterContextArgs] =
+    Some(InterpreterContextArgs(conformanceInstance.datasetName: String, conformanceInstance.reportDate: String,
+      conformanceInstance.persistStorageLevel: Option[StorageLevel]))
+}
+
+case class InterpreterContext(
+  schema: StructType,
+  conformance: ConfDataset,
+  featureSwitches: FeatureSwitches,
+  jobShortName: String,
+  spark: SparkSession,
+  dao: MenasDAO,
+  progArgs: InterpreterContextArgs
+)
**Review comment:** `Conformance` not used.
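With the two extractor objects in place, `InterpreterContextArgs.fromConformanceConfig` reduces either config flavour to the same interpreter-facing argument bag. A hedged usage sketch, assuming the simplified stand-in configs from the earlier sketch and the extractor objects exactly as added in this diff are in scope together (field values are made up):

```scala
// REPL-style usage; ConformanceConfig / StdConformanceConfig as sketched above.
val cmd = ConformanceConfig(datasetName = "my_dataset", reportDate = "2020-01-31")
val stdCmd = StdConformanceConfig(datasetName = "my_dataset", reportDate = "2020-01-31")

// Both config flavours collapse to the same InterpreterContextArgs.
assert(InterpreterContextArgs.fromConformanceConfig(cmd) ==
  InterpreterContextArgs("my_dataset", "2020-01-31", None))
assert(InterpreterContextArgs.fromConformanceConfig(stdCmd) ==
  InterpreterContextArgs("my_dataset", "2020-01-31", None))
```

The pattern match works because Scala inserts a runtime type test before calling each extractor's `unapply`, so the `ConformanceParser[T]` scrutinee is narrowed to the concrete config class first; the fallback `case _` is the price of keeping the hierarchy open.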