Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert log level changes in SanityChecker #465

Merged
merged 1 commit into from
Mar 9, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number | Diff line number | Diff line change
@@ -361,10 +361,10 @@ class SanityChecker(uid: String = UID[SanityChecker])
val sampleFraction = fraction(dataCount)
val sampleData: RDD[(Double, OPVector#Value)] = {
if (sampleFraction < 1.0) {
logDebug(s"Sampling the data for Sanity Checker with sample $sampleFraction and seed $sampSeed")
logInfo(s"Sampling the data for Sanity Checker with sample $sampleFraction and seed $sampSeed")
data.sample(withReplacement = false, fraction = sampleFraction, seed = sampSeed).rdd
} else {
logDebug(s"NOT sampling the data for Sanity Checker, since the calculated check sample is $sampleFraction")
logInfo(s"NOT sampling the data for Sanity Checker, since the calculated check sample is $sampleFraction")
data.rdd
}
} map {
@@ -375,7 +375,7 @@ class SanityChecker(uid: String = UID[SanityChecker])
}
sampleData.persist()

logDebug("Getting vector rows")
logInfo("Getting vector rows")
val vectorRows: RDD[OldVector] = sampleData.map {
case (0.0, sparse: SparseVector) =>
OldVectors.sparse(sparse.size + 1, sparse.indices, sparse.values)
@@ -385,7 +385,7 @@ class SanityChecker(uid: String = UID[SanityChecker])
OldVectors.dense(dense.toArray :+ label)
}.persist()

logDebug("Calculating columns stats")
logInfo("Calculating columns stats")
val colStats = Statistics.colStats(vectorRows)
val count = colStats.count
require(count > 0, "Sample size cannot be zero")
@@ -423,7 +423,7 @@ class SanityChecker(uid: String = UID[SanityChecker])
.map(f => f.index)
val localCorrIndices = (0 until featureSize + 1).diff(hashedIndices).toArray

logDebug(s"Ignoring correlations for hashed text features - out of $featureSize feature vector elements, using " +
logInfo(s"Ignoring correlations for hashed text features - out of $featureSize feature vector elements, using " +
s"${localCorrIndices.length} elements in the correlation matrix calculation")

// Exclude feature vector entries coming from hashed text features if requested
@@ -456,11 +456,11 @@ class SanityChecker(uid: String = UID[SanityChecker])
if (isDefined(categoricalLabel) && !$(categoricalLabel)) {
Array.empty[CategoricalGroupStats]
} else {
logDebug("Attempting to calculate Cramer's V between each categorical feature and the label")
logInfo("Attempting to calculate Cramer's V between each categorical feature and the label")
categoricalTests(count, featureSize, vectorMetaColumns, sampleData)
}

logDebug("Logging all statistics")
logInfo("Logging all statistics")
val stats = DerivedFeatureFilterUtils.makeColumnStatistics(
vectorMetaColumns,
colStats,
@@ -469,9 +469,9 @@ class SanityChecker(uid: String = UID[SanityChecker])
corrIndices,
categoricalStats
)
stats.foreach { stat => logDebug(stat.toString) }
stats.foreach { stat => logInfo(stat.toString) }

logDebug("Calculating features to remove")
logInfo("Calculating features to remove")
val (toDropFeatures, warnings) = if (removeBad) {
DerivedFeatureFilterUtils.getFeaturesToDrop(
stats,
Expand Down