Skip to content

Commit

Permalink
[GLUTEN-8616] [VL] Make filescan limit for encrypted fallback as conf…
Browse files Browse the repository at this point in the history
…igurable (#8621)
  • Loading branch information
ArnavBalyan authored Feb 7, 2025
1 parent e009208 commit 91800b4
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,13 @@ object VeloxBackendSettings extends BackendSettingsApi {
return None
}

val fileLimit = GlutenConfig.get.parquetEncryptionValidationFileLimit
val encryptionResult =
ParquetMetadataUtils.validateEncryption(format, rootPaths, serializableHadoopConf)
ParquetMetadataUtils.validateEncryption(
format,
rootPaths,
serializableHadoopConf,
fileLimit)
if (encryptionResult.ok()) {
None
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ object ParquetMetadataUtils {
def validateEncryption(
format: ReadFileFormat,
rootPaths: Seq[String],
serializableHadoopConf: Option[SerializableConfiguration]
serializableHadoopConf: Option[SerializableConfiguration],
fileLimit: Int
): ValidationResult = {
if (format != ParquetReadFormat || rootPaths.isEmpty) {
return ValidationResult.succeeded
Expand All @@ -59,7 +60,7 @@ object ParquetMetadataUtils {
val fs = new Path(rootPath).getFileSystem(conf)
try {
val encryptionDetected =
checkForEncryptionWithLimit(fs, new Path(rootPath), conf, fileLimit = 10)
checkForEncryptionWithLimit(fs, new Path(rootPath), conf, fileLimit = fileLimit)
if (encryptionDetected) {
return ValidationResult.failed("Encrypted Parquet file detected.")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,8 @@ class GlutenConfig(conf: SQLConf) extends Logging {

/**
 * Reads the `AUTO_ADJUST_STAGE_RESOURCES_FALLEN_NODE_RATIO_THRESHOLD` entry through `getConf`
 * and returns it as a `Double`.
 */
def autoAdjustStageFallenNodeThreshold: Double = {
  getConf(AUTO_ADJUST_STAGE_RESOURCES_FALLEN_NODE_RATIO_THRESHOLD)
}

/**
 * File limit used when validating Parquet encryption, read from the
 * `ENCRYPTED_PARQUET_FALLBACK_FILE_LIMIT` entry through `getConf`.
 */
def parquetEncryptionValidationFileLimit: Int = {
  getConf(ENCRYPTED_PARQUET_FALLBACK_FILE_LIMIT)
}
}

object GlutenConfig {
Expand Down Expand Up @@ -2310,4 +2312,14 @@ object GlutenConfig {
"count exceeds the total node count ratio.")
.doubleConf
.createWithDefault(0.5d)

// Internal setting: number of files checked for Parquet encryption before deciding whether
// the scan falls back. The value must be a positive integer; the default is 10.
val ENCRYPTED_PARQUET_FALLBACK_FILE_LIMIT =
  buildConf("spark.gluten.sql.fallbackEncryptedParquet.limit")
    .internal()
    .doc(
      "Number of files that will be checked to detect Parquet encryption and decide " +
        "whether to fall back to the Java scan.")
    .intConf
    // Plain literal: the message contains nothing to interpolate, so no `s` prefix is needed.
    .checkValue(_ > 0, "must be positive.")
    .createWithDefault(10)

}

0 comments on commit 91800b4

Please sign in to comment.