Skip to content

Commit

Permalink
Add missing json reader options for JsonScanRetrySuite (#11898)
Browse files Browse the repository at this point in the history
Signed-off-by: Jihoon Son <ghoonson@gmail.com>
  • Loading branch information
jihoonson authored Dec 20, 2024
1 parent adb89aa commit 01f9fd2
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -193,18 +193,22 @@ object GpuJsonReadCommon {
  val allowUnquotedControlChars = options.buildJsonFactory()
  .isEnabled(JsonParser.Feature.ALLOW_UNQUOTED_CONTROL_CHARS)

+ baseCudfJsonOptionsBuilder()
+ .withNormalizeSingleQuotes(options.allowSingleQuotes)
+ .withLeadingZeros(options.allowNumericLeadingZeros)
+ .withNonNumericNumbers(options.allowNonNumericNumbers)
+ .withUnquotedControlChars(allowUnquotedControlChars)
+ .build()
+ }
+
+ def baseCudfJsonOptionsBuilder(): ai.rapids.cudf.JSONOptions.Builder = {
  ai.rapids.cudf.JSONOptions.builder()
  .withRecoverWithNull(true)
  .withMixedTypesAsStrings(true)
  .withNormalizeWhitespace(true)
  .withKeepQuotes(true)
- .withNormalizeSingleQuotes(options.allowSingleQuotes)
  .withStrictValidation(true)
- .withLeadingZeros(options.allowNumericLeadingZeros)
- .withNonNumericNumbers(options.allowNonNumericNumbers)
- .withUnquotedControlChars(allowUnquotedControlChars)
  .withCudfPruneSchema(true)
  .withExperimental(true)
- .build()
  }
  }
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@

  package com.nvidia.spark.rapids

- import ai.rapids.cudf.JSONOptions
  import com.nvidia.spark.rapids.jni.RmmSpark

  import org.apache.spark.sql.catalyst.json.rapids.JsonPartitionReader
+ import org.apache.spark.sql.rapids.GpuJsonReadCommon
  import org.apache.spark.sql.types._

  class JsonScanRetrySuite extends RmmSparkRetrySuiteBase {
Expand All @@ -29,7 +29,7 @@ class JsonScanRetrySuite extends RmmSparkRetrySuiteBase {

  val cudfSchema = GpuColumnVector.from(StructType(Seq(StructField("a", IntegerType),
  StructField("b", IntegerType))))
- val opts = JSONOptions.builder().withLines(true).build()
+ val opts = GpuJsonReadCommon.baseCudfJsonOptionsBuilder().withLines(true).build()
  RmmSpark.forceRetryOOM(RmmSpark.getCurrentThreadId, 1,
  RmmSpark.OomInjectionType.GPU.ordinal, 0)
  val table = JsonPartitionReader.readToTable(bufferer, cudfSchema, NoopMetric,
Expand Down

0 comments on commit 01f9fd2

Please sign in to comment.