From a6b48ac5c795eb6bcce70cded8d469d1cc6fe2f1 Mon Sep 17 00:00:00 2001 From: "Qian.Sun" Date: Mon, 10 Oct 2022 17:46:34 +0800 Subject: [PATCH] [SPARK-40726][DOCS] Supplement undocumented orc configurations in documentation --- docs/sql-data-sources-orc.md | 37 ++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/docs/sql-data-sources-orc.md b/docs/sql-data-sources-orc.md index 28e237a382df8..200037a7dea17 100644 --- a/docs/sql-data-sources-orc.md +++ b/docs/sql-data-sources-orc.md @@ -153,6 +153,24 @@ When reading from Hive metastore ORC tables and inserting to Hive metastore ORC 2.3.0 + + spark.sql.orc.columnarReaderBatchSize + 4096 + + The number of rows to include in an ORC vectorized reader batch. The number should + be carefully chosen to minimize overhead and avoid OOMs when reading data. + + 2.4.0 + + + spark.sql.orc.columnarWriterBatchSize + 1024 + + The number of rows to include in an ORC vectorized writer batch. The number should + be carefully chosen to minimize overhead and avoid OOMs when writing data. + + 3.4.0 + spark.sql.orc.enableNestedColumnVectorizedReader false @@ -163,6 +181,25 @@ When reading from Hive metastore ORC tables and inserting to Hive metastore ORC 3.2.0 + + spark.sql.orc.filterPushdown + true + + When true, enables filter pushdown for ORC files. + + 1.4.0 + + + spark.sql.orc.aggregatePushdown + false + + If true, aggregates will be pushed down to ORC for optimization. Supports MIN, MAX and + COUNT as aggregate expressions. For MIN/MAX, supports boolean, integer, float and date + types. For COUNT, supports all data types. If statistics are missing from any ORC file + footer, an exception will be thrown. + + 3.3.0 + spark.sql.orc.mergeSchema false