From a6b48ac5c795eb6bcce70cded8d469d1cc6fe2f1 Mon Sep 17 00:00:00 2001
From: "Qian.Sun" <qian.sun2020@gmail.com>
Date: Mon, 10 Oct 2022 17:46:34 +0800
Subject: [PATCH] [SPARK-40726][DOCS] Supplement undocumented orc
 configurations in documentation

---
 docs/sql-data-sources-orc.md | 37 ++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)
diff --git a/docs/sql-data-sources-orc.md b/docs/sql-data-sources-orc.md
index 28e237a382df8..200037a7dea17 100644
--- a/docs/sql-data-sources-orc.md
+++ b/docs/sql-data-sources-orc.md
@@ -153,6 +153,24 @@ When reading from Hive metastore ORC tables and inserting to Hive metastore ORC
     </td>
     <td>2.3.0</td>
   </tr>
+  <tr>
+    <td><code>spark.sql.orc.columnarReaderBatchSize</code></td>
+    <td><code>4096</code></td>
+    <td>
+      The number of rows to include in an ORC vectorized reader batch. The number should
+      be carefully chosen to minimize overhead and avoid OOMs in reading data.
+    </td>
+    <td>2.4.0</td>
+  </tr>
+  <tr>
+    <td><code>spark.sql.orc.columnarWriterBatchSize</code></td>
+    <td><code>1024</code></td>
+    <td>
+      The number of rows to include in an ORC vectorized writer batch. The number should
+      be carefully chosen to minimize overhead and avoid OOMs in writing data.
+    </td>
+    <td>3.4.0</td>
+  </tr>
   <tr>
     <td><code>spark.sql.orc.enableNestedColumnVectorizedReader</code></td>
     <td><code>false</code></td>
@@ -163,6 +181,25 @@ When reading from Hive metastore ORC tables and inserting to Hive metastore ORC
     </td>
     <td>3.2.0</td>
   </tr>
+  <tr>
+    <td><code>spark.sql.orc.filterPushdown</code></td>
+    <td><code>true</code></td>
+    <td>
+      When true, enables filter pushdown for ORC files.
+    </td>
+    <td>1.4.0</td>
+  </tr>
+  <tr>
+    <td><code>spark.sql.orc.aggregatePushdown</code></td>
+    <td><code>false</code></td>
+    <td>
+      If true, aggregates will be pushed down to ORC for optimization. Supports MIN, MAX and
+      COUNT as aggregate expressions. For MIN/MAX, supports boolean, integer, float and date
+      types. For COUNT, supports all data types. If statistics are missing from any ORC file
+      footer, an exception will be thrown.
+    </td>
+    <td>3.3.0</td>
+  </tr>
   <tr>
   <td><code>spark.sql.orc.mergeSchema</code></td>
   <td>false</td>