From 82e217b98a34c251cef8b568c5cd8813f12e8050 Mon Sep 17 00:00:00 2001
From: Jash Gala
Date: Fri, 8 Feb 2019 19:11:59 +0530
Subject: [PATCH 1/8] [SPARK-23619][SQL] Added default column names for explode & posexplode functions

---
 sql/core/src/main/scala/org/apache/spark/sql/functions.scala | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 1199cd8df6a9..5bac2694d1d0 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -3305,6 +3305,7 @@ object functions {
   /**
    * Creates a new row for each element in the given array or map column.
+   * Uses the default column name `col` for elements unless specified otherwise.
    *
    * @group collection_funcs
    * @since 1.3.0
@@ -3322,6 +3323,7 @@ object functions {
   /**
    * Creates a new row for each element with position in the given array or map column.
+   * Uses the default column names `col` for elements and `pos` for position unless specified otherwise.
    *
    * @group collection_funcs
    * @since 2.1.0

From e7a34eba1360f9934ef9db49a0b1b09cc8b31a91 Mon Sep 17 00:00:00 2001
From: Jash Gala
Date: Sat, 9 Feb 2019 18:06:45 +0530
Subject: [PATCH 2/8] Added more comments for explode_outer and posexplode_outer

---
 .../main/scala/org/apache/spark/sql/functions.scala | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 5bac2694d1d0..46d134ef199e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -3305,7 +3305,8 @@ object functions {
   /**
    * Creates a new row for each element in the given array or map column.
-   * Uses the default column name `col` for elements unless specified otherwise.
+   * Uses the default column name `col` for elements in the array and
+   * `key` and `value` for elements in the map unless specified otherwise.
    *
    * @group collection_funcs
    * @since 1.3.0
@@ -3314,6 +3315,8 @@ object functions {
   /**
    * Creates a new row for each element in the given array or map column.
+   * Uses the default column name `col` for elements in the array and
+   * `key` and `value` for elements in the map unless specified otherwise.
    * Unlike explode, if the array/map is null or empty then null is produced.
    *
    * @group collection_funcs
@@ -3323,7 +3326,8 @@ object functions {
   /**
    * Creates a new row for each element with position in the given array or map column.
-   * Uses the default column names `col` for elements and `pos` for position unless specified otherwise.
+   * Uses the default column name `pos` for position, and `col` for elements in the array
+   * and `key` and `value` for elements in the map unless specified otherwise.
    *
    * @group collection_funcs
    * @since 2.1.0
@@ -3332,6 +3336,8 @@ object functions {
   /**
    * Creates a new row for each element with position in the given array or map column.
+   * Uses the default column name `pos` for position, and `col` for elements in the array
+   * and `key` and `value` for elements in the map unless specified otherwise.
    * Unlike posexplode, if the array/map is null or empty then the row (null, null) is produced.
    *
    * @group collection_funcs

From 4ed035b302e1f15d7331458e6bd14ece3073564d Mon Sep 17 00:00:00 2001
From: Jash Gala
Date: Sat, 9 Feb 2019 18:41:04 +0530
Subject: [PATCH 3/8] Added default column names in ExpressionDescription for explode and posexplode

---
 .../apache/spark/sql/catalyst/expressions/generators.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala
index 6b6da1c8b414..d840f2ddff88 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala
@@ -352,7 +352,7 @@ abstract class ExplodeBase extends UnaryExpression with CollectionGenerator with
  */
 // scalastyle:off line.size.limit
 @ExpressionDescription(
-  usage = "_FUNC_(expr) - Separates the elements of array `expr` into multiple rows, or the elements of map `expr` into multiple rows and columns.",
+  usage = "_FUNC_(expr) - Separates the elements of array `expr` into multiple rows, or the elements of map `expr` into multiple rows and columns. Unless specified otherwise, uses the default column name `col` for elements of the array or `key` and `value` for the elements of the map.",
   examples = """
     Examples:
       > SELECT _FUNC_(array(10, 20));
@@ -375,7 +375,7 @@ case class Explode(child: Expression) extends ExplodeBase {
  */
 // scalastyle:off line.size.limit
 @ExpressionDescription(
-  usage = "_FUNC_(expr) - Separates the elements of array `expr` into multiple rows with positions, or the elements of map `expr` into multiple rows and columns with positions.",
+  usage = "_FUNC_(expr) - Separates the elements of array `expr` into multiple rows with positions, or the elements of map `expr` into multiple rows and columns with positions. Unless specified otherwise, uses the column name `pos` for position, `col` for elements of the array or `key` and `value` for elements of the map.",
   examples = """
     Examples:
       > SELECT _FUNC_(array(10,20));

From e5751ed9ce397b8d634482ea4f41a383a55e271e Mon Sep 17 00:00:00 2001
From: Jash Gala
Date: Fri, 22 Feb 2019 10:45:07 +0530
Subject: [PATCH 4/8] Added default column names for inline and stack in comments

---
 .../apache/spark/sql/catalyst/expressions/generators.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala
index d840f2ddff88..582911a18215 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala
@@ -128,7 +128,7 @@ case class UserDefinedGenerator(
  * }}}
  */
 @ExpressionDescription(
-  usage = "_FUNC_(n, expr1, ..., exprk) - Separates `expr1`, ..., `exprk` into `n` rows.",
+  usage = "_FUNC_(n, expr1, ..., exprk) - Separates `expr1`, ..., `exprk` into `n` rows. Uses column names col0, col1, etc. by default unless specified otherwise.",
   examples = """
     Examples:
       > SELECT _FUNC_(2, 1, 2, 3);
@@ -391,7 +391,7 @@ case class PosExplode(child: Expression) extends ExplodeBase {
  * Explodes an array of structs into a table.
  */
 @ExpressionDescription(
-  usage = "_FUNC_(expr) - Explodes an array of structs into a table.",
+  usage = "_FUNC_(expr) - Explodes an array of structs into a table. Uses column names col1, col2, etc. by default unless specified otherwise.",
   examples = """
     Examples:
       > SELECT _FUNC_(array(struct(1, 'a'), struct(2, 'b')));

From 80c0649f35acff057bcea51450606260d78caa46 Mon Sep 17 00:00:00 2001
From: Jash Gala
Date: Fri, 22 Feb 2019 11:01:03 +0530
Subject: [PATCH 5/8] Added scala style line-size exception to prevent failure due to comment

---
 .../apache/spark/sql/catalyst/expressions/generators.scala | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala
index 582911a18215..bf17e1a703b8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala
@@ -127,6 +127,7 @@ case class UserDefinedGenerator(
  * 3 NULL
  * }}}
  */
+// scalastyle.off line.size.limit
 @ExpressionDescription(
   usage = "_FUNC_(n, expr1, ..., exprk) - Separates `expr1`, ..., `exprk` into `n` rows. Uses column names col0, col1, etc. by default unless specified otherwise.",
   examples = """
     Examples:
       > SELECT _FUNC_(2, 1, 2, 3);
       1 2
       3 NULL
   """)
+// scalastyle.on line.size.limit
 case class Stack(children: Seq[Expression]) extends Generator {
   private lazy val numRows = children.head.eval().asInstanceOf[Int]
@@ -390,6 +392,7 @@ case class PosExplode(child: Expression) extends ExplodeBase {
 /**
  * Explodes an array of structs into a table.
  */
+// scalastyle.off line.size.limit
 @ExpressionDescription(
   usage = "_FUNC_(expr) - Explodes an array of structs into a table. Uses column names col1, col2, etc. by default unless specified otherwise.",
   examples = """
     Examples:
       > SELECT _FUNC_(array(struct(1, 'a'), struct(2, 'b')));
       1 a
       2 b
   """)
+// scalastyle:on line.size.limit
 case class Inline(child: Expression) extends UnaryExpression with CollectionGenerator {
   override val inline: Boolean = true
   override val position: Boolean = false

From d029d3c7f0883372b76adad531c6a25abea0d59a Mon Sep 17 00:00:00 2001
From: Jash Gala
Date: Fri, 22 Feb 2019 11:18:50 +0530
Subject: [PATCH 6/8] Fixed typo in scala style exceptions in comments

---
 .../apache/spark/sql/catalyst/expressions/generators.scala | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala
index bf17e1a703b8..82a7d9825e30 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala
@@ -127,7 +127,7 @@ case class UserDefinedGenerator(
  * 3 NULL
  * }}}
  */
-// scalastyle.off line.size.limit
+// scalastyle:off line.size.limit
 @ExpressionDescription(
   usage = "_FUNC_(n, expr1, ..., exprk) - Separates `expr1`, ..., `exprk` into `n` rows. Uses column names col0, col1, etc. by default unless specified otherwise.",
   examples = """
@@ -136,7 +136,7 @@ case class UserDefinedGenerator(
       1 2
       3 NULL
   """)
-// scalastyle.on line.size.limit
+// scalastyle:on line.size.limit
 case class Stack(children: Seq[Expression]) extends Generator {
   private lazy val numRows = children.head.eval().asInstanceOf[Int]
@@ -392,7 +392,7 @@ case class PosExplode(child: Expression) extends ExplodeBase {
 /**
  * Explodes an array of structs into a table.
  */
-// scalastyle.off line.size.limit
+// scalastyle:off line.size.limit
 @ExpressionDescription(
   usage = "_FUNC_(expr) - Explodes an array of structs into a table. Uses column names col1, col2, etc. by default unless specified otherwise.",
   examples = """

From 0313b591b36844c7aa66aa70a44d56bfc7d0cb42 Mon Sep 17 00:00:00 2001
From: Jash Gala
Date: Fri, 22 Feb 2019 12:08:10 +0530
Subject: [PATCH 7/8] Added default column names in comments for explode, etc. functions in R

---
 R/pkg/R/functions.R | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 8f425b12937e..5110d8417237 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -3568,6 +3568,8 @@ setMethod("element_at",
 #' @details
 #' \code{explode}: Creates a new row for each element in the given array or map column.
+#' Uses the default column name \code{col} for elements in the array and
+#' \code{key} and \code{value} for elements in the map unless specified otherwise.
 #'
 #' @rdname column_collection_functions
 #' @aliases explode explode,Column-method
@@ -3628,7 +3630,9 @@ setMethod("sort_array",
 #' @details
 #' \code{posexplode}: Creates a new row for each element with position in the given array
-#' or map column.
+#' or map column. Uses the default column name \code{pos} for position, and \code{col}
+#' for elements in the array and \code{key} and \code{value} for elements in the map
+#' unless specified otherwise.
 #'
 #' @rdname column_collection_functions
 #' @aliases posexplode posexplode,Column-method
@@ -3769,7 +3773,8 @@ setMethod("repeat_string",
 #' \code{explode}: Creates a new row for each element in the given array or map column.
 #' Unlike \code{explode}, if the array/map is \code{null} or empty
 #' then \code{null} is produced.
-#'
+#' Uses the default column name \code{col} for elements in the array and
+#' \code{key} and \code{value} for elements in the map unless specified otherwise.
 #'
 #' @rdname column_collection_functions
 #' @aliases explode_outer explode_outer,Column-method
@@ -3794,6 +3799,9 @@ setMethod("explode_outer",
 #' \code{posexplode_outer}: Creates a new row for each element with position in the given
 #' array or map column. Unlike \code{posexplode}, if the array/map is \code{null} or empty
 #' then the row (\code{null}, \code{null}) is produced.
+#' Uses the default column name \code{pos} for position, and \code{col}
+#' for elements in the array and \code{key} and \code{value} for elements in the map
+#' unless specified otherwise.
 #'
 #' @rdname column_collection_functions
 #' @aliases posexplode_outer posexplode_outer,Column-method

From d0dea9cc79b01bf78b1c45bdec3012890bc90718 Mon Sep 17 00:00:00 2001
From: Jash Gala
Date: Fri, 22 Feb 2019 12:09:30 +0530
Subject: [PATCH 8/8] Added default column names in comments for explode, etc.
 functions in Python

---
 python/pyspark/sql/functions.py | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 3c33e2bed92d..953ac6bb6233 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -2088,7 +2088,10 @@ def array_except(col1, col2):
 @since(1.4)
 def explode(col):
-    """Returns a new row for each element in the given array or map.
+    """
+    Returns a new row for each element in the given array or map.
+    Uses the default column name `col` for elements in the array and
+    `key` and `value` for elements in the map unless specified otherwise.
     >>> from pyspark.sql import Row
     >>> eDF = spark.createDataFrame([Row(a=1, intlist=[1,2,3], mapfield={"a": "b"})])
@@ -2109,7 +2112,10 @@ def explode(col):
 @since(2.1)
 def posexplode(col):
-    """Returns a new row for each element with position in the given array or map.
+    """
+    Returns a new row for each element with position in the given array or map.
+    Uses the default column name `pos` for position, and `col` for elements in the
+    array and `key` and `value` for elements in the map unless specified otherwise.
     >>> from pyspark.sql import Row
     >>> eDF = spark.createDataFrame([Row(a=1, intlist=[1,2,3], mapfield={"a": "b"})])
@@ -2130,8 +2136,11 @@ def explode(col):
 @since(2.3)
 def explode_outer(col):
-    """Returns a new row for each element in the given array or map.
+    """
+    Returns a new row for each element in the given array or map.
     Unlike explode, if the array/map is null or empty then null is produced.
+    Uses the default column name `col` for elements in the array and
+    `key` and `value` for elements in the map unless specified otherwise.
     >>> df = spark.createDataFrame(
     ... [(1, ["foo", "bar"], {"x": 1.0}), (2, [], {}), (3, None, None)],
@@ -2163,8 +2172,11 @@ def explode_outer(col):
 @since(2.3)
 def posexplode_outer(col):
-    """Returns a new row for each element with position in the given array or map.
+    """
+    Returns a new row for each element with position in the given array or map.
     Unlike posexplode, if the array/map is null or empty then the row (null, null) is produced.
+    Uses the default column name `pos` for position, and `col` for elements in the
+    array and `key` and `value` for elements in the map unless specified otherwise.
     >>> df = spark.createDataFrame(
     ... [(1, ["foo", "bar"], {"x": 1.0}), (2, [], {}), (3, None, None)],
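As a quick, illustrative check of the defaults these patches document (not part of the patch series itself), here is a minimal PySpark sketch; it assumes a throwaway local SparkSession can be started, and the expected column lists are shown in the comments:

from pyspark.sql import SparkSession
from pyspark.sql.functions import explode, posexplode

# Throwaway local session for illustration; any existing SparkSession works as well.
spark = SparkSession.builder.master("local[1]").appName("generator-default-columns").getOrCreate()

# One row with an array column and a map column.
df = spark.createDataFrame([(1, [10, 20], {"a": "b"})], ["id", "arr", "m"])

print(df.select(explode("arr")).columns)     # ['col']                 array elements -> `col`
print(df.select(explode("m")).columns)       # ['key', 'value']        map entries -> `key`, `value`
print(df.select(posexplode("arr")).columns)  # ['pos', 'col']          positional variant adds `pos`
print(df.select(posexplode("m")).columns)    # ['pos', 'key', 'value']

spark.stop()

The printed names match the wording added across the Scala, R, and Python docs above: `col` for array elements, `key` and `value` for map entries, and a leading `pos` for the positional variants.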