apache · yaooqinn · Apr 11, 2024 · pan3793 · Apr 11, 2024 · yaooqinn
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala
@@ -307,16 +307,13 @@ object JdbcUtils extends Logging with SQLConfHelper {
           metadata.putBoolean("logical_time_type", true)
         case java.sql.Types.ROWID =>
           metadata.putBoolean("rowid", true)
-        case java.sql.Types.ARRAY =>
-          val tableName = rsmd.getTableName(i + 1)
-          dialect.getArrayDimension(conn, tableName, columnName).foreach { dimension =>
-            metadata.putLong("arrayDimension", dimension)
-          }
         case _ =>
       }
       metadata.putBoolean("isSigned", isSigned)
       metadata.putBoolean("isTimestampNTZ", isTimestampNTZ)
       metadata.putLong("scale", fieldScale)
+      dialect.updateExtraColumnMeta(conn, rsmd, i + 1, metadata)
+
       val columnType =
         dialect.getCatalystType(dataType, typeName, fieldSize, metadata).getOrElse(
           getCatalystType(dataType, typeName, fieldSize, fieldScale, isSigned, isTimestampNTZ))

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql.jdbc
 
-import java.sql.{Connection, Date, Driver, Statement, Timestamp}
+import java.sql.{Connection, Date, Driver, ResultSetMetaData, Statement, Timestamp}
 import java.time.{Instant, LocalDate, LocalDateTime}
 import java.util
 import java.util.ServiceLoader
@@ -826,20 +826,19 @@ abstract class JdbcDialect extends Serializable with Logging {
   }
 
   /**
-   * Return the array dimension of the column. The array dimension will be carried in the
-   * metadata of the column and used by `getCatalystType` to determine the dimension of the
-   * ArrayType.
+   * Get extra column metadata for the given column.
    *
    * @param conn The connection currently connection being used.
-   * @param tableName The name of the table which the column belongs to.
-   * @param columnName The name of the column.
-   * @return An Option[Int] which contains the number of array dimension.
-   *         If Some(n), the column is an array with n dimensions.
-   *         If the method is un-implemented, or some error encountered, return None.
-   *         Then, `getCatalystType` will try use 1 dimension as default for arrays.
+   * @param rsmd The metadata of the current result set.
+   * @param columnIdx The index of the column.
+   * @param metadata The metadata builder to store the extra column information.
    */
   @Since("4.0.0")
-  def getArrayDimension(conn: Connection, tableName: String, columnName: String): Option[Int] = None
+  def updateExtraColumnMeta(
+      conn: Connection,
+      rsmd: ResultSetMetaData,
+      columnIdx: Int,
+      metadata: MetadataBuilder): Unit = {}
 }
 
 /**

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql.jdbc
 
-import java.sql.{Connection, Date, SQLException, Timestamp, Types}
+import java.sql.{Connection, Date, ResultSetMetaData, SQLException, Timestamp, Types}
 import java.time.{LocalDateTime, ZoneOffset}
 import java.util
 import java.util.Locale
@@ -343,26 +343,34 @@ private case class PostgresDialect() extends JdbcDialect with SQLConfHelper {
     }
   }
 
-  override def getArrayDimension(
+  override def updateExtraColumnMeta(
       conn: Connection,
-      tableName: String,
-      columnName: String): Option[Int] = {
-    val query =
-      s"""
-         |SELECT pg_attribute.attndims
-         |FROM pg_attribute
-         |  JOIN pg_class ON pg_attribute.attrelid = pg_class.oid
-         |  JOIN pg_namespace ON pg_class.relnamespace = pg_namespace.oid
-         |WHERE pg_class.relname = '$tableName' and pg_attribute.attname = '$columnName'
-         |""".stripMargin
-    try {
-      Using.resource(conn.createStatement()) { stmt =>
-        Using.resource(stmt.executeQuery(query)) { rs =>
-          if (rs.next()) Some(rs.getInt(1)) else None
+      rsmd: ResultSetMetaData,
+      columnIdx: Int,
+      metadata: MetadataBuilder): Unit = {
+    rsmd.getColumnType(columnIdx) match {
+      case Types.ARRAY =>
+        val tableName = rsmd.getTableName(columnIdx)
+        val columnName = rsmd.getColumnName(columnIdx)
+        val query =
+          s"""
+             |SELECT pg_attribute.attndims
+             |FROM pg_attribute
+             |  JOIN pg_class ON pg_attribute.attrelid = pg_class.oid
+             |  JOIN pg_namespace ON pg_class.relnamespace = pg_namespace.oid
+             |WHERE pg_class.relname = '$tableName' and pg_attribute.attname = '$columnName'
+             |""".stripMargin
+        try {
+          Using.resource(conn.createStatement()) { stmt =>
+            Using.resource(stmt.executeQuery(query)) { rs =>
+              if (rs.next()) metadata.putLong("arrayDimension", rs.getLong(1))
+            }
+          }
+        } catch {
+          case e: SQLException =>
+            logWarning(s"Failed to get array dimension for column $columnName", e)
         }
-      }
-    } catch {
-      case _: SQLException => None
+      case _ =>
     }
   }
 }