From 0f47a6c7a14ad02ae9a01c686d4eebfacccea8ce Mon Sep 17 00:00:00 2001
From: Mike Pigott
Date: Fri, 8 Feb 2019 23:21:09 +0100
Subject: [PATCH] ARROW-4142: [Java] JDBC Array -> Arrow ListVector
---
.../arrow/adapter/jdbc/JdbcToArrow.java | 2 +-
.../arrow/adapter/jdbc/JdbcToArrowConfig.java | 97 ++-
.../arrow/adapter/jdbc/JdbcToArrowUtils.java | 566 +++++++++++-------
.../adapter/jdbc/JdbcToArrowConfigTest.java | 35 +-
.../org/apache/arrow/adapter/jdbc/Table.java | 6 +-
5 files changed, 481 insertions(+), 225 deletions(-)
diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java
index 79102043a0f83..7f8c8c08d87b1 100644
--- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java
+++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java
@@ -184,7 +184,6 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator all
*/
public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar) throws SQLException, IOException {
Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
-
return sqlToArrow(resultSet, new JdbcToArrowConfig(new RootAllocator(Integer.MAX_VALUE), calendar, false));
}
@@ -220,6 +219,7 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, JdbcToArrowConfig
throws SQLException, IOException {
Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
Preconditions.checkNotNull(config, "The configuration cannot be null");
+ Preconditions.checkArgument(config.isValid(), "The configuration must be valid");
VectorSchemaRoot root = VectorSchemaRoot.create(
JdbcToArrowUtils.jdbcToArrowSchema(resultSet.getMetaData(), config), config.getAllocator());
diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java
index 8f2a8ef54f839..ac35e85fa3e54 100644
--- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java
+++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java
@@ -18,6 +18,7 @@
package org.apache.arrow.adapter.jdbc;
import java.util.Calendar;
+import java.util.Map;
import org.apache.arrow.memory.BaseAllocator;
@@ -28,16 +29,29 @@
*
* The allocator is used to construct the {@link org.apache.arrow.vector.VectorSchemaRoot},
* and the calendar is used to define the time zone of any {@link org.apahe.arrow.vector.pojo.ArrowType.Timestamp}
- * fields that are created during the conversion.
+ * fields that are created during the conversion. Neither field may be null
.
*
*
- * Neither field may be null
.
+ * If the includeMetadata
flag is set, the Arrow field metadata will contain information
+ * from the corresponding {@link java.sql.ResultSetMetaData} that was used to create the
+ * {@link org.apache.arrow.vector.types.pojo.FieldType} of the corresponding
+ * {@link org.apache.arrow.vector.FieldVector}.
+ *
+ *
+ * If there are any {@link java.sql.Types#ARRAY} fields in the {@link java.sql.ResultSet}, the corresponding
+ * {@link JdbcFieldInfo} for the array's contents must be defined here. Unfortunately, the sub-type
+ * information cannot be retrieved from all JDBC implementations (H2 for example, returns
+ * {@link java.sql.Types#NULL} for the array sub-type), so it must be configured here. The column index
+ * or name can be used to map to a {@link JdbcFieldInfo}, and that will be used for the conversion.
*
*/
public final class JdbcToArrowConfig {
+
private Calendar calendar;
private BaseAllocator allocator;
private boolean includeMetadata;
+ private Map arraySubTypesByColumnIndex;
+ private Map arraySubTypesByColumnName;
/**
* Constructs a new configuration from the provided allocator and calendar. The allocator
@@ -48,18 +62,21 @@ public final class JdbcToArrowConfig {
* @param calendar The calendar to use when constructing Timestamp fields and reading time-based results.
* @param includeMetadata Whether to include JDBC field metadata in the Arrow Schema Field metadata.
*/
- JdbcToArrowConfig(BaseAllocator allocator, Calendar calendar, boolean includeMetadata) {
+ public JdbcToArrowConfig(BaseAllocator allocator, Calendar calendar, boolean includeMetadata) {
Preconditions.checkNotNull(allocator, "Memory allocator cannot be null");
this.allocator = allocator;
this.calendar = calendar;
this.includeMetadata = includeMetadata;
+ this.arraySubTypesByColumnIndex = null;
+ this.arraySubTypesByColumnName = null;
}
/**
* The calendar to use when defining Arrow Timestamp fields
* and retrieving {@link Date}, {@link Time}, or {@link Timestamp}
* data types from the {@link ResultSet}, or null
if not converting.
+ *
* @return the calendar.
*/
public Calendar getCalendar() {
@@ -82,4 +99,78 @@ public BaseAllocator getAllocator() {
public boolean shouldIncludeMetadata() {
return includeMetadata;
}
+
+ /**
+ * Sets whether to include JDBC ResultSet field metadata in the Arrow Schema field metadata.
+ *
+ * @param includeMetadata Whether to include or exclude JDBC metadata in the Arrow Schema field metadata.
+ * @return This instance of the JdbcToArrowConfig
, for chaining.
+ */
+ public JdbcToArrowConfig setIncludeMetadata(boolean includeMetadata) {
+ this.includeMetadata = includeMetadata;
+ return this;
+ }
+
+ /**
+ * Returns the array sub-type {@link JdbcFieldInfo} defined for the provided column index.
+ *
+ * @param index The {@link java.sql.ResultSetMetaData} column index of an {@link java.sql.Types#ARRAY} type.
+ * @return The {@link JdbcFieldInfo} for that array's sub-type, or null
if not defined.
+ */
+ public JdbcFieldInfo getArraySubTypeByColumnIndex(int index) {
+ if (arraySubTypesByColumnIndex == null) {
+ return null;
+ } else {
+ return arraySubTypesByColumnIndex.get(index);
+ }
+ }
+
+ /**
+ * Sets the mapping of column-index-to-{@link JdbcFieldInfo} used for columns of type {@link java.sql.Types#ARRAY}.
+ *
+ * @param map The mapping.
+ * @return This instance of the JdbcToArrowConfig
, for chaining.
+ */
+ public JdbcToArrowConfig setArraySubTypeByColumnIndexMap(Map map) {
+ this.arraySubTypesByColumnIndex = map;
+ return this;
+ }
+
+ /**
+ * Returns the array sub-type {@link JdbcFieldInfo} defined for the provided column name.
+ *
+ * @param index The {@link java.sql.ResultSetMetaData} column name of an {@link java.sql.Types#ARRAY} type.
+ * @return The {@link JdbcFieldInfo} for that array's sub-type, or null
if not defined.
+ */
+ public JdbcFieldInfo getArraySubTypeByColumnName(String name) {
+ if (arraySubTypesByColumnName == null) {
+ return null;
+ } else {
+ return arraySubTypesByColumnName.get(name);
+ }
+ }
+
+ /**
+ * Sets the mapping of column-name-to-{@link JdbcFieldInfo} used for columns of type {@link java.sql.Types#ARRAY}.
+ *
+ * @param map The mapping.
+ * @return This instance of the JdbcToArrowConfig
, for chaining.
+ */
+ public JdbcToArrowConfig setArraySubTypeByColumnNameMap(Map map) {
+ this.arraySubTypesByColumnName = map;
+ return this;
+ }
+
+ /**
+ * Whether this configuration is valid. The configuration is valid when:
+ *
+ * - A memory allocator is provided.
+ * - A calendar is provided.
+ *
+ *
+ * @return Whether this configuration is valid.
+ */
+ public boolean isValid() {
+ return (calendar != null) && (allocator != null);
+ }
}
diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
index 833ca8410a969..6310305a88a6f 100644
--- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
+++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
@@ -25,6 +25,7 @@
import java.io.InputStream;
import java.math.BigDecimal;
import java.nio.charset.StandardCharsets;
+import java.sql.Array;
import java.sql.Blob;
import java.sql.Clob;
import java.sql.Date;
@@ -59,6 +60,7 @@
import org.apache.arrow.vector.VarBinaryVector;
import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.holders.NullableBigIntHolder;
import org.apache.arrow.vector.holders.NullableBitHolder;
import org.apache.arrow.vector.holders.NullableDateMilliHolder;
@@ -95,6 +97,13 @@ public class JdbcToArrowUtils {
private static final int DEFAULT_STREAM_BUFFER_SIZE = 1024;
private static final int DEFAULT_CLOB_SUBSTRING_READ_SIZE = 256;
+ /**
+ * Returns the instance of a {java.util.Calendar} with the UTC time zone and root locale.
+ */
+ public static Calendar getUtcCalendar() {
+ return Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT);
+ }
+
/**
* Create Arrow {@link Schema} object for the given JDBC {@link ResultSetMetaData}.
*
@@ -111,67 +120,47 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar
}
/**
- * Returns the instance of a {java.util.Calendar} with the UTC time zone and root locale.
- */
- public static Calendar getUtcCalendar() {
- return Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT);
- }
-
- /**
- * Create Arrow {@link Schema} object for the given JDBC {@link ResultSetMetaData}.
- *
- * This method currently performs following type mapping for JDBC SQL data types to corresponding Arrow data types.
- *
- *
CHAR --> ArrowType.Utf8
- * NCHAR --> ArrowType.Utf8
- * VARCHAR --> ArrowType.Utf8
- * NVARCHAR --> ArrowType.Utf8
- * LONGVARCHAR --> ArrowType.Utf8
- * LONGNVARCHAR --> ArrowType.Utf8
- * NUMERIC --> ArrowType.Decimal(precision, scale)
- * DECIMAL --> ArrowType.Decimal(precision, scale)
- * BIT --> ArrowType.Bool
- * TINYINT --> ArrowType.Int(8, signed)
- * SMALLINT --> ArrowType.Int(16, signed)
- * INTEGER --> ArrowType.Int(32, signed)
- * BIGINT --> ArrowType.Int(64, signed)
- * REAL --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)
- * FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)
- * DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)
- * BINARY --> ArrowType.Binary
- * VARBINARY --> ArrowType.Binary
- * LONGVARBINARY --> ArrowType.Binary
- * DATE --> ArrowType.Date(DateUnit.MILLISECOND)
- * TIME --> ArrowType.Time(TimeUnit.MILLISECOND, 32)
- * TIMESTAMP --> ArrowType.Timestamp(TimeUnit.MILLISECOND, timezone=null)
- * CLOB --> ArrowType.Utf8
- * BLOB --> ArrowType.Binary
+ * Create Arrow {@link Schema} object for the given JDBC {@link java.sql.ResultSetMetaData}.
+ *
+ * The {@link JdbcToArrowUtils#getArrowTypeForJdbcField(JdbcFieldInfo, Calendar)} method is used to construct a
+ * {@link org.apache.arrow.vector.types.pojo.ArrowType} for each field in the {@link java.sql.ResultSetMetaData}.
+ *
+ *
+ * If {@link JdbcToArrowConfig#getIncludeMetadata()} returns true
, the following fields
+ * will be added to the {@link FieldType#getMetadata()}:
+ *
+ * - {@link Constants#SQL_CATALOG_NAME_KEY} representing {@link ResultSetMetaData#getCatalogName(int)}
+ * - {@link Constants#SQL_TABLE_NAME_KEY} representing {@link ResultSetMetaData#getTableName(int)}
+ * - {@link Constants#SQL_COLUMN_NAME_KEY} representing {@link ResultSetMetaData#getColumnName(int)}
+ * - {@link Constants#SQL_TYPE_KEY} representing {@link ResultSetMetaData#getColumnTypeName(int)}
+ *
+ *
+ *
+ * If any columns are of type {@link java.sql.Types#ARRAY}, the configuration object will be used to look up
+ * the array sub-type field. The {@link JdbcToArrowConfig#getArraySubTypeByColumnIndex(int)} method will be
+ * checked first, followed by the {@link JdbcToArrowConfig#getArraySubTypeByColumnName(String)} method.
+ *
*
* @param rsmd The ResultSetMetaData containing the results, to read the JDBC metadata from.
* @param config The configuration to use when constructing the schema.
* @return {@link Schema}
* @throws SQLException on error
+ * @throws IllegalArgumentException if rsmd
contains an {@link java.sql.Types#ARRAY} but the
+ * config
does not have a sub-type definition for it.
*/
public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig config) throws SQLException {
Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null");
Preconditions.checkNotNull(config, "The configuration object must not be null");
-
- final String timezone;
- if (config.getCalendar() != null) {
- timezone = config.getCalendar().getTimeZone().getID();
- } else {
- timezone = null;
- }
+ Preconditions.checkArgument(config.isValid(), "The configuration object must be valid");
List fields = new ArrayList<>();
int columnCount = rsmd.getColumnCount();
for (int i = 1; i <= columnCount; i++) {
final String columnName = rsmd.getColumnName(i);
- final FieldType fieldType;
final Map metadata;
if (config.shouldIncludeMetadata()) {
- metadata = new HashMap<>();
+ metadata = new HashMap();
metadata.put(Constants.SQL_CATALOG_NAME_KEY, rsmd.getCatalogName(i));
metadata.put(Constants.SQL_TABLE_NAME_KEY, rsmd.getTableName(i));
metadata.put(Constants.SQL_COLUMN_NAME_KEY, columnName);
@@ -181,83 +170,173 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig
metadata = null;
}
- switch (rsmd.getColumnType(i)) {
- case Types.BOOLEAN:
- case Types.BIT:
- fieldType = new FieldType(true, new ArrowType.Bool(), null, metadata);
- break;
- case Types.TINYINT:
- fieldType = new FieldType(true, new ArrowType.Int(8, true), null, metadata);
- break;
- case Types.SMALLINT:
- fieldType = new FieldType(true, new ArrowType.Int(16, true), null, metadata);
- break;
- case Types.INTEGER:
- fieldType = new FieldType(true, new ArrowType.Int(32, true), null, metadata);
- break;
- case Types.BIGINT:
- fieldType = new FieldType(true, new ArrowType.Int(64, true), null, metadata);
- break;
- case Types.NUMERIC:
- case Types.DECIMAL:
- int precision = rsmd.getPrecision(i);
- int scale = rsmd.getScale(i);
- fieldType = new FieldType(true, new ArrowType.Decimal(precision, scale), null, metadata);
- break;
- case Types.REAL:
- case Types.FLOAT:
- fieldType = new FieldType(true, new ArrowType.FloatingPoint(SINGLE), null, metadata);
- break;
- case Types.DOUBLE:
- fieldType = new FieldType(true, new ArrowType.FloatingPoint(DOUBLE), null, metadata);
- break;
- case Types.CHAR:
- case Types.NCHAR:
- case Types.VARCHAR:
- case Types.NVARCHAR:
- case Types.LONGVARCHAR:
- case Types.LONGNVARCHAR:
- case Types.CLOB:
- fieldType = new FieldType(true, new ArrowType.Utf8(), null, metadata);
- break;
- case Types.DATE:
- fieldType = new FieldType(true, new ArrowType.Date(DateUnit.MILLISECOND), null, metadata);
- break;
- case Types.TIME:
- fieldType = new FieldType(true, new ArrowType.Time(TimeUnit.MILLISECOND, 32), null, metadata);
- break;
- case Types.TIMESTAMP:
- fieldType =
- new FieldType(
- true,
- new ArrowType.Timestamp(TimeUnit.MILLISECOND, timezone),
- null,
- metadata);
- break;
- case Types.BINARY:
- case Types.VARBINARY:
- case Types.LONGVARBINARY:
- case Types.BLOB:
- fieldType = new FieldType(true, new ArrowType.Binary(), null, metadata);
- break;
-
- case Types.ARRAY:
- // TODO Need to handle this type
- // fields.add(new Field("list", FieldType.nullable(new ArrowType.List()), null));
- default:
- // no-op, shouldn't get here
- fieldType = null;
- break;
- }
+ final ArrowType arrowType = getArrowTypeForJdbcField(new JdbcFieldInfo(rsmd, i), config.getCalendar());
+ if (arrowType != null) {
+ final FieldType fieldType = new FieldType(true, arrowType, null, metadata);
+
+ List children = null;
+ if (arrowType.getTypeID() == ArrowType.List.TYPE_TYPE) {
+ final JdbcFieldInfo arrayFieldInfo = getJdbcFieldInfoForArraySubType(rsmd, i, config);
+ if (arrayFieldInfo == null) {
+ throw new IllegalArgumentException("Configuration does not provide a mapping for array column " + i);
+ }
+ children = new ArrayList();
+ final ArrowType childType =
+ getArrowTypeForJdbcField(arrayFieldInfo, config.getCalendar());
+ children.add(new Field("child", FieldType.nullable(childType), null));
+ }
- if (fieldType != null) {
- fields.add(new Field(columnName, fieldType, null));
+ fields.add(new Field(columnName, fieldType, children));
}
}
return new Schema(fields, null);
}
+ /**
+ * Creates an {@link org.apache.arrow.vector.types.pojo.ArrowType}
+ * from the {@link JdbcFieldInfo} and {@link java.util.Calendar}.
+ *
+ * This method currently performs following type mapping for JDBC SQL data types to corresponding Arrow data types.
+ *
+ *
+ * - CHAR --> ArrowType.Utf8
+ * - NCHAR --> ArrowType.Utf8
+ * - VARCHAR --> ArrowType.Utf8
+ * - NVARCHAR --> ArrowType.Utf8
+ * - LONGVARCHAR --> ArrowType.Utf8
+ * - LONGNVARCHAR --> ArrowType.Utf8
+ * - NUMERIC --> ArrowType.Decimal(precision, scale)
+ * - DECIMAL --> ArrowType.Decimal(precision, scale)
+ * - BIT --> ArrowType.Bool
+ * - TINYINT --> ArrowType.Int(8, signed)
+ * - SMALLINT --> ArrowType.Int(16, signed)
+ * - INTEGER --> ArrowType.Int(32, signed)
+ * - BIGINT --> ArrowType.Int(64, signed)
+ * - REAL --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)
+ * - FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)
+ * - DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)
+ * - BINARY --> ArrowType.Binary
+ * - VARBINARY --> ArrowType.Binary
+ * - LONGVARBINARY --> ArrowType.Binary
+ * - DATE --> ArrowType.Date(DateUnit.MILLISECOND)
+ * - TIME --> ArrowType.Time(TimeUnit.MILLISECOND, 32)
+ * - TIMESTAMP --> ArrowType.Timestamp(TimeUnit.MILLISECOND, calendar timezone)
+ * - CLOB --> ArrowType.Utf8
+ * - BLOB --> ArrowType.Binary
+ *
+ *
+ * @param fieldInfo The field information to construct the ArrowType
from.
+ * @param calendar The calendar to use when constructing the ArrowType.Timestamp
+ * for {@link java.sql.Types#TIMESTAMP} types.
+ * @return The corresponding ArrowType
.
+ * @throws NullPointerException if either fieldInfo
or calendar
are null
.
+ */
+ public static ArrowType getArrowTypeForJdbcField(JdbcFieldInfo fieldInfo, Calendar calendar) {
+ Preconditions.checkNotNull(fieldInfo, "JdbcFieldInfo object cannot be null");
+
+ final String timezone;
+ if (calendar != null) {
+ timezone = calendar.getTimeZone().getID();
+ } else {
+ timezone = null;
+ }
+
+
+ final ArrowType arrowType;
+
+ switch (fieldInfo.getJdbcType()) {
+ case Types.BOOLEAN:
+ case Types.BIT:
+ arrowType = new ArrowType.Bool();
+ break;
+ case Types.TINYINT:
+ arrowType = new ArrowType.Int(8, true);
+ break;
+ case Types.SMALLINT:
+ arrowType = new ArrowType.Int(16, true);
+ break;
+ case Types.INTEGER:
+ arrowType = new ArrowType.Int(32, true);
+ break;
+ case Types.BIGINT:
+ arrowType = new ArrowType.Int(64, true);
+ break;
+ case Types.NUMERIC:
+ case Types.DECIMAL:
+ int precision = fieldInfo.getPrecision();
+ int scale = fieldInfo.getScale();
+ arrowType = new ArrowType.Decimal(precision, scale);
+ break;
+ case Types.REAL:
+ case Types.FLOAT:
+ arrowType = new ArrowType.FloatingPoint(SINGLE);
+ break;
+ case Types.DOUBLE:
+ arrowType = new ArrowType.FloatingPoint(DOUBLE);
+ break;
+ case Types.CHAR:
+ case Types.NCHAR:
+ case Types.VARCHAR:
+ case Types.NVARCHAR:
+ case Types.LONGVARCHAR:
+ case Types.LONGNVARCHAR:
+ case Types.CLOB:
+ arrowType = new ArrowType.Utf8();
+ break;
+ case Types.DATE:
+ arrowType = new ArrowType.Date(DateUnit.MILLISECOND);
+ break;
+ case Types.TIME:
+ arrowType = new ArrowType.Time(TimeUnit.MILLISECOND, 32);
+ break;
+ case Types.TIMESTAMP:
+ arrowType = new ArrowType.Timestamp(TimeUnit.MILLISECOND, timezone);
+ break;
+ case Types.BINARY:
+ case Types.VARBINARY:
+ case Types.LONGVARBINARY:
+ case Types.BLOB:
+ arrowType = new ArrowType.Binary();
+ break;
+ case Types.ARRAY:
+ arrowType = new ArrowType.List();
+ break;
+ default:
+ // no-op, shouldn't get here
+ arrowType = null;
+ break;
+ }
+
+ return arrowType;
+ }
+
+ /* Uses the configuration to determine what the array sub-type JdbcFieldInfo is.
+ * If no sub-type can be found, returns null.
+ */
+ private static JdbcFieldInfo getJdbcFieldInfoForArraySubType(
+ ResultSetMetaData rsmd,
+ int arrayColumn,
+ JdbcToArrowConfig config)
+ throws SQLException {
+
+ Preconditions.checkNotNull(rsmd, "ResultSet MetaData object cannot be null");
+ Preconditions.checkNotNull(config, "Configuration must not be null");
+ Preconditions.checkArgument(
+ arrayColumn > 0,
+ "ResultSetMetaData columns start with 1; column cannot be less than 1");
+ Preconditions.checkArgument(
+ arrayColumn <= rsmd.getColumnCount(),
+ "Column number cannot be more than the number of columns");
+ Preconditions.checkArgument(config.isValid(), "Configuration must be valid");
+
+ JdbcFieldInfo fieldInfo = config.getArraySubTypeByColumnIndex(arrayColumn);
+ if (fieldInfo == null) {
+ fieldInfo = config.getArraySubTypeByColumnName(rsmd.getColumnName(arrayColumn));
+ }
+ return fieldInfo;
+ }
+
private static void allocateVectors(VectorSchemaRoot root, int size) {
List vectors = root.getFieldVectors();
for (FieldVector fieldVector : vectors) {
@@ -284,7 +363,8 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calen
throws SQLException, IOException {
Preconditions.checkNotNull(rs, "JDBC ResultSet object can't be null");
- Preconditions.checkNotNull(root, "JDBC ResultSet object can't be null");
+ Preconditions.checkNotNull(root, "Vector Schema cannot be null");
+ Preconditions.checkNotNull(calendar, "Calendar object can't be null");
jdbcToArrowVectors(rs, root, new JdbcToArrowConfig(new RootAllocator(0), calendar, false));
}
@@ -304,123 +384,140 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, JdbcT
Preconditions.checkNotNull(rs, "JDBC ResultSet object can't be null");
Preconditions.checkNotNull(root, "JDBC ResultSet object can't be null");
Preconditions.checkNotNull(config, "JDBC-to-Arrow configuration cannot be null");
+ Preconditions.checkArgument(config.isValid(), "JDBC-to-Arrow configuration must be valid");
ResultSetMetaData rsmd = rs.getMetaData();
int columnCount = rsmd.getColumnCount();
allocateVectors(root, DEFAULT_BUFFER_SIZE);
- final Calendar calendar = config.getCalendar();
-
int rowCount = 0;
while (rs.next()) {
for (int i = 1; i <= columnCount; i++) {
- String columnName = rsmd.getColumnName(i);
- switch (rsmd.getColumnType(i)) {
- case Types.BOOLEAN:
- case Types.BIT:
- updateVector((BitVector) root.getVector(columnName),
- rs.getBoolean(i), !rs.wasNull(), rowCount);
- break;
- case Types.TINYINT:
- updateVector((TinyIntVector) root.getVector(columnName),
- rs.getInt(i), !rs.wasNull(), rowCount);
- break;
- case Types.SMALLINT:
- updateVector((SmallIntVector) root.getVector(columnName),
- rs.getInt(i), !rs.wasNull(), rowCount);
- break;
- case Types.INTEGER:
- updateVector((IntVector) root.getVector(columnName),
- rs.getInt(i), !rs.wasNull(), rowCount);
- break;
- case Types.BIGINT:
- updateVector((BigIntVector) root.getVector(columnName),
- rs.getLong(i), !rs.wasNull(), rowCount);
- break;
- case Types.NUMERIC:
- case Types.DECIMAL:
- updateVector((DecimalVector) root.getVector(columnName),
- rs.getBigDecimal(i), !rs.wasNull(), rowCount);
- break;
- case Types.REAL:
- case Types.FLOAT:
- updateVector((Float4Vector) root.getVector(columnName),
- rs.getFloat(i), !rs.wasNull(), rowCount);
- break;
- case Types.DOUBLE:
- updateVector((Float8Vector) root.getVector(columnName),
- rs.getDouble(i), !rs.wasNull(), rowCount);
- break;
- case Types.CHAR:
- case Types.NCHAR:
- case Types.VARCHAR:
- case Types.NVARCHAR:
- case Types.LONGVARCHAR:
- case Types.LONGNVARCHAR:
- updateVector((VarCharVector) root.getVector(columnName),
- rs.getString(i), !rs.wasNull(), rowCount);
- break;
- case Types.DATE:
- final Date date;
- if (calendar != null) {
- date = rs.getDate(i, calendar);
- } else {
- date = rs.getDate(i);
- }
-
- updateVector((DateMilliVector) root.getVector(columnName), date, !rs.wasNull(), rowCount);
- break;
- case Types.TIME:
- final Time time;
- if (calendar != null) {
- time = rs.getTime(i, calendar);
- } else {
- time = rs.getTime(i);
- }
-
- updateVector((TimeMilliVector) root.getVector(columnName), time, !rs.wasNull(), rowCount);
- break;
- case Types.TIMESTAMP:
- final Timestamp ts;
- if (calendar != null) {
- ts = rs.getTimestamp(i, calendar);
- } else {
- ts = rs.getTimestamp(i);
- }
-
- // TODO: Need to handle precision such as milli, micro, nano
- updateVector((TimeStampVector) root.getVector(columnName), ts, !rs.wasNull(), rowCount);
- break;
- case Types.BINARY:
- case Types.VARBINARY:
- case Types.LONGVARBINARY:
- updateVector((VarBinaryVector) root.getVector(columnName),
- rs.getBinaryStream(i), !rs.wasNull(), rowCount);
- break;
- case Types.ARRAY:
- // TODO Need to handle this type
- // fields.add(new Field("list", FieldType.nullable(new ArrowType.List()), null));
- break;
- case Types.CLOB:
- updateVector((VarCharVector) root.getVector(columnName),
- rs.getClob(i), !rs.wasNull(), rowCount);
- break;
- case Types.BLOB:
- updateVector((VarBinaryVector) root.getVector(columnName),
- rs.getBlob(i), !rs.wasNull(), rowCount);
- break;
-
- default:
- // no-op, shouldn't get here
- break;
- }
+ jdbcToFieldVector(
+ rs,
+ i,
+ rs.getMetaData().getColumnType(i),
+ rowCount,
+ root.getVector(rsmd.getColumnName(i)),
+ config);
}
rowCount++;
}
root.setRowCount(rowCount);
}
+ private static void jdbcToFieldVector(
+ ResultSet rs,
+ int i,
+ int jdbcColType,
+ int rowCount,
+ FieldVector vector,
+ JdbcToArrowConfig config)
+ throws SQLException, IOException {
+
+ final Calendar calendar = config.getCalendar();
+
+ switch (jdbcColType) {
+ case Types.BOOLEAN:
+ case Types.BIT:
+ updateVector((BitVector) vector,
+ rs.getBoolean(i), !rs.wasNull(), rowCount);
+ break;
+ case Types.TINYINT:
+ updateVector((TinyIntVector) vector,
+ rs.getInt(i), !rs.wasNull(), rowCount);
+ break;
+ case Types.SMALLINT:
+ updateVector((SmallIntVector) vector,
+ rs.getInt(i), !rs.wasNull(), rowCount);
+ break;
+ case Types.INTEGER:
+ updateVector((IntVector) vector,
+ rs.getInt(i), !rs.wasNull(), rowCount);
+ break;
+ case Types.BIGINT:
+ updateVector((BigIntVector) vector,
+ rs.getLong(i), !rs.wasNull(), rowCount);
+ break;
+ case Types.NUMERIC:
+ case Types.DECIMAL:
+ updateVector((DecimalVector) vector,
+ rs.getBigDecimal(i), !rs.wasNull(), rowCount);
+ break;
+ case Types.REAL:
+ case Types.FLOAT:
+ updateVector((Float4Vector) vector,
+ rs.getFloat(i), !rs.wasNull(), rowCount);
+ break;
+ case Types.DOUBLE:
+ updateVector((Float8Vector) vector,
+ rs.getDouble(i), !rs.wasNull(), rowCount);
+ break;
+ case Types.CHAR:
+ case Types.NCHAR:
+ case Types.VARCHAR:
+ case Types.NVARCHAR:
+ case Types.LONGVARCHAR:
+ case Types.LONGNVARCHAR:
+ updateVector((VarCharVector) vector,
+ rs.getString(i), !rs.wasNull(), rowCount);
+ break;
+ case Types.DATE:
+ final Date date;
+ if (calendar != null) {
+ date = rs.getDate(i, calendar);
+ } else {
+ date = rs.getDate(i);
+ }
+
+ updateVector((DateMilliVector) vector, date, !rs.wasNull(), rowCount);
+ break;
+ case Types.TIME:
+ final Time time;
+ if (calendar != null) {
+ time = rs.getTime(i, calendar);
+ } else {
+ time = rs.getTime(i);
+ }
+
+ updateVector((TimeMilliVector) vector, time, !rs.wasNull(), rowCount);
+ break;
+ case Types.TIMESTAMP:
+ final Timestamp ts;
+ if (calendar != null) {
+ ts = rs.getTimestamp(i, calendar);
+ } else {
+ ts = rs.getTimestamp(i);
+ }
+
+ // TODO: Need to handle precision such as milli, micro, nano
+ updateVector((TimeStampVector) vector, ts, !rs.wasNull(), rowCount);
+ break;
+ case Types.BINARY:
+ case Types.VARBINARY:
+ case Types.LONGVARBINARY:
+ updateVector((VarBinaryVector) vector,
+ rs.getBinaryStream(i), !rs.wasNull(), rowCount);
+ break;
+ case Types.ARRAY:
+ updateVector((ListVector) vector, rs, i, rowCount, config);
+ break;
+ case Types.CLOB:
+ updateVector((VarCharVector) vector,
+ rs.getClob(i), !rs.wasNull(), rowCount);
+ break;
+ case Types.BLOB:
+ updateVector((VarBinaryVector) vector,
+ rs.getBlob(i), !rs.wasNull(), rowCount);
+ break;
+
+ default:
+ // no-op, shouldn't get here
+ break;
+ }
+ }
+
private static void updateVector(BitVector bitVector, boolean value, boolean isNonNull, int rowCount) {
NullableBitHolder holder = new NullableBitHolder();
holder.isSet = isNonNull ? 1 : 0;
@@ -620,4 +717,39 @@ private static void updateVector(VarBinaryVector varBinaryVector, Blob blob, boo
updateVector(varBinaryVector, blob != null ? blob.getBinaryStream() : null, isNonNull, rowCount);
}
+ private static void updateVector(
+ ListVector listVector,
+ ResultSet resultSet,
+ int arrayIndex,
+ int rowCount,
+ JdbcToArrowConfig config)
+ throws SQLException, IOException {
+
+ final JdbcFieldInfo fieldInfo = getJdbcFieldInfoForArraySubType(resultSet.getMetaData(), arrayIndex, config);
+ if (fieldInfo == null) {
+ throw new IllegalStateException("Column " + arrayIndex + " is an array of unknown type.");
+ }
+
+ final int valueCount = listVector.getValueCount();
+ final Array array = resultSet.getArray(arrayIndex);
+
+ FieldVector fieldVector = listVector.getDataVector();
+ int arrayRowCount = 0;
+
+ listVector.startNewValue(rowCount);
+
+ if (!resultSet.wasNull()) {
+ try (ResultSet rs = array.getResultSet()) {
+
+ while (rs.next()) {
+ // The second column contains the actual data.
+ jdbcToFieldVector(rs, 2, fieldInfo.getJdbcType(), valueCount + arrayRowCount, fieldVector, config);
+ arrayRowCount++;
+ }
+ }
+ }
+
+ listVector.endValue(rowCount, arrayRowCount);
+ listVector.setValueCount(valueCount + arrayRowCount);
+ }
}
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java
index bafb2dcdcc341..d2d4b24686cf0 100644
--- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java
+++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java
@@ -19,7 +19,9 @@
import static org.junit.Assert.*;
+import java.sql.Types;
import java.util.Calendar;
+import java.util.HashMap;
import java.util.Locale;
import java.util.TimeZone;
@@ -95,7 +97,8 @@ public void testConfig() {
assertTrue(newCalendar == config.getCalendar());
}
- @Test public void testIncludeMetadata() {
+ @Test
+ public void testIncludeMetadata() {
JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(allocator, calendar, false);
JdbcToArrowConfig config = builder.build();
@@ -114,4 +117,34 @@ public void testConfig() {
config = new JdbcToArrowConfig(allocator, calendar, false);
assertFalse(config.shouldIncludeMetadata());
}
+
+ @Test
+ public void testArraySubTypes() {
+ JdbcToArrowConfig config = new JdbcToArrowConfig(allocator, calendar, false);
+
+ final int columnIndex = 1;
+ final String columnName = "COLUMN";
+
+ assertNull(config.getArraySubTypeByColumnIndex(columnIndex));
+ assertNull(config.getArraySubTypeByColumnName(columnName));
+
+ final HashMap indexMapping = new HashMap();
+ indexMapping.put(2, new JdbcFieldInfo(Types.BIGINT));
+
+ final HashMap fieldMapping = new HashMap();
+ fieldMapping.put("NEW_COLUMN", new JdbcFieldInfo(Types.BINARY));
+
+ config.setArraySubTypeByColumnIndexMap(indexMapping);
+ config.setArraySubTypeByColumnNameMap(fieldMapping);
+
+ assertNull(config.getArraySubTypeByColumnIndex(columnIndex));
+ assertNull(config.getArraySubTypeByColumnName(columnName));
+
+ indexMapping.put(columnIndex, new JdbcFieldInfo(Types.BIT));
+ fieldMapping.put(columnName, new JdbcFieldInfo(Types.BLOB));
+
+ assertNotNull(config.getArraySubTypeByColumnIndex(columnIndex));
+ assertEquals(Types.BIT, config.getArraySubTypeByColumnIndex(columnIndex).getJdbcType());
+ assertEquals(Types.BLOB, config.getArraySubTypeByColumnName(columnName).getJdbcType());
+ }
}
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java
index 98f799c1b5bf6..2137162667ef1 100644
--- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java
+++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java
@@ -204,7 +204,7 @@ public void setRowCount(int rowCount) {
this.rowCount = rowCount;
}
- private byte[][] getByteArray(String[] data) {
+ static byte[][] getByteArray(String[] data) {
byte[][] byteArr = new byte[data.length][];
for (int i = 0; i < data.length; i++) {
@@ -213,7 +213,7 @@ private byte[][] getByteArray(String[] data) {
return byteArr;
}
- private byte[][] getHexToByteArray(String[] data) {
+ static byte[][] getHexToByteArray(String[] data) {
byte[][] byteArr = new byte[data.length][];
for (int i = 0; i < data.length; i++) {
@@ -222,7 +222,7 @@ private byte[][] getHexToByteArray(String[] data) {
return byteArr;
}
- private static byte[] hexStringToByteArray(String s) {
+ static byte[] hexStringToByteArray(String s) {
int len = s.length();
byte[] data = new byte[len / 2];
for (int i = 0; i < len; i += 2) {