diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfo.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfo.java new file mode 100644 index 0000000000000..db20bef2907cc --- /dev/null +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfo.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc; + +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.Types; + +import com.google.common.base.Preconditions; + +/** + * This class represents the information about a JDBC ResultSet Field that is + * needed to construct an {@link org.apache.arrow.vector.types.pojo.ArrowType}. + * Currently, this is: + * + */ +public class JdbcFieldInfo { + private final int jdbcType; + private final int precision; + private final int scale; + + /** + * Builds a JdbcFieldInfo using only the {@link java.sql.Types} type. Do not use this constructor + * if the field type is {@link java.sql.Types#DECIMAL} or {@link java.sql.Types#NUMERIC}; the precision and + * scale will be set to 0. + * + * @param jdbcType The {@link java.sql.Types} type. + * @throws IllegalArgumentException if jdbcType is {@link java.sql.Types#DECIMAL} or {@link java.sql.Types#NUMERIC}. + */ + public JdbcFieldInfo(int jdbcType) { + Preconditions.checkArgument( + (jdbcType != Types.DECIMAL && jdbcType != Types.NUMERIC), + "DECIMAL and NUMERIC types require a precision and scale; please use another constructor."); + + this.jdbcType = jdbcType; + this.precision = 0; + this.scale = 0; + } + + /** + * Builds a JdbcFieldInfo from the {@link java.sql.Types} type, precision, and scale. + * Use this constructor for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} types. + * + * @param jdbcType The {@link java.sql.Types} type. + * @param precision The field's numeric precision. + * @param scale The field's numeric scale. + */ + public JdbcFieldInfo(int jdbcType, int precision, int scale) { + this.jdbcType = jdbcType; + this.precision = precision; + this.scale = scale; + } + + /** + * Builds a JdbcFieldInfo from the corresponding {@link java.sql.ResultSetMetaData} column. + * + * @param rsmd The {@link java.sql.ResultSetMetaData} to get the field information from. + * @param column The column to get the field information for (on a 1-based index). + * @throws SQLException If the column information cannot be retrieved. + * @throws NullPointerException if rsmd is null. + * @throws IllegalArgumentException if column is out of bounds. + */ + public JdbcFieldInfo(ResultSetMetaData rsmd, int column) throws SQLException { + Preconditions.checkNotNull(rsmd, "ResultSetMetaData cannot be null."); + Preconditions.checkArgument(column > 0, "ResultSetMetaData columns have indices starting at 1."); + Preconditions.checkArgument( + column <= rsmd.getColumnCount(), + "The index must be within the number of columns (1 to %s, inclusive)", rsmd.getColumnCount()); + + this.jdbcType = rsmd.getColumnType(column); + this.precision = rsmd.getPrecision(column); + this.scale = rsmd.getScale(column); + } + + /** + * The {@link java.sql.Types} type. + */ + public int getJdbcType() { + return jdbcType; + } + + /** + * The numeric precision, for {@link java.sql.Types#NUMERIC} and {@link java.sql.Types#DECIMAL} types. + */ + public int getPrecision() { + return precision; + } + + /** + * The numeric scale, for {@link java.sql.Types#NUMERIC} and {@link java.sql.Types#DECIMAL} types. + */ + public int getScale() { + return scale; + } +} diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java index 79102043a0f83..d5be486c84390 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java @@ -88,7 +88,7 @@ public static VectorSchemaRoot sqlToArrow(Connection connection, String query, B Preconditions.checkNotNull(allocator, "Memory allocator object can not be null"); JdbcToArrowConfig config = - new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar(), false); + new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar()); return sqlToArrow(connection, query, config); } @@ -116,7 +116,7 @@ public static VectorSchemaRoot sqlToArrow( Preconditions.checkNotNull(allocator, "Memory allocator object can not be null"); Preconditions.checkNotNull(calendar, "Calendar object can not be null"); - return sqlToArrow(connection, query, new JdbcToArrowConfig(allocator, calendar, false)); + return sqlToArrow(connection, query, new JdbcToArrowConfig(allocator, calendar)); } /** @@ -170,7 +170,7 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator all Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null"); JdbcToArrowConfig config = - new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar(), false); + new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar()); return sqlToArrow(resultSet, config); } @@ -184,8 +184,7 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator all */ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar) throws SQLException, IOException { Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null"); - - return sqlToArrow(resultSet, new JdbcToArrowConfig(new RootAllocator(Integer.MAX_VALUE), calendar, false)); + return sqlToArrow(resultSet, new JdbcToArrowConfig(new RootAllocator(Integer.MAX_VALUE), calendar)); } /** @@ -205,7 +204,7 @@ public static VectorSchemaRoot sqlToArrow( Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null"); Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null"); - return sqlToArrow(resultSet, new JdbcToArrowConfig(allocator, calendar, false)); + return sqlToArrow(resultSet, new JdbcToArrowConfig(allocator, calendar)); } /** diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java index 8f2a8ef54f839..c5fcceec28286 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java @@ -18,6 +18,7 @@ package org.apache.arrow.adapter.jdbc; import java.util.Calendar; +import java.util.Map; import org.apache.arrow.memory.BaseAllocator; @@ -28,16 +29,29 @@ *

* The allocator is used to construct the {@link org.apache.arrow.vector.VectorSchemaRoot}, * and the calendar is used to define the time zone of any {@link org.apahe.arrow.vector.pojo.ArrowType.Timestamp} - * fields that are created during the conversion. + * fields that are created during the conversion. Neither field may be null. *

*

- * Neither field may be null. + * If the includeMetadata flag is set, the Arrow field metadata will contain information + * from the corresponding {@link java.sql.ResultSetMetaData} that was used to create the + * {@link org.apache.arrow.vector.types.pojo.FieldType} of the corresponding + * {@link org.apache.arrow.vector.FieldVector}. + *

+ *

+ * If there are any {@link java.sql.Types#ARRAY} fields in the {@link java.sql.ResultSet}, the corresponding + * {@link JdbcFieldInfo} for the array's contents must be defined here. Unfortunately, the sub-type + * information cannot be retrieved from all JDBC implementations (H2 for example, returns + * {@link java.sql.Types#NULL} for the array sub-type), so it must be configured here. The column index + * or name can be used to map to a {@link JdbcFieldInfo}, and that will be used for the conversion. *

*/ public final class JdbcToArrowConfig { + private Calendar calendar; private BaseAllocator allocator; private boolean includeMetadata; + private Map arraySubTypesByColumnIndex; + private Map arraySubTypesByColumnName; /** * Constructs a new configuration from the provided allocator and calendar. The allocator @@ -46,20 +60,47 @@ public final class JdbcToArrowConfig { * * @param allocator The memory allocator to construct the Arrow vectors with. * @param calendar The calendar to use when constructing Timestamp fields and reading time-based results. - * @param includeMetadata Whether to include JDBC field metadata in the Arrow Schema Field metadata. */ - JdbcToArrowConfig(BaseAllocator allocator, Calendar calendar, boolean includeMetadata) { + JdbcToArrowConfig(BaseAllocator allocator, Calendar calendar) { Preconditions.checkNotNull(allocator, "Memory allocator cannot be null"); this.allocator = allocator; this.calendar = calendar; + this.includeMetadata = false; + this.arraySubTypesByColumnIndex = null; + this.arraySubTypesByColumnName = null; + } + + /** + * Constructs a new configuration from the provided allocator and calendar. The allocator + * is used when constructing the Arrow vectors from the ResultSet, and the calendar is used to define + * Arrow Timestamp fields, and to read time-based fields from the JDBC ResultSet. + * + * @param allocator The memory allocator to construct the Arrow vectors with. + * @param calendar The calendar to use when constructing Timestamp fields and reading time-based results. + * @param includeMetadata Whether to include JDBC field metadata in the Arrow Schema Field metadata. + * @param arraySubTypesByColumnIndex The type of the JDBC array at the column index (1-based). + * @param arraySubTypesByColumnName The type of the JDBC array at the column name. + */ + JdbcToArrowConfig( + BaseAllocator allocator, + Calendar calendar, + boolean includeMetadata, + Map arraySubTypesByColumnIndex, + Map arraySubTypesByColumnName) { + + this(allocator, calendar); + this.includeMetadata = includeMetadata; + this.arraySubTypesByColumnIndex = arraySubTypesByColumnIndex; + this.arraySubTypesByColumnName = arraySubTypesByColumnName; } /** * The calendar to use when defining Arrow Timestamp fields * and retrieving {@link Date}, {@link Time}, or {@link Timestamp} * data types from the {@link ResultSet}, or null if not converting. + * * @return the calendar. */ public Calendar getCalendar() { @@ -82,4 +123,32 @@ public BaseAllocator getAllocator() { public boolean shouldIncludeMetadata() { return includeMetadata; } + + /** + * Returns the array sub-type {@link JdbcFieldInfo} defined for the provided column index. + * + * @param index The {@link java.sql.ResultSetMetaData} column index of an {@link java.sql.Types#ARRAY} type. + * @return The {@link JdbcFieldInfo} for that array's sub-type, or null if not defined. + */ + public JdbcFieldInfo getArraySubTypeByColumnIndex(int index) { + if (arraySubTypesByColumnIndex == null) { + return null; + } else { + return arraySubTypesByColumnIndex.get(index); + } + } + + /** + * Returns the array sub-type {@link JdbcFieldInfo} defined for the provided column name. + * + * @param index The {@link java.sql.ResultSetMetaData} column name of an {@link java.sql.Types#ARRAY} type. + * @return The {@link JdbcFieldInfo} for that array's sub-type, or null if not defined. + */ + public JdbcFieldInfo getArraySubTypeByColumnName(String name) { + if (arraySubTypesByColumnName == null) { + return null; + } else { + return arraySubTypesByColumnName.get(name); + } + } } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java index 51327aa2d0f5d..ea351d8279e93 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java @@ -18,6 +18,7 @@ package org.apache.arrow.adapter.jdbc; import java.util.Calendar; +import java.util.Map; import org.apache.arrow.memory.BaseAllocator; @@ -30,6 +31,8 @@ public class JdbcToArrowConfigBuilder { private Calendar calendar; private BaseAllocator allocator; private boolean includeMetadata; + private Map arraySubTypesByColumnIndex; + private Map arraySubTypesByColumnName; /** * Default constructor for the JdbcToArrowConfigBuilder}. @@ -40,6 +43,8 @@ public JdbcToArrowConfigBuilder() { this.allocator = null; this.calendar = null; this.includeMetadata = false; + this.arraySubTypesByColumnIndex = null; + this.arraySubTypesByColumnName = null; } /** @@ -126,6 +131,29 @@ public JdbcToArrowConfigBuilder setIncludeMetadata(boolean includeMetadata) { return this; } + /** + * Sets the mapping of column-index-to-{@link JdbcFieldInfo} used for columns of type {@link java.sql.Types#ARRAY}. + * The column index is 1-based, to match the JDBC column index. + * + * @param map The mapping. + * @return This instance of the JdbcToArrowConfig, for chaining. + */ + public JdbcToArrowConfigBuilder setArraySubTypeByColumnIndexMap(Map map) { + this.arraySubTypesByColumnIndex = map; + return this; + } + + /** + * Sets the mapping of column-name-to-{@link JdbcFieldInfo} used for columns of type {@link java.sql.Types#ARRAY}. + * + * @param map The mapping. + * @return This instance of the JdbcToArrowConfig, for chaining. + */ + public JdbcToArrowConfigBuilder setArraySubTypeByColumnNameMap(Map map) { + this.arraySubTypesByColumnName = map; + return this; + } + /** * This builds the {@link JdbcToArrowConfig} from the provided * {@link BaseAllocator} and {@link Calendar}. @@ -134,6 +162,11 @@ public JdbcToArrowConfigBuilder setIncludeMetadata(boolean includeMetadata) { * @throws NullPointerException if either the allocator or calendar was not set. */ public JdbcToArrowConfig build() { - return new JdbcToArrowConfig(allocator, calendar, includeMetadata); + return new JdbcToArrowConfig( + allocator, + calendar, + includeMetadata, + arraySubTypesByColumnIndex, + arraySubTypesByColumnName); } } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java index 833ca8410a969..f54363f5befb2 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java @@ -25,6 +25,7 @@ import java.io.InputStream; import java.math.BigDecimal; import java.nio.charset.StandardCharsets; +import java.sql.Array; import java.sql.Blob; import java.sql.Clob; import java.sql.Date; @@ -59,6 +60,7 @@ import org.apache.arrow.vector.VarBinaryVector; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.holders.NullableBigIntHolder; import org.apache.arrow.vector.holders.NullableBitHolder; import org.apache.arrow.vector.holders.NullableDateMilliHolder; @@ -95,6 +97,15 @@ public class JdbcToArrowUtils { private static final int DEFAULT_STREAM_BUFFER_SIZE = 1024; private static final int DEFAULT_CLOB_SUBSTRING_READ_SIZE = 256; + private static final int JDBC_ARRAY_VALUE_COLUMN = 2; + + /** + * Returns the instance of a {java.util.Calendar} with the UTC time zone and root locale. + */ + public static Calendar getUtcCalendar() { + return Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT); + } + /** * Create Arrow {@link Schema} object for the given JDBC {@link ResultSetMetaData}. * @@ -107,67 +118,46 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null"); Preconditions.checkNotNull(calendar, "Calendar object can't be null"); - return jdbcToArrowSchema(rsmd, new JdbcToArrowConfig(new RootAllocator(0), calendar, false)); - } - - /** - * Returns the instance of a {java.util.Calendar} with the UTC time zone and root locale. - */ - public static Calendar getUtcCalendar() { - return Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT); + return jdbcToArrowSchema(rsmd, new JdbcToArrowConfig(new RootAllocator(0), calendar)); } /** - * Create Arrow {@link Schema} object for the given JDBC {@link ResultSetMetaData}. - * - *

This method currently performs following type mapping for JDBC SQL data types to corresponding Arrow data types. - * - *

CHAR --> ArrowType.Utf8 - * NCHAR --> ArrowType.Utf8 - * VARCHAR --> ArrowType.Utf8 - * NVARCHAR --> ArrowType.Utf8 - * LONGVARCHAR --> ArrowType.Utf8 - * LONGNVARCHAR --> ArrowType.Utf8 - * NUMERIC --> ArrowType.Decimal(precision, scale) - * DECIMAL --> ArrowType.Decimal(precision, scale) - * BIT --> ArrowType.Bool - * TINYINT --> ArrowType.Int(8, signed) - * SMALLINT --> ArrowType.Int(16, signed) - * INTEGER --> ArrowType.Int(32, signed) - * BIGINT --> ArrowType.Int(64, signed) - * REAL --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE) - * FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE) - * DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE) - * BINARY --> ArrowType.Binary - * VARBINARY --> ArrowType.Binary - * LONGVARBINARY --> ArrowType.Binary - * DATE --> ArrowType.Date(DateUnit.MILLISECOND) - * TIME --> ArrowType.Time(TimeUnit.MILLISECOND, 32) - * TIMESTAMP --> ArrowType.Timestamp(TimeUnit.MILLISECOND, timezone=null) - * CLOB --> ArrowType.Utf8 - * BLOB --> ArrowType.Binary + * Create Arrow {@link Schema} object for the given JDBC {@link java.sql.ResultSetMetaData}. + *

+ * The {@link JdbcToArrowUtils#getArrowTypeForJdbcField(JdbcFieldInfo, Calendar)} method is used to construct a + * {@link org.apache.arrow.vector.types.pojo.ArrowType} for each field in the {@link java.sql.ResultSetMetaData}. + *

+ *

+ * If {@link JdbcToArrowConfig#getIncludeMetadata()} returns true, the following fields + * will be added to the {@link FieldType#getMetadata()}: + *

    + *
  • {@link Constants#SQL_CATALOG_NAME_KEY} representing {@link ResultSetMetaData#getCatalogName(int)}
  • + *
  • {@link Constants#SQL_TABLE_NAME_KEY} representing {@link ResultSetMetaData#getTableName(int)}
  • + *
  • {@link Constants#SQL_COLUMN_NAME_KEY} representing {@link ResultSetMetaData#getColumnName(int)}
  • + *
  • {@link Constants#SQL_TYPE_KEY} representing {@link ResultSetMetaData#getColumnTypeName(int)}
  • + *
+ *

+ *

+ * If any columns are of type {@link java.sql.Types#ARRAY}, the configuration object will be used to look up + * the array sub-type field. The {@link JdbcToArrowConfig#getArraySubTypeByColumnIndex(int)} method will be + * checked first, followed by the {@link JdbcToArrowConfig#getArraySubTypeByColumnName(String)} method. + *

* * @param rsmd The ResultSetMetaData containing the results, to read the JDBC metadata from. * @param config The configuration to use when constructing the schema. * @return {@link Schema} * @throws SQLException on error + * @throws IllegalArgumentException if rsmd contains an {@link java.sql.Types#ARRAY} but the + * config does not have a sub-type definition for it. */ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig config) throws SQLException { Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null"); Preconditions.checkNotNull(config, "The configuration object must not be null"); - final String timezone; - if (config.getCalendar() != null) { - timezone = config.getCalendar().getTimeZone().getID(); - } else { - timezone = null; - } - List fields = new ArrayList<>(); int columnCount = rsmd.getColumnCount(); for (int i = 1; i <= columnCount; i++) { final String columnName = rsmd.getColumnName(i); - final FieldType fieldType; final Map metadata; if (config.shouldIncludeMetadata()) { @@ -181,83 +171,172 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig metadata = null; } - switch (rsmd.getColumnType(i)) { - case Types.BOOLEAN: - case Types.BIT: - fieldType = new FieldType(true, new ArrowType.Bool(), null, metadata); - break; - case Types.TINYINT: - fieldType = new FieldType(true, new ArrowType.Int(8, true), null, metadata); - break; - case Types.SMALLINT: - fieldType = new FieldType(true, new ArrowType.Int(16, true), null, metadata); - break; - case Types.INTEGER: - fieldType = new FieldType(true, new ArrowType.Int(32, true), null, metadata); - break; - case Types.BIGINT: - fieldType = new FieldType(true, new ArrowType.Int(64, true), null, metadata); - break; - case Types.NUMERIC: - case Types.DECIMAL: - int precision = rsmd.getPrecision(i); - int scale = rsmd.getScale(i); - fieldType = new FieldType(true, new ArrowType.Decimal(precision, scale), null, metadata); - break; - case Types.REAL: - case Types.FLOAT: - fieldType = new FieldType(true, new ArrowType.FloatingPoint(SINGLE), null, metadata); - break; - case Types.DOUBLE: - fieldType = new FieldType(true, new ArrowType.FloatingPoint(DOUBLE), null, metadata); - break; - case Types.CHAR: - case Types.NCHAR: - case Types.VARCHAR: - case Types.NVARCHAR: - case Types.LONGVARCHAR: - case Types.LONGNVARCHAR: - case Types.CLOB: - fieldType = new FieldType(true, new ArrowType.Utf8(), null, metadata); - break; - case Types.DATE: - fieldType = new FieldType(true, new ArrowType.Date(DateUnit.MILLISECOND), null, metadata); - break; - case Types.TIME: - fieldType = new FieldType(true, new ArrowType.Time(TimeUnit.MILLISECOND, 32), null, metadata); - break; - case Types.TIMESTAMP: - fieldType = - new FieldType( - true, - new ArrowType.Timestamp(TimeUnit.MILLISECOND, timezone), - null, - metadata); - break; - case Types.BINARY: - case Types.VARBINARY: - case Types.LONGVARBINARY: - case Types.BLOB: - fieldType = new FieldType(true, new ArrowType.Binary(), null, metadata); - break; - - case Types.ARRAY: - // TODO Need to handle this type - // fields.add(new Field("list", FieldType.nullable(new ArrowType.List()), null)); - default: - // no-op, shouldn't get here - fieldType = null; - break; - } + final ArrowType arrowType = getArrowTypeForJdbcField(new JdbcFieldInfo(rsmd, i), config.getCalendar()); + if (arrowType != null) { + final FieldType fieldType = new FieldType(true, arrowType, /* dictionary encoding */ null, metadata); + + List children = null; + if (arrowType.getTypeID() == ArrowType.List.TYPE_TYPE) { + final JdbcFieldInfo arrayFieldInfo = getJdbcFieldInfoForArraySubType(rsmd, i, config); + if (arrayFieldInfo == null) { + throw new IllegalArgumentException("Configuration does not provide a mapping for array column " + i); + } + children = new ArrayList(); + final ArrowType childType = + getArrowTypeForJdbcField(arrayFieldInfo, config.getCalendar()); + children.add(new Field("child", FieldType.nullable(childType), null)); + } - if (fieldType != null) { - fields.add(new Field(columnName, fieldType, null)); + fields.add(new Field(columnName, fieldType, children)); } } return new Schema(fields, null); } + /** + * Creates an {@link org.apache.arrow.vector.types.pojo.ArrowType} + * from the {@link JdbcFieldInfo} and {@link java.util.Calendar}. + * + *

This method currently performs following type mapping for JDBC SQL data types to corresponding Arrow data types. + * + *

    + *
  • CHAR --> ArrowType.Utf8
  • + *
  • NCHAR --> ArrowType.Utf8
  • + *
  • VARCHAR --> ArrowType.Utf8
  • + *
  • NVARCHAR --> ArrowType.Utf8
  • + *
  • LONGVARCHAR --> ArrowType.Utf8
  • + *
  • LONGNVARCHAR --> ArrowType.Utf8
  • + *
  • NUMERIC --> ArrowType.Decimal(precision, scale)
  • + *
  • DECIMAL --> ArrowType.Decimal(precision, scale)
  • + *
  • BIT --> ArrowType.Bool
  • + *
  • TINYINT --> ArrowType.Int(8, signed)
  • + *
  • SMALLINT --> ArrowType.Int(16, signed)
  • + *
  • INTEGER --> ArrowType.Int(32, signed)
  • + *
  • BIGINT --> ArrowType.Int(64, signed)
  • + *
  • REAL --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)
  • + *
  • FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)
  • + *
  • DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)
  • + *
  • BINARY --> ArrowType.Binary
  • + *
  • VARBINARY --> ArrowType.Binary
  • + *
  • LONGVARBINARY --> ArrowType.Binary
  • + *
  • DATE --> ArrowType.Date(DateUnit.MILLISECOND)
  • + *
  • TIME --> ArrowType.Time(TimeUnit.MILLISECOND, 32)
  • + *
  • TIMESTAMP --> ArrowType.Timestamp(TimeUnit.MILLISECOND, calendar timezone)
  • + *
  • CLOB --> ArrowType.Utf8
  • + *
  • BLOB --> ArrowType.Binary
  • + *
+ * + * @param fieldInfo The field information to construct the ArrowType from. + * @param calendar The calendar to use when constructing the ArrowType.Timestamp + * for {@link java.sql.Types#TIMESTAMP} types. + * @return The corresponding ArrowType. + * @throws NullPointerException if either fieldInfo or calendar are null. + */ + public static ArrowType getArrowTypeForJdbcField(JdbcFieldInfo fieldInfo, Calendar calendar) { + Preconditions.checkNotNull(fieldInfo, "JdbcFieldInfo object cannot be null"); + + final String timezone; + if (calendar != null) { + timezone = calendar.getTimeZone().getID(); + } else { + timezone = null; + } + + + final ArrowType arrowType; + + switch (fieldInfo.getJdbcType()) { + case Types.BOOLEAN: + case Types.BIT: + arrowType = new ArrowType.Bool(); + break; + case Types.TINYINT: + arrowType = new ArrowType.Int(8, true); + break; + case Types.SMALLINT: + arrowType = new ArrowType.Int(16, true); + break; + case Types.INTEGER: + arrowType = new ArrowType.Int(32, true); + break; + case Types.BIGINT: + arrowType = new ArrowType.Int(64, true); + break; + case Types.NUMERIC: + case Types.DECIMAL: + int precision = fieldInfo.getPrecision(); + int scale = fieldInfo.getScale(); + arrowType = new ArrowType.Decimal(precision, scale); + break; + case Types.REAL: + case Types.FLOAT: + arrowType = new ArrowType.FloatingPoint(SINGLE); + break; + case Types.DOUBLE: + arrowType = new ArrowType.FloatingPoint(DOUBLE); + break; + case Types.CHAR: + case Types.NCHAR: + case Types.VARCHAR: + case Types.NVARCHAR: + case Types.LONGVARCHAR: + case Types.LONGNVARCHAR: + case Types.CLOB: + arrowType = new ArrowType.Utf8(); + break; + case Types.DATE: + arrowType = new ArrowType.Date(DateUnit.MILLISECOND); + break; + case Types.TIME: + arrowType = new ArrowType.Time(TimeUnit.MILLISECOND, 32); + break; + case Types.TIMESTAMP: + arrowType = new ArrowType.Timestamp(TimeUnit.MILLISECOND, timezone); + break; + case Types.BINARY: + case Types.VARBINARY: + case Types.LONGVARBINARY: + case Types.BLOB: + arrowType = new ArrowType.Binary(); + break; + case Types.ARRAY: + arrowType = new ArrowType.List(); + break; + default: + // no-op, shouldn't get here + arrowType = null; + break; + } + + return arrowType; + } + + /* Uses the configuration to determine what the array sub-type JdbcFieldInfo is. + * If no sub-type can be found, returns null. + */ + private static JdbcFieldInfo getJdbcFieldInfoForArraySubType( + ResultSetMetaData rsmd, + int arrayColumn, + JdbcToArrowConfig config) + throws SQLException { + + Preconditions.checkNotNull(rsmd, "ResultSet MetaData object cannot be null"); + Preconditions.checkNotNull(config, "Configuration must not be null"); + Preconditions.checkArgument( + arrayColumn > 0, + "ResultSetMetaData columns start with 1; column cannot be less than 1"); + Preconditions.checkArgument( + arrayColumn <= rsmd.getColumnCount(), + "Column number cannot be more than the number of columns"); + + JdbcFieldInfo fieldInfo = config.getArraySubTypeByColumnIndex(arrayColumn); + if (fieldInfo == null) { + fieldInfo = config.getArraySubTypeByColumnName(rsmd.getColumnName(arrayColumn)); + } + return fieldInfo; + } + private static void allocateVectors(VectorSchemaRoot root, int size) { List vectors = root.getFieldVectors(); for (FieldVector fieldVector : vectors) { @@ -284,9 +363,10 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calen throws SQLException, IOException { Preconditions.checkNotNull(rs, "JDBC ResultSet object can't be null"); - Preconditions.checkNotNull(root, "JDBC ResultSet object can't be null"); + Preconditions.checkNotNull(root, "Vector Schema cannot be null"); + Preconditions.checkNotNull(calendar, "Calendar object can't be null"); - jdbcToArrowVectors(rs, root, new JdbcToArrowConfig(new RootAllocator(0), calendar, false)); + jdbcToArrowVectors(rs, root, new JdbcToArrowConfig(new RootAllocator(0), calendar)); } /** @@ -310,117 +390,133 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, JdbcT allocateVectors(root, DEFAULT_BUFFER_SIZE); - final Calendar calendar = config.getCalendar(); - int rowCount = 0; while (rs.next()) { for (int i = 1; i <= columnCount; i++) { - String columnName = rsmd.getColumnName(i); - switch (rsmd.getColumnType(i)) { - case Types.BOOLEAN: - case Types.BIT: - updateVector((BitVector) root.getVector(columnName), - rs.getBoolean(i), !rs.wasNull(), rowCount); - break; - case Types.TINYINT: - updateVector((TinyIntVector) root.getVector(columnName), - rs.getInt(i), !rs.wasNull(), rowCount); - break; - case Types.SMALLINT: - updateVector((SmallIntVector) root.getVector(columnName), - rs.getInt(i), !rs.wasNull(), rowCount); - break; - case Types.INTEGER: - updateVector((IntVector) root.getVector(columnName), - rs.getInt(i), !rs.wasNull(), rowCount); - break; - case Types.BIGINT: - updateVector((BigIntVector) root.getVector(columnName), - rs.getLong(i), !rs.wasNull(), rowCount); - break; - case Types.NUMERIC: - case Types.DECIMAL: - updateVector((DecimalVector) root.getVector(columnName), - rs.getBigDecimal(i), !rs.wasNull(), rowCount); - break; - case Types.REAL: - case Types.FLOAT: - updateVector((Float4Vector) root.getVector(columnName), - rs.getFloat(i), !rs.wasNull(), rowCount); - break; - case Types.DOUBLE: - updateVector((Float8Vector) root.getVector(columnName), - rs.getDouble(i), !rs.wasNull(), rowCount); - break; - case Types.CHAR: - case Types.NCHAR: - case Types.VARCHAR: - case Types.NVARCHAR: - case Types.LONGVARCHAR: - case Types.LONGNVARCHAR: - updateVector((VarCharVector) root.getVector(columnName), - rs.getString(i), !rs.wasNull(), rowCount); - break; - case Types.DATE: - final Date date; - if (calendar != null) { - date = rs.getDate(i, calendar); - } else { - date = rs.getDate(i); - } - - updateVector((DateMilliVector) root.getVector(columnName), date, !rs.wasNull(), rowCount); - break; - case Types.TIME: - final Time time; - if (calendar != null) { - time = rs.getTime(i, calendar); - } else { - time = rs.getTime(i); - } - - updateVector((TimeMilliVector) root.getVector(columnName), time, !rs.wasNull(), rowCount); - break; - case Types.TIMESTAMP: - final Timestamp ts; - if (calendar != null) { - ts = rs.getTimestamp(i, calendar); - } else { - ts = rs.getTimestamp(i); - } - - // TODO: Need to handle precision such as milli, micro, nano - updateVector((TimeStampVector) root.getVector(columnName), ts, !rs.wasNull(), rowCount); - break; - case Types.BINARY: - case Types.VARBINARY: - case Types.LONGVARBINARY: - updateVector((VarBinaryVector) root.getVector(columnName), - rs.getBinaryStream(i), !rs.wasNull(), rowCount); - break; - case Types.ARRAY: - // TODO Need to handle this type - // fields.add(new Field("list", FieldType.nullable(new ArrowType.List()), null)); - break; - case Types.CLOB: - updateVector((VarCharVector) root.getVector(columnName), - rs.getClob(i), !rs.wasNull(), rowCount); - break; - case Types.BLOB: - updateVector((VarBinaryVector) root.getVector(columnName), - rs.getBlob(i), !rs.wasNull(), rowCount); - break; - - default: - // no-op, shouldn't get here - break; - } + jdbcToFieldVector( + rs, + i, + rs.getMetaData().getColumnType(i), + rowCount, + root.getVector(rsmd.getColumnName(i)), + config); } rowCount++; } root.setRowCount(rowCount); } + private static void jdbcToFieldVector( + ResultSet rs, + int columnIndex, + int jdbcColType, + int rowCount, + FieldVector vector, + JdbcToArrowConfig config) + throws SQLException, IOException { + + final Calendar calendar = config.getCalendar(); + + switch (jdbcColType) { + case Types.BOOLEAN: + case Types.BIT: + updateVector((BitVector) vector, + rs.getBoolean(columnIndex), !rs.wasNull(), rowCount); + break; + case Types.TINYINT: + updateVector((TinyIntVector) vector, + rs.getInt(columnIndex), !rs.wasNull(), rowCount); + break; + case Types.SMALLINT: + updateVector((SmallIntVector) vector, + rs.getInt(columnIndex), !rs.wasNull(), rowCount); + break; + case Types.INTEGER: + updateVector((IntVector) vector, + rs.getInt(columnIndex), !rs.wasNull(), rowCount); + break; + case Types.BIGINT: + updateVector((BigIntVector) vector, + rs.getLong(columnIndex), !rs.wasNull(), rowCount); + break; + case Types.NUMERIC: + case Types.DECIMAL: + updateVector((DecimalVector) vector, + rs.getBigDecimal(columnIndex), !rs.wasNull(), rowCount); + break; + case Types.REAL: + case Types.FLOAT: + updateVector((Float4Vector) vector, + rs.getFloat(columnIndex), !rs.wasNull(), rowCount); + break; + case Types.DOUBLE: + updateVector((Float8Vector) vector, + rs.getDouble(columnIndex), !rs.wasNull(), rowCount); + break; + case Types.CHAR: + case Types.NCHAR: + case Types.VARCHAR: + case Types.NVARCHAR: + case Types.LONGVARCHAR: + case Types.LONGNVARCHAR: + updateVector((VarCharVector) vector, + rs.getString(columnIndex), !rs.wasNull(), rowCount); + break; + case Types.DATE: + final Date date; + if (calendar != null) { + date = rs.getDate(columnIndex, calendar); + } else { + date = rs.getDate(columnIndex); + } + + updateVector((DateMilliVector) vector, date, !rs.wasNull(), rowCount); + break; + case Types.TIME: + final Time time; + if (calendar != null) { + time = rs.getTime(columnIndex, calendar); + } else { + time = rs.getTime(columnIndex); + } + + updateVector((TimeMilliVector) vector, time, !rs.wasNull(), rowCount); + break; + case Types.TIMESTAMP: + final Timestamp ts; + if (calendar != null) { + ts = rs.getTimestamp(columnIndex, calendar); + } else { + ts = rs.getTimestamp(columnIndex); + } + + // TODO: Need to handle precision such as milli, micro, nano + updateVector((TimeStampVector) vector, ts, !rs.wasNull(), rowCount); + break; + case Types.BINARY: + case Types.VARBINARY: + case Types.LONGVARBINARY: + updateVector((VarBinaryVector) vector, + rs.getBinaryStream(columnIndex), !rs.wasNull(), rowCount); + break; + case Types.ARRAY: + updateVector((ListVector) vector, rs, columnIndex, rowCount, config); + break; + case Types.CLOB: + updateVector((VarCharVector) vector, + rs.getClob(columnIndex), !rs.wasNull(), rowCount); + break; + case Types.BLOB: + updateVector((VarBinaryVector) vector, + rs.getBlob(columnIndex), !rs.wasNull(), rowCount); + break; + + default: + // no-op, shouldn't get here + break; + } + } + private static void updateVector(BitVector bitVector, boolean value, boolean isNonNull, int rowCount) { NullableBitHolder holder = new NullableBitHolder(); holder.isSet = isNonNull ? 1 : 0; @@ -620,4 +716,45 @@ private static void updateVector(VarBinaryVector varBinaryVector, Blob blob, boo updateVector(varBinaryVector, blob != null ? blob.getBinaryStream() : null, isNonNull, rowCount); } + private static void updateVector( + ListVector listVector, + ResultSet resultSet, + int arrayIndex, + int rowCount, + JdbcToArrowConfig config) + throws SQLException, IOException { + + final JdbcFieldInfo fieldInfo = getJdbcFieldInfoForArraySubType(resultSet.getMetaData(), arrayIndex, config); + if (fieldInfo == null) { + throw new IllegalArgumentException("Column " + arrayIndex + " is an array of unknown type."); + } + + final int valueCount = listVector.getValueCount(); + final Array array = resultSet.getArray(arrayIndex); + + FieldVector fieldVector = listVector.getDataVector(); + int arrayRowCount = 0; + + if (!resultSet.wasNull()) { + listVector.startNewValue(rowCount); + + try (ResultSet rs = array.getResultSet()) { + + while (rs.next()) { + jdbcToFieldVector( + rs, + JDBC_ARRAY_VALUE_COLUMN, + fieldInfo.getJdbcType(), + valueCount + arrayRowCount, + fieldVector, + config); + arrayRowCount++; + } + } + + listVector.endValue(rowCount, arrayRowCount); + } + + listVector.setValueCount(valueCount + arrayRowCount); + } } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfoTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfoTest.java new file mode 100644 index 0000000000000..3d6074be613b8 --- /dev/null +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfoTest.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc; + +import static org.junit.Assert.*; + +import java.sql.Types; + +import org.junit.Test; + +public class JdbcFieldInfoTest { + + @Test + public void testCreateJdbcFieldInfoWithJdbcType() { + JdbcFieldInfo fieldInfo = new JdbcFieldInfo(Types.BLOB); + + assertEquals(Types.BLOB, fieldInfo.getJdbcType()); + assertEquals(0, fieldInfo.getPrecision()); + assertEquals(0, fieldInfo.getScale()); + } + + public void testCreateJdbcFieldInfoWithJdbcTypePrecisionAndScale() { + JdbcFieldInfo fieldInfo = new JdbcFieldInfo(Types.BLOB, 1, 2); + + assertEquals(Types.BLOB, fieldInfo.getJdbcType()); + assertEquals(1, fieldInfo.getPrecision()); + assertEquals(2, fieldInfo.getScale()); + } +} diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java index bafb2dcdcc341..46d8b044c9014 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java @@ -19,7 +19,9 @@ import static org.junit.Assert.*; +import java.sql.Types; import java.util.Calendar; +import java.util.HashMap; import java.util.Locale; import java.util.TimeZone; @@ -34,7 +36,7 @@ public class JdbcToArrowConfigTest { @Test(expected = NullPointerException.class) public void testConfigNullArguments() { - new JdbcToArrowConfig(null, null, false); + new JdbcToArrowConfig(null, null); } @Test(expected = NullPointerException.class) @@ -43,7 +45,7 @@ public void testBuilderNullArguments() { } public void testConfigNullCalendar() { - JdbcToArrowConfig config = new JdbcToArrowConfig(allocator, null, false); + JdbcToArrowConfig config = new JdbcToArrowConfig(allocator, null); assertNull(config.getCalendar()); } @@ -56,7 +58,7 @@ public void testBuilderNullCalendar() { @Test(expected = NullPointerException.class) public void testConfigNullAllocator() { - new JdbcToArrowConfig(null, calendar, false); + new JdbcToArrowConfig(null, calendar); } @Test(expected = NullPointerException.class) @@ -95,7 +97,8 @@ public void testConfig() { assertTrue(newCalendar == config.getCalendar()); } - @Test public void testIncludeMetadata() { + @Test + public void testIncludeMetadata() { JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(allocator, calendar, false); JdbcToArrowConfig config = builder.build(); @@ -108,10 +111,42 @@ public void testConfig() { config = new JdbcToArrowConfigBuilder(allocator, calendar, true).build(); assertTrue(config.shouldIncludeMetadata()); - config = new JdbcToArrowConfig(allocator, calendar, true); + config = new JdbcToArrowConfig(allocator, calendar, true, null, null); assertTrue(config.shouldIncludeMetadata()); - config = new JdbcToArrowConfig(allocator, calendar, false); + config = new JdbcToArrowConfig(allocator, calendar, false, null, null); assertFalse(config.shouldIncludeMetadata()); } + + @Test + public void testArraySubTypes() { + JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(allocator, calendar, false); + JdbcToArrowConfig config = builder.build(); + + final int columnIndex = 1; + final String columnName = "COLUMN"; + + assertNull(config.getArraySubTypeByColumnIndex(columnIndex)); + assertNull(config.getArraySubTypeByColumnName(columnName)); + + final HashMap indexMapping = new HashMap(); + indexMapping.put(2, new JdbcFieldInfo(Types.BIGINT)); + + final HashMap fieldMapping = new HashMap(); + fieldMapping.put("NEW_COLUMN", new JdbcFieldInfo(Types.BINARY)); + + builder.setArraySubTypeByColumnIndexMap(indexMapping); + builder.setArraySubTypeByColumnNameMap(fieldMapping); + config = builder.build(); + + assertNull(config.getArraySubTypeByColumnIndex(columnIndex)); + assertNull(config.getArraySubTypeByColumnName(columnName)); + + indexMapping.put(columnIndex, new JdbcFieldInfo(Types.BIT)); + fieldMapping.put(columnName, new JdbcFieldInfo(Types.BLOB)); + + assertNotNull(config.getArraySubTypeByColumnIndex(columnIndex)); + assertEquals(Types.BIT, config.getArraySubTypeByColumnIndex(columnIndex).getJdbcType()); + assertEquals(Types.BLOB, config.getArraySubTypeByColumnName(columnName).getJdbcType()); + } } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java index 98f799c1b5bf6..2137162667ef1 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java @@ -204,7 +204,7 @@ public void setRowCount(int rowCount) { this.rowCount = rowCount; } - private byte[][] getByteArray(String[] data) { + static byte[][] getByteArray(String[] data) { byte[][] byteArr = new byte[data.length][]; for (int i = 0; i < data.length; i++) { @@ -213,7 +213,7 @@ private byte[][] getByteArray(String[] data) { return byteArr; } - private byte[][] getHexToByteArray(String[] data) { + static byte[][] getHexToByteArray(String[] data) { byte[][] byteArr = new byte[data.length][]; for (int i = 0; i < data.length; i++) { @@ -222,7 +222,7 @@ private byte[][] getHexToByteArray(String[] data) { return byteArr; } - private static byte[] hexStringToByteArray(String s) { + static byte[] hexStringToByteArray(String s) { int len = s.length(); byte[] data = new byte[len / 2]; for (int i = 0; i < len; i += 2) { diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java new file mode 100644 index 0000000000000..e0f8ad957d35b --- /dev/null +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java @@ -0,0 +1,374 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc.h2; + +import static org.junit.Assert.*; + +import java.sql.Array; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.Statement; +import java.sql.Types; +import java.util.HashMap; +import java.util.Map; + +import org.apache.arrow.adapter.jdbc.JdbcFieldInfo; +import org.apache.arrow.adapter.jdbc.JdbcToArrow; +import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; +import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; +import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.Float4Vector; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.complex.ListVector; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import io.netty.buffer.ArrowBuf; + +public class JdbcToArrowArrayTest { + private Connection conn = null; + + private static final String CREATE_STATEMENT = + "CREATE TABLE array_table (id INTEGER, int_array ARRAY, float_array ARRAY, string_array ARRAY);"; + private static final String INSERT_STATEMENT = + "INSERT INTO array_table (id, int_array, float_array, string_array) VALUES (?, ?, ?, ?);"; + private static final String QUERY = "SELECT int_array, float_array, string_array FROM array_table ORDER BY id;"; + private static final String DROP_STATEMENT = "DROP TABLE array_table;"; + + private static Map arrayFieldMapping; + + private static final String INT_ARRAY_FIELD_NAME = "INT_ARRAY"; + private static final String FLOAT_ARRAY_FIELD_NAME = "FLOAT_ARRAY"; + private static final String STRING_ARRAY_FIELD_NAME = "STRING_ARRAY"; + + @Before + public void setUp() throws Exception { + String url = "jdbc:h2:mem:JdbcToArrowTest"; + String driver = "org.h2.Driver"; + Class.forName(driver); + conn = DriverManager.getConnection(url); + try (Statement stmt = conn.createStatement()) { + stmt.executeUpdate(CREATE_STATEMENT); + } + + arrayFieldMapping = new HashMap(); + arrayFieldMapping.put(INT_ARRAY_FIELD_NAME, new JdbcFieldInfo(Types.INTEGER)); + arrayFieldMapping.put(FLOAT_ARRAY_FIELD_NAME, new JdbcFieldInfo(Types.REAL)); + arrayFieldMapping.put(STRING_ARRAY_FIELD_NAME, new JdbcFieldInfo(Types.VARCHAR)); + } + + // This test verifies reading an array field from an H2 database + // works as expected. If this test fails, something is either wrong + // with the setup, or the H2 SQL behavior changed. + @Test + public void testReadH2Array() throws Exception { + int rowCount = 4; + + Integer[][] intArrays = generateIntegerArrayField(rowCount); + Float[][] floatArrays = generateFloatArrayField(rowCount); + String[][] strArrays = generateStringArrayField(rowCount); + + insertRows(rowCount, intArrays, floatArrays, strArrays); + + try (ResultSet resultSet = conn.createStatement().executeQuery(QUERY)) { + ResultSetMetaData rsmd = resultSet.getMetaData(); + assertEquals(3, rsmd.getColumnCount()); + + for (int i = 1; i <= rsmd.getColumnCount(); ++i) { + assertEquals(Types.ARRAY, rsmd.getColumnType(i)); + } + + int rowNum = 0; + + while (resultSet.next()) { + Array intArray = resultSet.getArray(INT_ARRAY_FIELD_NAME); + assertFalse(resultSet.wasNull()); + try (ResultSet rs = intArray.getResultSet()) { + int arrayIndex = 0; + while (rs.next()) { + assertEquals(intArrays[rowNum][arrayIndex].intValue(), rs.getInt(2)); + ++arrayIndex; + } + assertEquals(intArrays[rowNum].length, arrayIndex); + } + + Array floatArray = resultSet.getArray(FLOAT_ARRAY_FIELD_NAME); + assertFalse(resultSet.wasNull()); + try (ResultSet rs = floatArray.getResultSet()) { + int arrayIndex = 0; + while (rs.next()) { + assertEquals(floatArrays[rowNum][arrayIndex].floatValue(), rs.getFloat(2), 0.001); + ++arrayIndex; + } + assertEquals(floatArrays[rowNum].length, arrayIndex); + } + + Array strArray = resultSet.getArray(STRING_ARRAY_FIELD_NAME); + assertFalse(resultSet.wasNull()); + try (ResultSet rs = strArray.getResultSet()) { + int arrayIndex = 0; + while (rs.next()) { + assertEquals(strArrays[rowNum][arrayIndex], rs.getString(2)); + ++arrayIndex; + } + assertEquals(strArrays[rowNum].length, arrayIndex); + } + + ++rowNum; + } + + assertEquals(rowCount, rowNum); + } + } + + @Test + public void testJdbcToArrow() throws Exception { + int rowCount = 4; + + Integer[][] intArrays = generateIntegerArrayField(rowCount); + Float[][] floatArrays = generateFloatArrayField(rowCount); + String[][] strArrays = generateStringArrayField(rowCount); + + insertRows(rowCount, intArrays, floatArrays, strArrays); + + final JdbcToArrowConfigBuilder builder = + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), JdbcToArrowUtils.getUtcCalendar(), false); + builder.setArraySubTypeByColumnNameMap(arrayFieldMapping); + + final JdbcToArrowConfig config = builder.build(); + + try (ResultSet resultSet = conn.createStatement().executeQuery(QUERY)) { + final VectorSchemaRoot vector = JdbcToArrow.sqlToArrow(resultSet, config); + + assertEquals(rowCount, vector.getRowCount()); + + assertIntegerVectorEquals((ListVector) vector.getVector(INT_ARRAY_FIELD_NAME), rowCount, intArrays); + assertFloatVectorEquals((ListVector) vector.getVector(FLOAT_ARRAY_FIELD_NAME), rowCount, floatArrays); + assertStringVectorEquals((ListVector) vector.getVector(STRING_ARRAY_FIELD_NAME), rowCount, strArrays); + } + } + + @Test + public void testJdbcToArrowWithNulls() throws Exception { + int rowCount = 4; + + Integer[][] intArrays = { + null, + {0}, + {1}, + {}, + }; + + Float[][] floatArrays = { + { 2.0f }, + null, + { 3.0f }, + {}, + }; + + String[][] stringArrays = { + {"4"}, + null, + {"5"}, + {}, + }; + + insertRows(rowCount, intArrays, floatArrays, stringArrays); + + final JdbcToArrowConfigBuilder builder = + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), JdbcToArrowUtils.getUtcCalendar(), false); + builder.setArraySubTypeByColumnNameMap(arrayFieldMapping); + + final JdbcToArrowConfig config = builder.build(); + + try (ResultSet resultSet = conn.createStatement().executeQuery(QUERY)) { + final VectorSchemaRoot vector = JdbcToArrow.sqlToArrow(resultSet, config); + + assertEquals(rowCount, vector.getRowCount()); + + assertIntegerVectorEquals((ListVector) vector.getVector(INT_ARRAY_FIELD_NAME), rowCount, intArrays); + assertFloatVectorEquals((ListVector) vector.getVector(FLOAT_ARRAY_FIELD_NAME), rowCount, floatArrays); + assertStringVectorEquals((ListVector) vector.getVector(STRING_ARRAY_FIELD_NAME), rowCount, stringArrays); + } + } + + private void assertIntegerVectorEquals(ListVector listVector, int rowCount, Integer[][] expectedValues) { + IntVector vector = (IntVector) listVector.getDataVector(); + ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); + + int prevOffset = 0; + for (int row = 0; row < rowCount; ++row) { + int offset = offsetBuffer.getInt((row + 1) * ListVector.OFFSET_WIDTH); + + if (expectedValues[row] == null) { + assertEquals(0, listVector.isSet(row)); + assertEquals(0, offset - prevOffset); + continue; + } + + assertEquals(1, listVector.isSet(row)); + assertEquals(expectedValues[row].length, offset - prevOffset); + + for (int i = prevOffset; i < offset; ++i) { + assertEquals(expectedValues[row][i - prevOffset].intValue(), vector.get(i)); + } + + prevOffset = offset; + } + } + + private void assertFloatVectorEquals(ListVector listVector, int rowCount, Float[][] expectedValues) { + Float4Vector vector = (Float4Vector) listVector.getDataVector(); + ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); + + int prevOffset = 0; + for (int row = 0; row < rowCount; ++row) { + int offset = offsetBuffer.getInt((row + 1) * ListVector.OFFSET_WIDTH); + + if (expectedValues[row] == null) { + assertEquals(0, listVector.isSet(row)); + assertEquals(0, offset - prevOffset); + continue; + } + + assertEquals(1, listVector.isSet(row)); + assertEquals(expectedValues[row].length, offset - prevOffset); + + for (int i = prevOffset; i < offset; ++i) { + assertEquals(expectedValues[row][i - prevOffset].floatValue(), vector.get(i), 0); + } + + prevOffset = offset; + } + } + + private void assertStringVectorEquals(ListVector listVector, int rowCount, String[][] expectedValues) { + VarCharVector vector = (VarCharVector) listVector.getDataVector(); + ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); + + int prevOffset = 0; + for (int row = 0; row < rowCount; ++row) { + int offset = offsetBuffer.getInt((row + 1) * ListVector.OFFSET_WIDTH); + + if (expectedValues[row] == null) { + assertEquals(0, listVector.isSet(row)); + assertEquals(0, offset - prevOffset); + continue; + } + + assertEquals(1, listVector.isSet(row)); + assertEquals(expectedValues[row].length, offset - prevOffset); + for (int i = prevOffset; i < offset; ++i) { + assertArrayEquals(expectedValues[row][i - prevOffset].getBytes(), vector.get(i)); + } + + prevOffset = offset; + } + } + + @After + public void tearDown() throws SQLException { + try (Statement stmt = conn.createStatement()) { + stmt.executeUpdate(DROP_STATEMENT); + } finally { + if (conn != null) { + conn.close(); + conn = null; + } + } + } + + private Integer[][] generateIntegerArrayField(int numRows) { + Integer[][] result = new Integer[numRows][]; + + for (int i = 0; i < numRows; ++i) { + int val = i * 4; + result[i] = new Integer[]{val, val + 1, val + 2, val + 3}; + } + + return result; + } + + private Float[][] generateFloatArrayField(int numRows) { + Float[][] result = new Float[numRows][]; + + for (int i = 0; i < numRows; ++i) { + int val = i * 4; + result[i] = new Float[]{(float) val, (float) val + 1, (float) val + 2, (float) val + 3}; + } + + return result; + } + + private String[][] generateStringArrayField(int numRows) { + String[][] result = new String[numRows][]; + + for (int i = 0; i < numRows; ++i) { + int val = i * 4; + result[i] = new String[]{ + String.valueOf(val), + String.valueOf(val + 1), + String.valueOf(val + 2), + String.valueOf(val + 3) }; + } + + return result; + } + + private void insertRows( + int numRows, + Integer[][] integerArrays, + Float[][] floatArrays, + String[][] strArrays) + throws SQLException { + + // Insert 4 Rows + try (PreparedStatement stmt = conn.prepareStatement(INSERT_STATEMENT)) { + + for (int i = 0; i < numRows; ++i) { + Integer[] integerArray = integerArrays[i]; + Float[] floatArray = floatArrays[i]; + String[] strArray = strArrays[i]; + + Array intArray = conn.createArrayOf("INT", integerArray); + Array realArray = conn.createArrayOf("REAL", floatArray); + Array varcharArray = conn.createArrayOf("VARCHAR", strArray); + + // Insert Arrays of 4 Values in Each Row + stmt.setInt(1, i); + stmt.setArray(2, intArray); + stmt.setArray(3, realArray); + stmt.setArray(4, varcharArray); + + stmt.executeUpdate(); + + intArray.free(); + realArray.free(); + varcharArray.free(); + } + } + } +}