Skip to content

Commit

Permalink
ARROW-4142: [Java] JDBC Array -> Arrow ListVector
Browse files Browse the repository at this point in the history
  • Loading branch information
Mike Pigott authored and xhochy committed Feb 8, 2019
1 parent 2b9155a commit 0f47a6c
Show file tree
Hide file tree
Showing 5 changed files with 481 additions and 225 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,6 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator all
*/
public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar) throws SQLException, IOException {
  Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
  // Build the configuration up front: a new RootAllocator created with Integer.MAX_VALUE, the caller's
  // calendar, and JDBC field metadata excluded (includeMetadata = false).
  final JdbcToArrowConfig config =
      new JdbcToArrowConfig(new RootAllocator(Integer.MAX_VALUE), calendar, false);
  return sqlToArrow(resultSet, config);
}

Expand Down Expand Up @@ -220,6 +219,7 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, JdbcToArrowConfig
throws SQLException, IOException {
Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
Preconditions.checkNotNull(config, "The configuration cannot be null");
Preconditions.checkArgument(config.isValid(), "The configuration must be valid");

VectorSchemaRoot root = VectorSchemaRoot.create(
JdbcToArrowUtils.jdbcToArrowSchema(resultSet.getMetaData(), config), config.getAllocator());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.arrow.adapter.jdbc;

import java.util.Calendar;
import java.util.Map;

import org.apache.arrow.memory.BaseAllocator;

Expand All @@ -28,16 +29,29 @@
* <p>
* The allocator is used to construct the {@link org.apache.arrow.vector.VectorSchemaRoot},
* and the calendar is used to define the time zone of any {@link org.apache.arrow.vector.types.pojo.ArrowType.Timestamp}
* fields that are created during the conversion.
* fields that are created during the conversion. Neither field may be <code>null</code>.
* </p>
* <p>
* Neither field may be <code>null</code>.
* If the <code>includeMetadata</code> flag is set, the Arrow field metadata will contain information
* from the corresponding {@link java.sql.ResultSetMetaData} that was used to create the
* {@link org.apache.arrow.vector.types.pojo.FieldType} of the corresponding
* {@link org.apache.arrow.vector.FieldVector}.
* </p>
* <p>
* If there are any {@link java.sql.Types#ARRAY} fields in the {@link java.sql.ResultSet}, the corresponding
* {@link JdbcFieldInfo} for the array's contents must be defined here. Unfortunately, the sub-type
* information cannot be retrieved from all JDBC implementations (H2, for example, returns
* {@link java.sql.Types#NULL} for the array sub-type), so it must be configured here. The column index
* or name can be used to map to a {@link JdbcFieldInfo}, and that will be used for the conversion.
* </p>
*/
public final class JdbcToArrowConfig {

private Calendar calendar;
private BaseAllocator allocator;
private boolean includeMetadata;
private Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex;
private Map<String, JdbcFieldInfo> arraySubTypesByColumnName;

/**
* Constructs a new configuration from the provided allocator and calendar. The <code>allocator</code>
Expand All @@ -48,18 +62,21 @@ public final class JdbcToArrowConfig {
* @param calendar The calendar to use when constructing Timestamp fields and reading time-based results.
* @param includeMetadata Whether to include JDBC field metadata in the Arrow Schema Field metadata.
*/
public JdbcToArrowConfig(BaseAllocator allocator, Calendar calendar, boolean includeMetadata) {
  // Only the allocator is validated here. A null calendar is accepted by this constructor,
  // but isValid() will then report the configuration as invalid.
  Preconditions.checkNotNull(allocator, "Memory allocator cannot be null");

  this.allocator = allocator;
  this.calendar = calendar;
  this.includeMetadata = includeMetadata;
  // The array sub-type mappings start out undefined; they are supplied later through the setters.
  this.arraySubTypesByColumnIndex = null;
  this.arraySubTypesByColumnName = null;
}

/**
* The calendar to use when defining Arrow Timestamp fields
* and retrieving {@link Date}, {@link Time}, or {@link Timestamp}
* data types from the {@link ResultSet}, or <code>null</code> if not converting.
*
* @return the calendar.
*/
public Calendar getCalendar() {
Expand All @@ -82,4 +99,78 @@ public BaseAllocator getAllocator() {
public boolean shouldIncludeMetadata() {
  // Reports the current value of the include-metadata flag held by this configuration.
  return this.includeMetadata;
}

/**
 * Enables or disables copying JDBC ResultSet field metadata into the Arrow Schema field metadata.
 *
 * @param includeMetadata <code>true</code> to include the JDBC metadata, <code>false</code> to exclude it.
 * @return This <code>JdbcToArrowConfig</code> instance, so that calls can be chained.
 */
public JdbcToArrowConfig setIncludeMetadata(boolean includeMetadata) {
  this.includeMetadata = includeMetadata;
  return this;
}

/**
 * Looks up the {@link JdbcFieldInfo} describing the element type of the {@link java.sql.Types#ARRAY}
 * column at the given index.
 *
 * @param index The {@link java.sql.ResultSetMetaData} column index of the array column.
 * @return The configured sub-type {@link JdbcFieldInfo}, or <code>null</code> when no index mapping
 *         has been set or the mapping has no entry for <code>index</code>.
 */
public JdbcFieldInfo getArraySubTypeByColumnIndex(int index) {
  return (arraySubTypesByColumnIndex != null) ? arraySubTypesByColumnIndex.get(index) : null;
}

/**
 * Installs the column-index-to-{@link JdbcFieldInfo} mapping consulted for columns of type
 * {@link java.sql.Types#ARRAY}. The map reference is stored as given; it is not copied.
 *
 * @param map The mapping from column index to array sub-type information.
 * @return This <code>JdbcToArrowConfig</code> instance, so that calls can be chained.
 */
public JdbcToArrowConfig setArraySubTypeByColumnIndexMap(Map<Integer, JdbcFieldInfo> map) {
  this.arraySubTypesByColumnIndex = map;
  return this;
}

/**
 * Returns the array sub-type {@link JdbcFieldInfo} defined for the provided column name.
 *
 * @param name The {@link java.sql.ResultSetMetaData} column name of an {@link java.sql.Types#ARRAY} type.
 * @return The {@link JdbcFieldInfo} for that array's sub-type, or <code>null</code> if no name mapping
 *         has been set or the mapping has no entry for <code>name</code>.
 */
public JdbcFieldInfo getArraySubTypeByColumnName(String name) {
  // No name-based mapping configured: nothing can be defined for any column.
  if (arraySubTypesByColumnName == null) {
    return null;
  }
  return arraySubTypesByColumnName.get(name);
}

/**
 * Installs the column-name-to-{@link JdbcFieldInfo} mapping consulted for columns of type
 * {@link java.sql.Types#ARRAY}. The map reference is stored as given; it is not copied.
 *
 * @param map The mapping from column name to array sub-type information.
 * @return This <code>JdbcToArrowConfig</code> instance, so that calls can be chained.
 */
public JdbcToArrowConfig setArraySubTypeByColumnNameMap(Map<String, JdbcFieldInfo> map) {
  this.arraySubTypesByColumnName = map;
  return this;
}

/**
 * Reports whether this configuration can be used. Both of the following must hold:
 * <ul>
 * <li>The memory allocator is not <code>null</code>.</li>
 * <li>The calendar is not <code>null</code>.</li>
 * </ul>
 * The constructor rejects only a <code>null</code> allocator, so a configuration built with a
 * <code>null</code> calendar exists but is reported as invalid here.
 *
 * @return <code>true</code> if both the allocator and the calendar are set, <code>false</code> otherwise.
 */
public boolean isValid() {
  return allocator != null && calendar != null;
}
}
Loading

0 comments on commit 0f47a6c

Please sign in to comment.