Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-4142: [Java] JDBC Array -> Arrow ListVector #3596

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,6 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator all
*/
public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar) throws SQLException, IOException {
  Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");

  // Build a configuration backed by a fresh root allocator, with JDBC metadata excluded,
  // and delegate to the configuration-based overload.
  final RootAllocator rootAllocator = new RootAllocator(Integer.MAX_VALUE);
  final JdbcToArrowConfig config = new JdbcToArrowConfig(rootAllocator, calendar, false);
  return sqlToArrow(resultSet, config);
}

Expand Down Expand Up @@ -220,6 +219,7 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, JdbcToArrowConfig
throws SQLException, IOException {
Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
Preconditions.checkNotNull(config, "The configuration cannot be null");
Preconditions.checkArgument(config.isValid(), "The configuration must be valid");

VectorSchemaRoot root = VectorSchemaRoot.create(
JdbcToArrowUtils.jdbcToArrowSchema(resultSet.getMetaData(), config), config.getAllocator());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.arrow.adapter.jdbc;

import java.util.Calendar;
import java.util.Map;

import org.apache.arrow.memory.BaseAllocator;

Expand All @@ -28,16 +29,29 @@
* <p>
* The allocator is used to construct the {@link org.apache.arrow.vector.VectorSchemaRoot},
* and the calendar is used to define the time zone of any {@link org.apache.arrow.vector.types.pojo.ArrowType.Timestamp}
* fields that are created during the conversion.
* fields that are created during the conversion. Neither field may be <code>null</code>.
* </p>
* <p>
* Neither field may be <code>null</code>.
* If the <code>includeMetadata</code> flag is set, the Arrow field metadata will contain information
* from the corresponding {@link java.sql.ResultSetMetaData} that was used to create the
* {@link org.apache.arrow.vector.types.pojo.FieldType} of the corresponding
* {@link org.apache.arrow.vector.FieldVector}.
* </p>
* <p>
* If there are any {@link java.sql.Types#ARRAY} fields in the {@link java.sql.ResultSet}, the corresponding
* {@link JdbcFieldInfo} for the array's contents must be defined here. Unfortunately, the sub-type
* information cannot be retrieved from all JDBC implementations (H2 for example, returns
* {@link java.sql.Types#NULL} for the array sub-type), so it must be configured here. The column index
* or name can be used to map to a {@link JdbcFieldInfo}, and that will be used for the conversion.
* </p>
*/
public final class JdbcToArrowConfig {

private Calendar calendar;
private BaseAllocator allocator;
private boolean includeMetadata;
private Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex;
private Map<String, JdbcFieldInfo> arraySubTypesByColumnName;

/**
* Constructs a new configuration from the provided allocator and calendar. The <code>allocator</code>
Expand All @@ -48,18 +62,21 @@ public final class JdbcToArrowConfig {
* @param calendar The calendar to use when constructing Timestamp fields and reading time-based results.
* @param includeMetadata Whether to include JDBC field metadata in the Arrow Schema Field metadata.
*/
JdbcToArrowConfig(BaseAllocator allocator, Calendar calendar, boolean includeMetadata) {
public JdbcToArrowConfig(BaseAllocator allocator, Calendar calendar, boolean includeMetadata) {
  // Reject a missing allocator before any state is assigned.
  Preconditions.checkNotNull(allocator, "Memory allocator cannot be null");

  // No array sub-type mappings are defined until the corresponding setters are called.
  this.arraySubTypesByColumnName = null;
  this.arraySubTypesByColumnIndex = null;

  this.includeMetadata = includeMetadata;
  this.calendar = calendar;
  this.allocator = allocator;
}

/**
* The calendar to use when defining Arrow Timestamp fields
* and retrieving {@link Date}, {@link Time}, or {@link Timestamp}
* data types from the {@link ResultSet}, or <code>null</code> if not converting.
*
* @return the calendar.
*/
public Calendar getCalendar() {
Expand All @@ -82,4 +99,78 @@ public BaseAllocator getAllocator() {
public boolean shouldIncludeMetadata() {
  // Plain accessor for the includeMetadata flag.
  return this.includeMetadata;
}

/**
 * Updates the flag controlling whether JDBC ResultSet field metadata is copied into the
 * Arrow Schema field metadata.
 *
 * @param includeMetadata <code>true</code> to include the JDBC metadata in the Arrow Schema
 *                        field metadata, <code>false</code> to exclude it.
 * @return This <code>JdbcToArrowConfig</code> instance, so that calls can be chained.
 */
public JdbcToArrowConfig setIncludeMetadata(boolean includeMetadata) {
  this.includeMetadata = includeMetadata;

  return this;
}

/**
 * Looks up the {@link JdbcFieldInfo} describing the element type of the
 * {@link java.sql.Types#ARRAY} column at the given index.
 *
 * @param index The {@link java.sql.ResultSetMetaData} column index of an {@link java.sql.Types#ARRAY} type.
 * @return The {@link JdbcFieldInfo} for that array's sub-type, or <code>null</code> when no
 *         index mapping has been set or the mapping has no entry for the index.
 */
public JdbcFieldInfo getArraySubTypeByColumnIndex(int index) {
  final Map<Integer, JdbcFieldInfo> byIndex = arraySubTypesByColumnIndex;
  return (byIndex != null) ? byIndex.get(index) : null;
}

/**
 * Replaces the column-index-to-{@link JdbcFieldInfo} mapping that is consulted for columns
 * of type {@link java.sql.Types#ARRAY}. The provided map is stored as-is (no copy is made).
 *
 * @param map The mapping from column index to array sub-type information.
 * @return This <code>JdbcToArrowConfig</code> instance, so that calls can be chained.
 */
public JdbcToArrowConfig setArraySubTypeByColumnIndexMap(Map<Integer, JdbcFieldInfo> map) {
  arraySubTypesByColumnIndex = map;

  return this;
}

/**
 * Returns the array sub-type {@link JdbcFieldInfo} defined for the provided column name.
 *
 * @param name The {@link java.sql.ResultSetMetaData} column name of an {@link java.sql.Types#ARRAY} type.
 * @return The {@link JdbcFieldInfo} for that array's sub-type, or <code>null</code> if no
 *         name mapping has been set or the mapping has no entry for the name.
 */
public JdbcFieldInfo getArraySubTypeByColumnName(String name) {
  // The name mapping is optional; without it there is nothing to look up.
  if (arraySubTypesByColumnName == null) {
    return null;
  }
  return arraySubTypesByColumnName.get(name);
}

/**
 * Replaces the column-name-to-{@link JdbcFieldInfo} mapping that is consulted for columns
 * of type {@link java.sql.Types#ARRAY}. The provided map is stored as-is (no copy is made).
 *
 * @param map The mapping from column name to array sub-type information.
 * @return This <code>JdbcToArrowConfig</code> instance, so that calls can be chained.
 */
public JdbcToArrowConfig setArraySubTypeByColumnNameMap(Map<String, JdbcFieldInfo> map) {
  arraySubTypesByColumnName = map;

  return this;
}

/**
 * Reports whether this configuration can be used. Both of the following must hold:
 * <ul>
 *   <li>The calendar is not <code>null</code>.</li>
 *   <li>The memory allocator is not <code>null</code>.</li>
 * </ul>
 *
 * @return <code>true</code> if both the calendar and the allocator are set, <code>false</code> otherwise.
 */
public boolean isValid() {
  if (calendar == null) {
    return false;
  }
  return allocator != null;
}
}
Loading