Skip to content

Commit

Permalink
Merge branch 'jdbc-to-arrow-config' into jdbc-column-metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
Mike Pigott committed Dec 8, 2018
2 parents b5b0cb1 + 68c91e7 commit 5bfd6a2
Showing 1 changed file with 55 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import java.util.List;
import java.util.Map;

import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.BaseFixedWidthVector;
import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.BitVector;
Expand Down Expand Up @@ -92,6 +93,21 @@ public class JdbcToArrowUtils {
private static final int DEFAULT_STREAM_BUFFER_SIZE = 1024;
private static final int DEFAULT_CLOB_SUBSTRING_READ_SIZE = 256;

/**
 * Builds the Arrow {@link Schema} that corresponds to the given JDBC {@link ResultSetMetaData}.
 *
 * <p>Convenience overload: after validating both arguments it wraps the calendar, together with
 * a freshly created {@code RootAllocator(0)}, in a {@link JdbcToArrowConfig} and delegates to
 * {@code jdbcToArrowSchema(ResultSetMetaData, JdbcToArrowConfig)}.
 *
 * @param rsmd The ResultSetMetaData to read the JDBC column metadata from; must not be null.
 * @param calendar The calendar passed on to the configuration; must not be null.
 * @return {@link Schema}
 * @throws SQLException on error
 */
public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar) throws SQLException {
  Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null");
  Preconditions.checkNotNull(calendar, "Calendar object can't be null");

  // Assemble the configuration step by step, then hand off to the config-based overload.
  final RootAllocator schemaAllocator = new RootAllocator(0);
  final JdbcToArrowConfig schemaConfig = new JdbcToArrowConfig(schemaAllocator, calendar);
  return jdbcToArrowSchema(rsmd, schemaConfig);
}

/**
* Create Arrow {@link Schema} object for the given JDBC {@link ResultSetMetaData}.
*
Expand Down Expand Up @@ -122,15 +138,15 @@ public class JdbcToArrowUtils {
* CLOB --> ArrowType.Utf8
* BLOB --> ArrowType.Binary
*
* @param rsmd ResultSetMetaData
* @param rsmd The ResultSetMetaData containing the results, to read the JDBC metadata from.
* @param config The configuration to use when constructing the schema.
* @return {@link Schema}
* @throws SQLException on error
*/
public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar, boolean includeMetadata)
throws SQLException {

public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig config) throws SQLException {
Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null");
Preconditions.checkNotNull(calendar, "Calendar object can't be null");
Preconditions.checkNotNull(config, "The configuration object must not be null");
Preconditions.checkArgument(config.isValid(), "The configuration object must be valid");

List<Field> fields = new ArrayList<>();
int columnCount = rsmd.getColumnCount();
Expand All @@ -139,7 +155,7 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar
final FieldType fieldType;

final Map<String, String> metadata;
if (includeMetadata) {
if (config.includeMetadata()) {
metadata = new HashMap<String, String>();
metadata.put(JdbcToArrow.SQL_CATALOG_NAME_KEY, rsmd.getCatalogName(i));
metadata.put(JdbcToArrow.SQL_TABLE_NAME_KEY, rsmd.getTableName(i));
Expand Down Expand Up @@ -196,8 +212,12 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar
fieldType = new FieldType(true, new ArrowType.Time(TimeUnit.MILLISECOND, 32), null, metadata);
break;
case Types.TIMESTAMP:
fieldType = new FieldType(true, new ArrowType.Timestamp(TimeUnit.MILLISECOND, calendar.getTimeZone().getID()),
null, metadata);
fieldType =
new FieldType(
true,
new ArrowType.Timestamp(TimeUnit.MILLISECOND, config.getCalendar().getTimeZone().getID()),
null,
metadata);
break;
case Types.BINARY:
case Types.VARBINARY:
Expand Down Expand Up @@ -239,17 +259,38 @@ private static void allocateVectors(VectorSchemaRoot root, int size) {
* Iterate the given JDBC {@link ResultSet} object to fetch the data and transpose it to populate
* the given Arrow Vector objects.
*
* @param rs ResultSet to use to fetch the data from underlying database
* @param root Arrow {@link VectorSchemaRoot} object to populate
* @param calendar The calendar to use when reading time-based data.
* @throws SQLException on error
* @throws IOException on error
*/
public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calendar calendar)
    throws SQLException, IOException {

  // Validate each argument exactly once, with a message naming the argument that was checked.
  // (Previously `root` was checked twice, the first time with the ResultSet's message, so a
  // null VectorSchemaRoot was reported as a null ResultSet.)
  Preconditions.checkNotNull(rs, "JDBC ResultSet object can't be null");
  Preconditions.checkNotNull(root, "Vector Schema cannot be null");
  Preconditions.checkNotNull(calendar, "Calendar object can't be null");

  // Delegate to the config-based overload, wrapping the calendar in a JdbcToArrowConfig.
  jdbcToArrowVectors(rs, root, new JdbcToArrowConfig(new RootAllocator(0), calendar));
}

/**
* Iterate the given JDBC {@link ResultSet} object to fetch the data and transpose it to populate
* the given Arrow Vector objects.
*
* @param rs ResultSet to use to fetch the data from underlying database
* @param root Arrow {@link VectorSchemaRoot} object to populate
* @param config The configuration to use when reading the data.
* @throws SQLException on error
*/
public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, JdbcToArrowConfig config)
throws SQLException, IOException {

Preconditions.checkNotNull(rs, "JDBC ResultSet object can't be null");
Preconditions.checkNotNull(root, "JDBC ResultSet object can't be null");
Preconditions.checkNotNull(config, "JDBC-to-Arrow configuration cannot be null");
Preconditions.checkArgument(config.isValid(), "JDBC-to-Arrow configuration must be valid");

ResultSetMetaData rsmd = rs.getMetaData();
int columnCount = rsmd.getColumnCount();

Expand Down Expand Up @@ -306,16 +347,16 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calen
break;
case Types.DATE:
updateVector((DateMilliVector) root.getVector(columnName),
rs.getDate(i, calendar), !rs.wasNull(), rowCount);
rs.getDate(i, config.getCalendar()), !rs.wasNull(), rowCount);
break;
case Types.TIME:
updateVector((TimeMilliVector) root.getVector(columnName),
rs.getTime(i, calendar), !rs.wasNull(), rowCount);
rs.getTime(i, config.getCalendar()), !rs.wasNull(), rowCount);
break;
case Types.TIMESTAMP:
// TODO: Need to handle precision such as milli, micro, nano
updateVector((TimeStampVector) root.getVector(columnName),
rs.getTimestamp(i, calendar), !rs.wasNull(), rowCount);
rs.getTimestamp(i, config.getCalendar()), !rs.wasNull(), rowCount);
break;
case Types.BINARY:
case Types.VARBINARY:
Expand Down

0 comments on commit 5bfd6a2

Please sign in to comment.