-
Notifications
You must be signed in to change notification settings - Fork 3.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ARROW-3965 [Java] JDBC-To-Arrow Configuration #3133
Closed
Closed
Changes from 9 commits
Commits
Show all changes
15 commits
Select commit
Hold shift + click to select a range
da77cbe
Creating a configuration class for the JDBC-to-Arrow converter.
b270044
Updated validaton & documentation, and unit tests for the new JdbcToA…
df632e3
Updating the SQL tests to include JdbcToArrowConfig versions.
4f1260c
Adding documentation for public static VectorSchemaRoot sqlToArrow(Re…
8d6cf00
Documentation for public static VectorSchemaRoot sqlToArrow(Connectio…
68c91e7
Modifying the jdbcToArrowSchema and jdbcToArrowVectors methods to rec…
bb3165b
Updating the function calls to use the JdbcToArrowConfig versions.
881c6c8
Merge pull request #1 from apache/master
mikepigott 5b1b364
Merge branch 'master' into jdbc-to-arrow-config
3b17c29
Merge pull request #2 from apache/master
mikepigott e5b19ee
Merge pull request #3 from apache/master
mikepigott 789c8c8
Merge pull request #4 from apache/master
mikepigott d7ca982
Merge branch 'master' into jdbc-to-arrow-config
d6c64a7
ARROW-3965: JdbcToArrowConfigBuilder
be95426
ARROW-3965: JDBC-To-Arrow Config Builder javadocs.
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
111 changes: 111 additions & 0 deletions
111
java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.arrow.adapter.jdbc; | ||
|
||
import java.util.Calendar; | ||
|
||
import org.apache.arrow.memory.BaseAllocator; | ||
|
||
import com.google.common.base.Preconditions; | ||
|
||
/** | ||
* This class configures the JDBC-to-Arrow conversion process. | ||
* <p> | ||
* The allocator is used to construct the {@link org.apache.arrow.vector.VectorSchemaRoot}, | ||
* and the calendar is used to define the time zone of any {@link org.apache.arrow.vector.types.pojo.ArrowType.Timestamp} | ||
* fields that are created during the conversion. | ||
* </p> | ||
* <p> | ||
* Neither field may be <code>null</code>. | ||
* </p> | ||
*/ | ||
public final class JdbcToArrowConfig { | ||
private Calendar calendar; | ||
private BaseAllocator allocator; | ||
|
||
/** | ||
* Constructs a new configuration from the provided allocator and calendar. The <code>allocator</code> | ||
* is used when constructing the Arrow vectors from the ResultSet, and the calendar is used to define | ||
* Arrow Timestamp fields, and to read time-based fields from the JDBC <code>ResultSet</code>. | ||
* | ||
* @param allocator The memory allocator to construct the Arrow vectors with. | ||
* @param calendar The calendar to use when constructing Timestamp fields and reading time-based results. | ||
*/ | ||
public JdbcToArrowConfig(BaseAllocator allocator, Calendar calendar) { | ||
Preconditions.checkNotNull(allocator, "Memory allocator cannot be null"); | ||
Preconditions.checkNotNull(calendar, "Calendar object can not be null"); | ||
|
||
this.allocator = allocator; | ||
this.calendar = calendar; | ||
} | ||
|
||
/** | ||
* The calendar to use when defining Arrow Timestamp fields | ||
* and retrieving time-based fields from the database. | ||
* @return the calendar. | ||
*/ | ||
public Calendar getCalendar() { | ||
return calendar; | ||
} | ||
|
||
/** | ||
* Sets the {@link Calendar} to use when constructing timestamp fields in the | ||
* Arrow schema, and reading time-based fields from the JDBC <code>ResultSet</code>. | ||
* | ||
* @param calendar the calendar to set. | ||
* @exception NullPointerException if <code>calendar</code> is <code>null</code>. | ||
*/ | ||
public JdbcToArrowConfig setCalendar(Calendar calendar) { | ||
Preconditions.checkNotNull(calendar, "Calendar object can not be null"); | ||
this.calendar = calendar; | ||
return this; | ||
} | ||
|
||
/** | ||
* The Arrow memory allocator. | ||
* @return the allocator. | ||
*/ | ||
public BaseAllocator getAllocator() { | ||
return allocator; | ||
} | ||
|
||
/** | ||
* Sets the memory allocator to use when constructing the Arrow vectors from the ResultSet. | ||
* | ||
* @param allocator the allocator to set. | ||
* @exception NullPointerException if <code>allocator</code> is null. | ||
*/ | ||
public JdbcToArrowConfig setAllocator(BaseAllocator allocator) { | ||
Preconditions.checkNotNull(allocator, "Memory allocator cannot be null"); | ||
this.allocator = allocator; | ||
return this; | ||
} | ||
|
||
/** | ||
* Whether this configuration is valid. The configuration is valid when: | ||
* <ul> | ||
* <li>A memory allocator is provided.</li> | ||
* <li>A calendar is provided.</li> | ||
* </ul> | ||
* | ||
* @return Whether this configuration is valid. | ||
*/ | ||
public boolean isValid() { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. A builder would ensure this object is always valid after construction, reducing the need to validate it everywhere. |
||
return (calendar != null) && (allocator != null); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -38,6 +38,7 @@ | |
import java.util.Calendar; | ||
import java.util.List; | ||
|
||
import org.apache.arrow.memory.RootAllocator; | ||
import org.apache.arrow.vector.BaseFixedWidthVector; | ||
import org.apache.arrow.vector.BigIntVector; | ||
import org.apache.arrow.vector.BitVector; | ||
|
@@ -90,6 +91,21 @@ public class JdbcToArrowUtils { | |
private static final int DEFAULT_STREAM_BUFFER_SIZE = 1024; | ||
private static final int DEFAULT_CLOB_SUBSTRING_READ_SIZE = 256; | ||
|
||
/** | ||
* Create Arrow {@link Schema} object for the given JDBC {@link ResultSetMetaData}. | ||
* | ||
* @param rsmd The ResultSetMetaData containing the results, to read the JDBC metadata from. | ||
* @param calendar The calendar whose time zone is used when constructing Timestamp fields. | ||
* @return {@link Schema} | ||
* @throws SQLException on error | ||
*/ | ||
public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar) throws SQLException { | ||
Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null"); | ||
Preconditions.checkNotNull(calendar, "Calendar object can't be null"); | ||
|
||
return jdbcToArrowSchema(rsmd, new JdbcToArrowConfig(new RootAllocator(0), calendar)); | ||
} | ||
|
||
/** | ||
* Create Arrow {@link Schema} object for the given JDBC {@link ResultSetMetaData}. | ||
* | ||
|
@@ -120,14 +136,15 @@ public class JdbcToArrowUtils { | |
* CLOB --> ArrowType.Utf8 | ||
* BLOB --> ArrowType.Binary | ||
* | ||
* @param rsmd ResultSetMetaData | ||
* @param rsmd The ResultSetMetaData containing the results, to read the JDBC metadata from. | ||
* @param config The configuration to use when constructing the schema. | ||
* @return {@link Schema} | ||
* @throws SQLException on error | ||
*/ | ||
public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar) throws SQLException { | ||
|
||
public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig config) throws SQLException { | ||
Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null"); | ||
Preconditions.checkNotNull(calendar, "Calendar object can't be null"); | ||
Preconditions.checkNotNull(config, "The configuration object must not be null"); | ||
Preconditions.checkArgument(config.isValid(), "The configuration object must be valid"); | ||
|
||
List<Field> fields = new ArrayList<>(); | ||
int columnCount = rsmd.getColumnCount(); | ||
|
@@ -179,7 +196,7 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar | |
break; | ||
case Types.TIMESTAMP: | ||
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Timestamp(TimeUnit.MILLISECOND, | ||
calendar.getTimeZone().getID())), null)); | ||
config.getCalendar().getTimeZone().getID())), null)); | ||
break; | ||
case Types.BINARY: | ||
case Types.VARBINARY: | ||
|
@@ -222,17 +239,38 @@ private static void allocateVectors(VectorSchemaRoot root, int size) { | |
* Iterate the given JDBC {@link ResultSet} object to fetch the data and transpose it to populate | ||
* the given Arrow Vector objects. | ||
* | ||
* @param rs ResultSet to use to fetch the data from underlying database | ||
* @param root Arrow {@link VectorSchemaRoot} object to populate | ||
* @param rs ResultSet to use to fetch the data from underlying database | ||
* @param root Arrow {@link VectorSchemaRoot} object to populate | ||
* @param calendar The calendar to use when reading time-based data. | ||
* @throws SQLException on error | ||
*/ | ||
public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calendar calendar) | ||
throws SQLException, IOException { | ||
|
||
Preconditions.checkNotNull(rs, "JDBC ResultSet object can't be null"); | ||
Preconditions.checkNotNull(root, "JDBC ResultSet object can't be null"); | ||
Preconditions.checkNotNull(root, "Vector Schema cannot be null"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. :) |
||
Preconditions.checkNotNull(calendar, "Calendar object can't be null"); | ||
|
||
jdbcToArrowVectors(rs, root, new JdbcToArrowConfig(new RootAllocator(0), calendar)); | ||
} | ||
|
||
/** | ||
* Iterate the given JDBC {@link ResultSet} object to fetch the data and transpose it to populate | ||
* the given Arrow Vector objects. | ||
* | ||
* @param rs ResultSet to use to fetch the data from underlying database | ||
* @param root Arrow {@link VectorSchemaRoot} object to populate | ||
* @param config The configuration to use when reading the data. | ||
* @throws SQLException on error | ||
*/ | ||
public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, JdbcToArrowConfig config) | ||
throws SQLException, IOException { | ||
|
||
Preconditions.checkNotNull(rs, "JDBC ResultSet object can't be null"); | ||
Preconditions.checkNotNull(root, "JDBC ResultSet object can't be null"); | ||
Preconditions.checkNotNull(config, "JDBC-to-Arrow configuration cannot be null"); | ||
Preconditions.checkArgument(config.isValid(), "JDBC-to-Arrow configuration must be valid"); | ||
|
||
ResultSetMetaData rsmd = rs.getMetaData(); | ||
int columnCount = rsmd.getColumnCount(); | ||
|
||
|
@@ -289,16 +327,16 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calen | |
break; | ||
case Types.DATE: | ||
updateVector((DateMilliVector) root.getVector(columnName), | ||
rs.getDate(i, calendar), !rs.wasNull(), rowCount); | ||
rs.getDate(i, config.getCalendar()), !rs.wasNull(), rowCount); | ||
break; | ||
case Types.TIME: | ||
updateVector((TimeMilliVector) root.getVector(columnName), | ||
rs.getTime(i, calendar), !rs.wasNull(), rowCount); | ||
rs.getTime(i, config.getCalendar()), !rs.wasNull(), rowCount); | ||
break; | ||
case Types.TIMESTAMP: | ||
// TODO: Need to handle precision such as milli, micro, nano | ||
updateVector((TimeStampVector) root.getVector(columnName), | ||
rs.getTimestamp(i, calendar), !rs.wasNull(), rowCount); | ||
rs.getTimestamp(i, config.getCalendar()), !rs.wasNull(), rowCount); | ||
break; | ||
case Types.BINARY: | ||
case Types.VARBINARY: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would prefer a builder instead, so that this POJO is immutable.