Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,17 @@ public class JdbcToArrow {
* If you wish to use specific TimeZone or Locale for any Date, Time and Timestamp datasets, you may want use
* overloaded API that taken Calendar object instance.
*
* @param connection Database connection to be used. This method will not close the passed connection object. Since hte caller has passed
* the connection object it's the responsibility of the caller to close or return the connection to the pool.
* @param connection Database connection to be used. This method will not close the passed connection object. Since
* the caller has passed the connection object it's the responsibility of the caller to close or
* return the connection to the pool.
* @param query The DB Query to fetch the data.
* @param allocator Memory allocator
* @return Arrow Data Objects {@link VectorSchemaRoot}
* @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources opened such as ResultSet and Statement objects.
* @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources opened such as
* ResultSet and Statement objects.
*/
public static VectorSchemaRoot sqlToArrow(Connection connection, String query, BaseAllocator allocator) throws SQLException, IOException {
public static VectorSchemaRoot sqlToArrow(Connection connection, String query, BaseAllocator allocator)
throws SQLException, IOException {
Preconditions.checkNotNull(connection, "JDBC connection object can not be null");
Preconditions.checkArgument(query != null && query.length() > 0, "SQL query can not be null or empty");
Preconditions.checkNotNull(allocator, "Memory allocator object can not be null");
Expand All @@ -93,15 +96,21 @@ public static VectorSchemaRoot sqlToArrow(Connection connection, String query, B
/**
* For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow objects.
*
* @param connection Database connection to be used. This method will not close the passed connection object. Since hte caller has passed
* the connection object it's the responsibility of the caller to close or return the connection to the pool.
* @param connection Database connection to be used. This method will not close the passed connection object. Since
* the caller has passed the connection object it's the responsibility of the caller to close or
* return the connection to the pool.
* @param query The DB Query to fetch the data.
* @param allocator Memory allocator
* @param calendar Calendar object to use to handle Date, Time and Timestamp datasets.
* @return Arrow Data Objects {@link VectorSchemaRoot}
* @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources opened such as ResultSet and Statement objects.
* @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources opened such as
* ResultSet and Statement objects.
*/
public static VectorSchemaRoot sqlToArrow(Connection connection, String query, BaseAllocator allocator, Calendar calendar) throws SQLException, IOException {
public static VectorSchemaRoot sqlToArrow(
Connection connection,
String query,
BaseAllocator allocator,
Calendar calendar) throws SQLException, IOException {
Preconditions.checkNotNull(connection, "JDBC connection object can not be null");
Preconditions.checkArgument(query != null && query.length() > 0, "SQL query can not be null or empty");
Preconditions.checkNotNull(allocator, "Memory allocator object can not be null");
Expand All @@ -113,8 +122,8 @@ public static VectorSchemaRoot sqlToArrow(Connection connection, String query, B
}

/**
* For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. This method
* uses the default RootAllocator and Calendar object.
* For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. This
* method uses the default RootAllocator and Calendar object.
*
* @param resultSet
* @return Arrow Data Objects {@link VectorSchemaRoot}
Expand All @@ -134,7 +143,8 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet) throws SQLExcepti
* @return Arrow Data Objects {@link VectorSchemaRoot}
* @throws SQLException
*/
public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator allocator) throws SQLException, IOException {
public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator allocator)
throws SQLException, IOException {
Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null");

Expand Down Expand Up @@ -168,7 +178,8 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar
* @return Arrow Data Objects {@link VectorSchemaRoot}
* @throws SQLException
*/
public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator allocator, Calendar calendar) throws SQLException, IOException {
public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator allocator, Calendar calendar)
throws SQLException, IOException {
Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null");
Preconditions.checkNotNull(calendar, "Calendar object can not be null");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,16 +179,17 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Time(TimeUnit.MILLISECOND, 32)), null));
break;
case Types.TIMESTAMP:
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Timestamp(TimeUnit.MILLISECOND, calendar.getTimeZone().getID())), null));
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Timestamp(TimeUnit.MILLISECOND,
calendar.getTimeZone().getID())), null));
break;
case Types.BINARY:
case Types.VARBINARY:
case Types.LONGVARBINARY:
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Binary()), null));
break;
case Types.ARRAY:
// TODO Need to handle this type
// fields.add(new Field("list", FieldType.nullable(new ArrowType.List()), null));
// TODO Need to handle this type
// fields.add(new Field("list", FieldType.nullable(new ArrowType.List()), null));
break;
case Types.CLOB:
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Utf8()), null));
Expand Down Expand Up @@ -226,7 +227,8 @@ private static void allocateVectors(VectorSchemaRoot root, int size) {
* @param root Arrow {@link VectorSchemaRoot} object to populate
* @throws SQLException
*/
public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calendar calendar) throws SQLException, IOException {
public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calendar calendar)
throws SQLException, IOException {

Preconditions.checkNotNull(rs, "JDBC ResultSet object can't be null");
Preconditions.checkNotNull(root, "JDBC ResultSet object can't be null");
Expand Down Expand Up @@ -449,7 +451,11 @@ private static void updateVector(TimeMilliVector timeMilliVector, Time time, boo
timeMilliVector.setValueCount(rowCount + 1);
}

private static void updateVector(TimeStampVector timeStampVector, Timestamp timestamp, boolean isNonNull, int rowCount) {
private static void updateVector(
TimeStampVector timeStampVector,
Timestamp timestamp,
boolean isNonNull,
int rowCount) {
//TODO: Need to handle precision such as milli, micro, nano
timeStampVector.setValueCount(rowCount + 1);
if (timestamp != null) {
Expand All @@ -459,7 +465,11 @@ private static void updateVector(TimeStampVector timeStampVector, Timestamp time
}
}

private static void updateVector(VarBinaryVector varBinaryVector, InputStream is, boolean isNonNull, int rowCount) throws IOException {
private static void updateVector(
VarBinaryVector varBinaryVector,
InputStream is,
boolean isNonNull,
int rowCount) throws IOException {
varBinaryVector.setValueCount(rowCount + 1);
if (isNonNull && is != null) {
VarBinaryHolder holder = new VarBinaryHolder();
Expand All @@ -484,7 +494,11 @@ private static void updateVector(VarBinaryVector varBinaryVector, InputStream is
}
}

private static void updateVector(VarCharVector varcharVector, Clob clob, boolean isNonNull, int rowCount) throws SQLException, IOException {
private static void updateVector(
VarCharVector varcharVector,
Clob clob,
boolean isNonNull,
int rowCount) throws SQLException, IOException {
varcharVector.setValueCount(rowCount + 1);
if (isNonNull && clob != null) {
VarCharHolder holder = new VarCharHolder();
Expand All @@ -510,7 +524,8 @@ private static void updateVector(VarCharVector varcharVector, Clob clob, boolean
}
}

private static void updateVector(VarBinaryVector varBinaryVector, Blob blob, boolean isNonNull, int rowCount) throws SQLException, IOException {
private static void updateVector(VarBinaryVector varBinaryVector, Blob blob, boolean isNonNull, int rowCount)
throws SQLException, IOException {
updateVector(varBinaryVector, blob != null ? blob.getBinaryStream() : null, isNonNull, rowCount);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ public void destroy() throws SQLException {
* @throws ClassNotFoundException
* @throws IOException
*/
public static Object[][] prepareTestData(String[] testFiles, Class clss) throws SQLException, ClassNotFoundException, IOException {
public static Object[][] prepareTestData(String[] testFiles, Class clss)
throws SQLException, ClassNotFoundException, IOException {
Object[][] tableArr = new Object[testFiles.length][];
int i = 0;
for (String testFile : testFiles) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@
import org.junit.runners.Parameterized.Parameters;

/**
* JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with UTF-8 Charset, including
* the multi-byte CJK characters for H2 database
* JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with UTF-8 Charset,
* including the multi-byte CJK characters for H2 database
*/
@RunWith(Parameterized.class)
public class JdbcToArrowCharSetTest extends AbstractJdbcToArrowTest {
Expand All @@ -52,10 +52,10 @@ public class JdbcToArrowCharSetTest extends AbstractJdbcToArrowTest {
private static final String CLOB = "CLOB_FIELD15";

private static final String[] testFiles = {
"h2/test1_charset_h2.yml",
"h2/test1_charset_ch_h2.yml",
"h2/test1_charset_jp_h2.yml",
"h2/test1_charset_kr_h2.yml"
"h2/test1_charset_h2.yml",
"h2/test1_charset_ch_h2.yml",
"h2/test1_charset_jp_h2.yml",
"h2/test1_charset_kr_h2.yml"
};

/**
Expand Down Expand Up @@ -106,13 +106,16 @@ public static Collection<Object[]> getTestData() throws SQLException, ClassNotFo
*/
@Test
public void testJdbcToArroValues() throws SQLException, IOException {
testDataSets(JdbcToArrow.sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()));
testDataSets(JdbcToArrow.sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE),
Calendar.getInstance()));
testDataSets(JdbcToArrow.sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE)));
testDataSets(JdbcToArrow.sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), new RootAllocator(Integer.MAX_VALUE),
Calendar.getInstance()));
testDataSets(JdbcToArrow.sqlToArrow(conn.createStatement().executeQuery(table.getQuery()),
new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()));
testDataSets(JdbcToArrow.sqlToArrow(conn.createStatement().executeQuery(table.getQuery())));
testDataSets(JdbcToArrow.sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), new RootAllocator(Integer.MAX_VALUE)));
testDataSets(JdbcToArrow.sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance()));
testDataSets(JdbcToArrow.sqlToArrow(conn.createStatement().executeQuery(table.getQuery()),
new RootAllocator(Integer.MAX_VALUE)));
testDataSets(JdbcToArrow.sqlToArrow(conn.createStatement().executeQuery(table.getQuery()),
Calendar.getInstance()));
}

/**
Expand All @@ -122,12 +125,12 @@ public void testJdbcToArroValues() throws SQLException, IOException {
*/
public void testDataSets(VectorSchemaRoot root) {
assertVarcharVectorValues((VarCharVector) root.getVector(CLOB), table.getRowCount(),
getCharArrayWithCharSet(table.getValues(), CLOB, StandardCharsets.UTF_8));
getCharArrayWithCharSet(table.getValues(), CLOB, StandardCharsets.UTF_8));

assertVarcharVectorValues((VarCharVector) root.getVector(VARCHAR), table.getRowCount(),
getCharArrayWithCharSet(table.getValues(), VARCHAR, StandardCharsets.UTF_8));
getCharArrayWithCharSet(table.getValues(), VARCHAR, StandardCharsets.UTF_8));

assertVarcharVectorValues((VarCharVector) root.getVector(CHAR), table.getRowCount(),
getCharArrayWithCharSet(table.getValues(), CHAR, StandardCharsets.UTF_8));
getCharArrayWithCharSet(table.getValues(), CHAR, StandardCharsets.UTF_8));
}
}
Loading