-
Notifications
You must be signed in to change notification settings - Fork 3.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ARROW-3966 [Java] JDBC Column Metadata in Arrow Field Metadata #3134
Changes from 31 commits
5af1b5b
523387f
a78c770
da77cbe
b270044
df632e3
fe097c8
e34a9e7
4f1260c
8d6cf00
b5b0cb1
68c91e7
5bfd6a2
a6fb1be
bb3165b
7e9ce37
7b4527c
72d64cc
03091a8
881c6c8
1ceac9e
d847ebc
3b17c29
e5b19ee
789c8c8
509a1cc
4a6de86
69022c2
2928513
cfb2ba6
cc6cc88
65741a9
e9a9b2b
7049c36
02f2f34
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.arrow.adapter.jdbc; | ||
|
||
public class Constants { | ||
|
||
public static final String SQL_CATALOG_NAME_KEY = "SQL_CATALOG_NAME"; | ||
public static final String SQL_TABLE_NAME_KEY = "SQL_TABLE_NAME"; | ||
public static final String SQL_COLUMN_NAME_KEY = "SQL_COLUMN_NAME"; | ||
public static final String SQL_TYPE_KEY = "SQL_TYPE"; | ||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,6 +29,7 @@ | |
public class JdbcToArrowConfigBuilder { | ||
private Calendar calendar; | ||
private BaseAllocator allocator; | ||
private boolean includeMetadata; | ||
|
||
/** | ||
* Default constructor for the <code>JdbcToArrowConfigBuilder</code>. | ||
|
@@ -38,6 +39,7 @@ public class JdbcToArrowConfigBuilder { | |
public JdbcToArrowConfigBuilder() { | ||
this.allocator = null; | ||
this.calendar = null; | ||
this.includeMetadata = false; | ||
} | ||
|
||
/** | ||
|
@@ -63,6 +65,32 @@ public JdbcToArrowConfigBuilder(BaseAllocator allocator, Calendar calendar) { | |
|
||
this.allocator = allocator; | ||
this.calendar = calendar; | ||
this.includeMetadata = false; | ||
} | ||
|
||
/** | ||
* Constructor for the <code>JdbcToArrowConfigBuilder</code>. Both the | ||
* allocator and calendar are required. A {@link NullPointerException} | ||
* will be thrown if either of those arguments is <code>null</code>. | ||
* <p> | ||
* The allocator is used to construct Arrow vectors from the JDBC ResultSet. | ||
* The calendar is used to determine the time zone of {@link java.sql.Timestamp} | ||
* fields and convert {@link java.sql.Date}, {@link java.sql.Time}, and | ||
* {@link java.sql.Timestamp} fields to a single, common time zone when reading | ||
* from the result set. | ||
* </p> | ||
* <p> | ||
* The <code>includeMetadata</code> argument, if <code>true</code>, will cause | ||
* various information about each database field to be added to the Vector | ||
* Schema's field metadata. | ||
* </p> | ||
* | ||
* @param allocator The Arrow Vector memory allocator. | ||
* @param calendar The calendar to use when constructing timestamp fields. | ||
* @param includeMetadata Whether to include JDBC metadata in the Arrow Schema field metadata. | ||
*/ | ||
public JdbcToArrowConfigBuilder(BaseAllocator allocator, Calendar calendar, boolean includeMetadata) { | ||
this(allocator, calendar); | ||
this.includeMetadata = includeMetadata; | ||
} | ||
|
||
/** | ||
|
@@ -90,6 +118,17 @@ public JdbcToArrowConfigBuilder setCalendar(Calendar calendar) { | |
return this; | ||
} | ||
|
||
/** | ||
* Sets whether to include JDBC ResultSet field metadata in the Arrow Schema field metadata. | ||
* | ||
* @param includeMetadata Whether to include or exclude JDBC metadata in the Arrow Schema field metadata. | ||
* @return This instance of the <code>JdbcToArrowConfigBuilder</code>, for chaining. | ||
*/ | ||
public JdbcToArrowConfigBuilder setIncludeMetadata(boolean includeMetadata) { | ||
this.includeMetadata = includeMetadata; | ||
return this; | ||
} | ||
|
||
/** | ||
* This builds the {@link JdbcToArrowConfig} from the provided | ||
* {@link BaseAllocator} and {@link Calendar}. | ||
|
@@ -98,6 +137,6 @@ public JdbcToArrowConfigBuilder setCalendar(Calendar calendar) { | |
* @throws NullPointerException if either the allocator or calendar was not set. | ||
*/ | ||
public JdbcToArrowConfig build() { | ||
return new JdbcToArrowConfig(allocator, calendar); | ||
return new JdbcToArrowConfig(allocator, calendar, includeMetadata); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. (minor nit - again ignore if you don't agree) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That makes sense. Will change tonight. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hang on, I'm confused - There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah, I saw the ctors using the false flag in other places and was misled... this looks OK. |
||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,7 +36,11 @@ | |
import java.sql.Types; | ||
import java.util.ArrayList; | ||
import java.util.Calendar; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Locale; | ||
import java.util.Map; | ||
import java.util.TimeZone; | ||
|
||
import org.apache.arrow.memory.RootAllocator; | ||
import org.apache.arrow.vector.BaseFixedWidthVector; | ||
|
@@ -103,7 +107,14 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar | |
Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null"); | ||
Preconditions.checkNotNull(calendar, "Calendar object can't be null"); | ||
|
||
return jdbcToArrowSchema(rsmd, new JdbcToArrowConfig(new RootAllocator(0), calendar)); | ||
return jdbcToArrowSchema(rsmd, new JdbcToArrowConfig(new RootAllocator(0), calendar, false)); | ||
} | ||
|
||
/** | ||
* Returns an instance of {@link java.util.Calendar} with the UTC time zone and root locale. | ||
*/ | ||
public static Calendar getUtcCalendar() { | ||
return Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT); | ||
} | ||
|
||
/** | ||
|
@@ -148,75 +159,93 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig | |
List<Field> fields = new ArrayList<>(); | ||
int columnCount = rsmd.getColumnCount(); | ||
for (int i = 1; i <= columnCount; i++) { | ||
String columnName = rsmd.getColumnName(i); | ||
final String columnName = rsmd.getColumnName(i); | ||
final FieldType fieldType; | ||
|
||
final Map<String, String> metadata; | ||
if (config.getIncludeMetadata()) { | ||
metadata = new HashMap<String, String>(); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You can use the <> operator to avoid repeating the types on the RHS: https://www.javaworld.com/article/2074080/core-java/core-java-jdk-7-the-diamond-operator.html There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good point; will fix tonight. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Fixed. |
||
metadata.put(Constants.SQL_CATALOG_NAME_KEY, rsmd.getCatalogName(i)); | ||
metadata.put(Constants.SQL_TABLE_NAME_KEY, rsmd.getTableName(i)); | ||
metadata.put(Constants.SQL_COLUMN_NAME_KEY, columnName); | ||
metadata.put(Constants.SQL_TYPE_KEY, rsmd.getColumnTypeName(i)); | ||
|
||
} else { | ||
metadata = null; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. (minor nit) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I can do that, but then I can't make the metadata variable final. I've worked on teams in the past where the best practice was to mark things final as often as possible; is that the same with Arrow? |
||
} | ||
|
||
switch (rsmd.getColumnType(i)) { | ||
case Types.BOOLEAN: | ||
case Types.BIT: | ||
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Bool()), null)); | ||
fieldType = new FieldType(true, new ArrowType.Bool(), null, metadata); | ||
break; | ||
case Types.TINYINT: | ||
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Int(8, true)), null)); | ||
fieldType = new FieldType(true, new ArrowType.Int(8, true), null, metadata); | ||
break; | ||
case Types.SMALLINT: | ||
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Int(16, true)), null)); | ||
fieldType = new FieldType(true, new ArrowType.Int(16, true), null, metadata); | ||
break; | ||
case Types.INTEGER: | ||
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Int(32, true)), null)); | ||
fieldType = new FieldType(true, new ArrowType.Int(32, true), null, metadata); | ||
break; | ||
case Types.BIGINT: | ||
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Int(64, true)), null)); | ||
fieldType = new FieldType(true, new ArrowType.Int(64, true), null, metadata); | ||
break; | ||
case Types.NUMERIC: | ||
case Types.DECIMAL: | ||
int precision = rsmd.getPrecision(i); | ||
int scale = rsmd.getScale(i); | ||
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Decimal(precision, scale)), null)); | ||
fieldType = new FieldType(true, new ArrowType.Decimal(precision, scale), null, metadata); | ||
break; | ||
case Types.REAL: | ||
case Types.FLOAT: | ||
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.FloatingPoint(SINGLE)), null)); | ||
fieldType = new FieldType(true, new ArrowType.FloatingPoint(SINGLE), null, metadata); | ||
break; | ||
case Types.DOUBLE: | ||
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.FloatingPoint(DOUBLE)), null)); | ||
fieldType = new FieldType(true, new ArrowType.FloatingPoint(DOUBLE), null, metadata); | ||
break; | ||
case Types.CHAR: | ||
case Types.NCHAR: | ||
case Types.VARCHAR: | ||
case Types.NVARCHAR: | ||
case Types.LONGVARCHAR: | ||
case Types.LONGNVARCHAR: | ||
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Utf8()), null)); | ||
case Types.CLOB: | ||
fieldType = new FieldType(true, new ArrowType.Utf8(), null, metadata); | ||
break; | ||
case Types.DATE: | ||
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Date(DateUnit.MILLISECOND)), null)); | ||
fieldType = new FieldType(true, new ArrowType.Date(DateUnit.MILLISECOND), null, metadata); | ||
break; | ||
case Types.TIME: | ||
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Time(TimeUnit.MILLISECOND, 32)), null)); | ||
fieldType = new FieldType(true, new ArrowType.Time(TimeUnit.MILLISECOND, 32), null, metadata); | ||
break; | ||
case Types.TIMESTAMP: | ||
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Timestamp(TimeUnit.MILLISECOND, | ||
config.getCalendar().getTimeZone().getID())), null)); | ||
fieldType = | ||
new FieldType( | ||
true, | ||
new ArrowType.Timestamp(TimeUnit.MILLISECOND, config.getCalendar().getTimeZone().getID()), | ||
null, | ||
metadata); | ||
break; | ||
case Types.BINARY: | ||
case Types.VARBINARY: | ||
case Types.LONGVARBINARY: | ||
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Binary()), null)); | ||
break; | ||
case Types.ARRAY: | ||
// TODO Need to handle this type | ||
// fields.add(new Field("list", FieldType.nullable(new ArrowType.List()), null)); | ||
break; | ||
case Types.CLOB: | ||
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Utf8()), null)); | ||
break; | ||
case Types.BLOB: | ||
fields.add(new Field(columnName, FieldType.nullable(new ArrowType.Binary()), null)); | ||
fieldType = new FieldType(true, new ArrowType.Binary(), null, metadata); | ||
break; | ||
|
||
case Types.ARRAY: | ||
// TODO Need to handle this type | ||
// fields.add(new Field("list", FieldType.nullable(new ArrowType.List()), null)); | ||
default: | ||
// no-op, shouldn't get here | ||
fieldType = null; | ||
break; | ||
} | ||
|
||
if (fieldType != null) { | ||
fields.add(new Field(columnName, fieldType, null)); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I like the use of a FieldType object instead of creating a Field object in each case statement. I am fine with this. |
||
} | ||
} | ||
|
||
return new Schema(fields, null); | ||
|
@@ -250,7 +279,7 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calen | |
Preconditions.checkNotNull(root, "Vector Schema cannot be null"); | ||
Preconditions.checkNotNull(calendar, "Calendar object can't be null"); | ||
|
||
jdbcToArrowVectors(rs, root, new JdbcToArrowConfig(new RootAllocator(0), calendar)); | ||
jdbcToArrowVectors(rs, root, new JdbcToArrowConfig(new RootAllocator(0), calendar, false)); | ||
} | ||
|
||
/** | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
minor nit (please ignore if you don't agree :))
should this be named "shouldIncludeMetadata"?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point; will fix tonight.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed.