Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rename Glue data type #47

Merged
merged 2 commits into from
Jun 24, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ repositories {
}

group = "pro.civitaspo"
version = "0.5.0"
version = "0.5.1"
civitaspo marked this conversation as resolved.
Show resolved Hide resolved
description = "Dumps records to S3 Parquet."

sourceCompatibility = 1.8
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,50 +8,50 @@ object GlueDataType {
extends GlueDataType(name)

// BOOLEAN – Values are true and false.
case object BOOLEAN extends GlueDataType("BOOLEAN")
case object BOOLEAN extends GlueDataType("boolean")
// TINYINT – An 8-bit signed INTEGER in two’s complement format, with a minimum value of -2^7 and a maximum value of 2^7-1.
case object TINYINT extends AbstractIntGlueDataType("TINYINT", bitWidth = 8)
case object TINYINT extends AbstractIntGlueDataType("tinyint", bitWidth = 8)
// SMALLINT – A 16-bit signed INTEGER in two’s complement format, with a minimum value of -2^15 and a maximum value of 2^15-1.
case object SMALLINT
extends AbstractIntGlueDataType("SMALLINT", bitWidth = 16)
extends AbstractIntGlueDataType("smallint", bitWidth = 16)
// INT and INTEGER – Athena combines two different implementations of the integer data type, as follows:
// * INT – In Data Definition Language (DDL) queries, Athena uses the INT data type.
// * INTEGER – In DML queries, Athena uses the INTEGER data type. INTEGER is represented as a 32-bit signed value in two's complement format, with a minimum value of -2^31 and a maximum value of 2^31-1.
case object INT extends AbstractIntGlueDataType("INT", bitWidth = 32)
case object INT extends AbstractIntGlueDataType("int", bitWidth = 32)
// BIGINT – A 64-bit signed INTEGER in two’s complement format, with a minimum value of -2^63 and a maximum value of 2^63-1.
case object BIGINT extends AbstractIntGlueDataType("BIGINT", bitWidth = 64)
case object BIGINT extends AbstractIntGlueDataType("bigint", bitWidth = 64)
// DOUBLE – A 64-bit double-precision floating point number.
case object DOUBLE extends GlueDataType("DOUBLE")
case object DOUBLE extends GlueDataType("double")
// FLOAT – A 32-bit single-precision floating point number. Equivalent to the REAL in Presto.
case object FLOAT extends GlueDataType("FLOAT")
case object FLOAT extends GlueDataType("float")
// DECIMAL(precision, scale) – precision is the total number of digits. scale (optional) is the number of digits in fractional part with a default of 0. For example, use these type definitions: DECIMAL(11,5), DECIMAL(15).
case class DECIMAL(precision: Int, scale: Int)
extends GlueDataType(s"DECIMAL($precision, $scale)")
extends GlueDataType(s"decimal($precision,$scale)")
// STRING – A string literal enclosed in single or double quotes. For more information, see STRING Hive Data Type.
case object STRING extends GlueDataType("STRING")
case object STRING extends GlueDataType("string")
// CHAR – Fixed length character data, with a specified length between 1 and 255, such as char(10). For more information, see CHAR Hive Data Type.
case class CHAR(length: Int) extends GlueDataType(s"CHAR($length)")
case class CHAR(length: Int) extends GlueDataType(s"char($length)")
// VARCHAR – Variable length character data, with a specified length between 1 and 65535, such as varchar(10). For more information, see VARCHAR Hive Data Type.
case class VARCHAR(length: Int) extends GlueDataType(s"VARCHAR($length)")
case class VARCHAR(length: Int) extends GlueDataType(s"varchar($length)")
// BINARY – Used for data in Parquet.
case object BINARY extends GlueDataType("BINARY")
case object BINARY extends GlueDataType("binary")
// DATE – A date in ISO format, such as YYYY-MM-DD.
case object DATE extends GlueDataType("DATE")
case object DATE extends GlueDataType("date")
// TIMESTAMP – Date and time instant in the UNIX format, such as yyyy-mm-dd hh:mm:ss[.f...]. For example, TIMESTAMP '2008-09-15 03:04:05.324'. This format uses the session time zone.
case object TIMESTAMP extends GlueDataType("TIMESTAMP")
case object TIMESTAMP extends GlueDataType("timestamp")
// ARRAY<data_type>
case class ARRAY(dataType: GlueDataType)
extends GlueDataType(s"ARRAY<${dataType.name}>")
extends GlueDataType(s"array<${dataType.name}>")
// MAP<primitive_type, data_type>
case class MAP(keyDataType: GlueDataType, valueDataType: GlueDataType)
extends GlueDataType(s"MAP<${keyDataType.name}, ${valueDataType.name}>")
extends GlueDataType(s"map<${keyDataType.name},${valueDataType.name}>")
// STRUCT<col_name : data_type [COMMENT col_comment] , ...>
case class STRUCT(struct: Map[String, GlueDataType])
extends GlueDataType({
val columns = struct
.map {
case (columnName, glueType) => s"$columnName : ${glueType.name}"
}
s"STRUCT<${columns.mkString(", ")}>"
s"struct<${columns.mkString(",")}>"
})
}