From b0e067c7b928e94bda6c8e93db0908b54c00fa4a Mon Sep 17 00:00:00 2001 From: Jacques Nadeau Date: Mon, 7 Jul 2014 18:51:37 -0700 Subject: [PATCH 1/2] PARQUET-12: Add format support for additional converted types. --- src/thrift/parquet.thrift | 87 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/src/thrift/parquet.thrift b/src/thrift/parquet.thrift index 551215327..99e3b69b0 100644 --- a/src/thrift/parquet.thrift +++ b/src/thrift/parquet.thrift @@ -76,6 +76,93 @@ enum ConvertedType { * 2 digits over). */ DECIMAL = 5; + + /** + * A Date + * + * Stored as days since Unix epoch, encoded as the INT32 physical type. + * + */ + DATE = 6; + + /** + * A time + * + * The total number of units of time since midnight. The value can either + * be recorded as the number of milliseconds since midnight using + * TIME_MILLIS and the INT32 physical type or the number of microseconds + * since midnight by using TIME_MICROS and the INT64 physical type. + * + */ + TIME_MILLIS = 7; + TIME_MICROS = 8; + + /** + * A date/time combination + * + * Date and time recorded as units since the Unix epoch. Always recorded as + * a physical type of INT64, the data can be stored as either milliseconds + * since epoch (TIMESTAMP_MILLIS) or microseconds since epoch (TIMESTAMP_MICROS). + */ + TIMESTAMP_MILLIS = 9; + TIMESTAMP_MICROS = 10; + + /** + * An unsigned integer value. + * + * The number describes the maximum number of meaningful data bits in + * the stored value. 8, 16 and 32 bit values are stored using the + * INT32 physical type. 64 bit values are stored using the INT64 + * physical type. + * + */ + UINT_8 = 11; + UINT_16 = 12; + UINT_32 = 13; + UINT_64 = 14; + + /** + * A signed integer value. + * + * The number describes the maximum number of meaningful data bits in + * the stored value. 8, 16 and 32 bit values are stored using the + * INT32 physical type. 64 bit values are stored using the INT64 + * physical type. 
+ * + */ + INT_8 = 15; + INT_16 = 16; + INT_32 = 17; + INT_64 = 18; + + /** + * An embedded JSON document + * + * A JSON document embedded within a single UTF8 column. + */ + JSON = 19; + + /** + * An embedded BSON document + * + * A BSON document embedded within a single BINARY column. + */ + BSON = 20; + + /** + * An interval of time + * + * This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12. + * This data is composed of three separate little endian unsigned + * integers. Each stores a component of a duration of time. The first + * integer identifies the number of months associated with the duration, + * the second identifies the number of days associated with the duration + * and the third identifies the number of milliseconds associated with + * the provided duration. This duration of time is independent of any + * particular timezone or date. + */ + INTERVAL = 21; + } /** From 7001502877e0cfbf81d429656989057ccc5fafb2 Mon Sep 17 00:00:00 2001 From: Jacques Nadeau Date: Mon, 28 Jul 2014 13:36:01 -0700 Subject: [PATCH 2/2] Remove micros implementations until everyone is agreed on micros versus nanos. --- src/thrift/parquet.thrift | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/thrift/parquet.thrift b/src/thrift/parquet.thrift index 99e3b69b0..9d875b00e 100644 --- a/src/thrift/parquet.thrift +++ b/src/thrift/parquet.thrift @@ -88,24 +88,21 @@ enum ConvertedType { /** * A time * - * The total number of units of time since midnight. The value can either - * be recorded as the number of milliseconds since midnight using - * TIME_MILLIS and the INT32 physical type or the number of microseconds - * since midnight by using TIME_MICROS and the INT64 physical type. - * + * The total number of milliseconds since midnight. The value is stored + * as an INT32 physical type. 
*/ TIME_MILLIS = 7; - TIME_MICROS = 8; + // RESERVED = 8; /** * A date/time combination * - * Date and time recorded as units since the Unix epoch. Always recorded as - * a physical type of INT64, the data can be stored as either milliseconds - * since epoch (TIMESTAMP_MILLIS) or microseconds since epoch (TIMESTAMP_MICROS). + * Date and time recorded as milliseconds since the Unix epoch. Recorded as + * a physical type of INT64. */ TIMESTAMP_MILLIS = 9; - TIMESTAMP_MICROS = 10; + // RESERVED = 10; + /** * An unsigned integer value.