Skip to content

Commit 78bd204

Browse files
authored
[thrift-remodel] Use thrift_enum macro for ConvertedType (#8680)
# Which issue does this PR close?

- Part of #5853.

# Rationale for this change

While converting to the new Thrift model, the `ConvertedType` enum was implemented by hand because its `NONE` variant used the discriminant `0`. This PR changes that discriminant to `-1`, which allows the `thrift_enum` macro to be used instead and improves code maintainability.

# What changes are included in this PR?

See above.

# Are these changes tested?

Covered by existing tests.

# Are there any user-facing changes?

No, this only changes the discriminant value for a unit variant enum.
1 parent 99811f8 commit 78bd204

File tree

1 file changed

+77
-127
lines changed

1 file changed

+77
-127
lines changed

parquet/src/basic.rs

Lines changed: 77 additions & 127 deletions
Original file line numberDiff line numberDiff line change
@@ -61,156 +61,112 @@ enum Type {
6161

6262
// ----------------------------------------------------------------------
6363
// Mirrors thrift enum `ConvertedType`
64-
//
65-
// Cannot use macros because of added field `None`
6664

6765
// TODO(ets): Adding the `NONE` variant to this enum is a bit awkward. We should
68-
// look into removing it and using `Option<ConvertedType>` instead. Then all of this
69-
// handwritten code could go away.
70-
66+
// look into removing it and using `Option<ConvertedType>` instead.
67+
thrift_enum!(
7168
/// Common types (converted types) used by frameworks when using Parquet.
7269
///
7370
/// This helps map between types in those frameworks to the base types in Parquet.
7471
/// This is only metadata and not needed to read or write the data.
7572
///
7673
/// This struct was renamed from `LogicalType` in version 4.0.0.
7774
/// If targeting Parquet format 2.4.0 or above, please use [LogicalType] instead.
78-
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
79-
#[allow(non_camel_case_types)]
80-
pub enum ConvertedType {
81-
/// No type conversion.
82-
NONE,
83-
/// A BYTE_ARRAY actually contains UTF8 encoded chars.
84-
UTF8,
85-
86-
/// A map is converted as an optional field containing a repeated key/value pair.
87-
MAP,
88-
89-
/// A key/value pair is converted into a group of two fields.
90-
MAP_KEY_VALUE,
91-
92-
/// A list is converted into an optional field containing a repeated field for its
93-
/// values.
94-
LIST,
95-
96-
/// An enum is converted into a binary field
97-
ENUM,
98-
99-
/// A decimal value.
100-
/// This may be used to annotate binary or fixed primitive types. The
101-
/// underlying byte array stores the unscaled value encoded as two's
102-
/// complement using big-endian byte order (the most significant byte is the
103-
/// zeroth element).
104-
///
105-
/// This must be accompanied by a (maximum) precision and a scale in the
106-
/// SchemaElement. The precision specifies the number of digits in the decimal
107-
/// and the scale stores the location of the decimal point. For example 1.23
108-
/// would have precision 3 (3 total digits) and scale 2 (the decimal point is
109-
/// 2 digits over).
110-
DECIMAL,
75+
enum ConvertedType {
76+
/// Not defined in the spec, used internally to indicate no type conversion
77+
NONE = -1;
11178

112-
/// A date stored as days since Unix epoch, encoded as the INT32 physical type.
113-
DATE,
79+
/// A BYTE_ARRAY actually contains UTF8 encoded chars.
80+
UTF8 = 0;
11481

115-
/// The total number of milliseconds since midnight. The value is stored as an INT32
116-
/// physical type.
117-
TIME_MILLIS,
82+
/// A map is converted as an optional field containing a repeated key/value pair.
83+
MAP = 1;
11884

119-
/// The total number of microseconds since midnight. The value is stored as an INT64
120-
/// physical type.
121-
TIME_MICROS,
85+
/// A key/value pair is converted into a group of two fields.
86+
MAP_KEY_VALUE = 2;
12287

123-
/// Date and time recorded as milliseconds since the Unix epoch.
124-
/// Recorded as a physical type of INT64.
125-
TIMESTAMP_MILLIS,
88+
/// A list is converted into an optional field containing a repeated field for its
89+
/// values.
90+
LIST = 3;
12691

127-
/// Date and time recorded as microseconds since the Unix epoch.
128-
/// The value is stored as an INT64 physical type.
129-
TIMESTAMP_MICROS,
92+
/// An enum is converted into a BYTE_ARRAY field
93+
ENUM = 4;
13094

131-
/// An unsigned 8 bit integer value stored as INT32 physical type.
132-
UINT_8,
95+
/// A decimal value.
96+
///
97+
/// This may be used to annotate BYTE_ARRAY or FIXED_LEN_BYTE_ARRAY primitive
98+
/// types. The underlying byte array stores the unscaled value encoded as two's
99+
/// complement using big-endian byte order (the most significant byte is the
100+
/// zeroth element). The value of the decimal is the value * 10^{-scale}.
101+
///
102+
/// This must be accompanied by a (maximum) precision and a scale in the
103+
/// SchemaElement. The precision specifies the number of digits in the decimal
104+
/// and the scale stores the location of the decimal point. For example 1.23
105+
/// would have precision 3 (3 total digits) and scale 2 (the decimal point is
106+
/// 2 digits over).
107+
DECIMAL = 5;
133108

134-
/// An unsigned 16 bit integer value stored as INT32 physical type.
135-
UINT_16,
109+
/// A date stored as days since Unix epoch, encoded as the INT32 physical type.
110+
DATE = 6;
136111

137-
/// An unsigned 32 bit integer value stored as INT32 physical type.
138-
UINT_32,
112+
/// The total number of milliseconds since midnight. The value is stored as an INT32
113+
/// physical type.
114+
TIME_MILLIS = 7;
139115

140-
/// An unsigned 64 bit integer value stored as INT64 physical type.
141-
UINT_64,
116+
/// The total number of microseconds since midnight. The value is stored as an INT64
117+
/// physical type.
118+
TIME_MICROS = 8;
142119

143-
/// A signed 8 bit integer value stored as INT32 physical type.
144-
INT_8,
120+
/// Date and time recorded as milliseconds since the Unix epoch.
121+
/// Recorded as a physical type of INT64.
122+
TIMESTAMP_MILLIS = 9;
145123

146-
/// A signed 16 bit integer value stored as INT32 physical type.
147-
INT_16,
124+
/// Date and time recorded as microseconds since the Unix epoch.
125+
/// The value is stored as an INT64 physical type.
126+
TIMESTAMP_MICROS = 10;
148127

149-
/// A signed 32 bit integer value stored as INT32 physical type.
150-
INT_32,
128+
/// An unsigned 8 bit integer value stored as INT32 physical type.
129+
UINT_8 = 11;
151130

152-
/// A signed 64 bit integer value stored as INT64 physical type.
153-
INT_64,
131+
/// An unsigned 16 bit integer value stored as INT32 physical type.
132+
UINT_16 = 12;
154133

155-
/// A JSON document embedded within a single UTF8 column.
156-
JSON,
134+
/// An unsigned 32 bit integer value stored as INT32 physical type.
135+
UINT_32 = 13;
157136

158-
/// A BSON document embedded within a single BINARY column.
159-
BSON,
137+
/// An unsigned 64 bit integer value stored as INT64 physical type.
138+
UINT_64 = 14;
160139

161-
/// An interval of time.
162-
///
163-
/// This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12.
164-
/// This data is composed of three separate little endian unsigned integers.
165-
/// Each stores a component of a duration of time. The first integer identifies
166-
/// the number of months associated with the duration, the second identifies
167-
/// the number of days associated with the duration and the third identifies
168-
/// the number of milliseconds associated with the provided duration.
169-
/// This duration of time is independent of any particular timezone or date.
170-
INTERVAL,
171-
}
140+
/// A signed 8 bit integer value stored as INT32 physical type.
141+
INT_8 = 15;
172142

173-
impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for ConvertedType {
174-
fn read_thrift(prot: &mut R) -> Result<Self> {
175-
let val = prot.read_i32()?;
176-
Ok(match val {
177-
0 => Self::UTF8,
178-
1 => Self::MAP,
179-
2 => Self::MAP_KEY_VALUE,
180-
3 => Self::LIST,
181-
4 => Self::ENUM,
182-
5 => Self::DECIMAL,
183-
6 => Self::DATE,
184-
7 => Self::TIME_MILLIS,
185-
8 => Self::TIME_MICROS,
186-
9 => Self::TIMESTAMP_MILLIS,
187-
10 => Self::TIMESTAMP_MICROS,
188-
11 => Self::UINT_8,
189-
12 => Self::UINT_16,
190-
13 => Self::UINT_32,
191-
14 => Self::UINT_64,
192-
15 => Self::INT_8,
193-
16 => Self::INT_16,
194-
17 => Self::INT_32,
195-
18 => Self::INT_64,
196-
19 => Self::JSON,
197-
20 => Self::BSON,
198-
21 => Self::INTERVAL,
199-
_ => return Err(general_err!("Unexpected ConvertedType {}", val)),
200-
})
201-
}
202-
}
143+
/// A signed 16 bit integer value stored as INT32 physical type.
144+
INT_16 = 16;
203145

204-
impl WriteThrift for ConvertedType {
205-
const ELEMENT_TYPE: ElementType = ElementType::I32;
146+
/// A signed 32 bit integer value stored as INT32 physical type.
147+
INT_32 = 17;
206148

207-
fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
208-
// because we've added NONE, the variant values are off by 1, so correct that here
209-
writer.write_i32(*self as i32 - 1)
210-
}
211-
}
149+
/// A signed 64 bit integer value stored as INT64 physical type.
150+
INT_64 = 18;
151+
152+
/// A JSON document embedded within a single UTF8 column.
153+
JSON = 19;
212154

213-
write_thrift_field!(ConvertedType, FieldType::I32);
155+
/// A BSON document embedded within a single BYTE_ARRAY column.
156+
BSON = 20;
157+
158+
/// An interval of time.
159+
///
160+
/// This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12.
161+
/// This data is composed of three separate little endian unsigned integers.
162+
/// Each stores a component of a duration of time. The first integer identifies
163+
/// the number of months associated with the duration, the second identifies
164+
/// the number of days associated with the duration and the third identifies
165+
/// the number of milliseconds associated with the provided duration.
166+
/// This duration of time is independent of any particular timezone or date.
167+
INTERVAL = 21;
168+
}
169+
);
214170

215171
// ----------------------------------------------------------------------
216172
// Mirrors thrift union `TimeUnit`
@@ -1327,12 +1283,6 @@ impl WriteThrift for ColumnOrder {
13271283
// ----------------------------------------------------------------------
13281284
// Display handlers
13291285

1330-
impl fmt::Display for ConvertedType {
1331-
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1332-
write!(f, "{self:?}")
1333-
}
1334-
}
1335-
13361286
impl fmt::Display for Compression {
13371287
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
13381288
write!(f, "{self:?}")

0 commit comments

Comments
 (0)