Skip to content

Commit d359d64

Browse files
authored
Add support for Arrow Dictionary type in Substrait (apache#16608)
* Add support for Arrow Dictionary type in Substrait

  This commit adds support for the Arrow Dictionary type in Substrait plans. Resolves apache#16273

* Add more specific type variation consts
1 parent 698155a commit d359d64

File tree

3 files changed

+48
-20
lines changed

3 files changed

+48
-20
lines changed

datafusion/substrait/src/logical_plan/consumer/types.rs

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ use crate::variation_const::{
2222
DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF,
2323
DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF,
2424
DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_INTERVAL_DAY_TYPE_VARIATION_REF,
25-
DEFAULT_TYPE_VARIATION_REF, DURATION_INTERVAL_DAY_TYPE_VARIATION_REF,
25+
DEFAULT_MAP_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF,
26+
DICTIONARY_MAP_TYPE_VARIATION_REF, DURATION_INTERVAL_DAY_TYPE_VARIATION_REF,
2627
INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_NAME,
2728
INTERVAL_MONTH_DAY_NANO_TYPE_REF, INTERVAL_YEAR_MONTH_TYPE_REF,
2829
LARGE_CONTAINER_TYPE_VARIATION_REF, TIMESTAMP_MICRO_TYPE_VARIATION_REF,
@@ -177,24 +178,32 @@ pub fn from_substrait_type(
177178
let value_type = map.value.as_ref().ok_or_else(|| {
178179
substrait_datafusion_err!("Map type must have value type")
179180
})?;
180-
let key_field = Arc::new(Field::new(
181-
"key",
182-
from_substrait_type(consumer, key_type, dfs_names, name_idx)?,
183-
false,
184-
));
185-
let value_field = Arc::new(Field::new(
186-
"value",
187-
from_substrait_type(consumer, value_type, dfs_names, name_idx)?,
188-
true,
189-
));
190-
Ok(DataType::Map(
191-
Arc::new(Field::new_struct(
192-
"entries",
193-
[key_field, value_field],
194-
false, // The inner map field is always non-nullable (Arrow #1697),
181+
let key_type =
182+
from_substrait_type(consumer, key_type, dfs_names, name_idx)?;
183+
let value_type =
184+
from_substrait_type(consumer, value_type, dfs_names, name_idx)?;
185+
186+
match map.type_variation_reference {
187+
DEFAULT_MAP_TYPE_VARIATION_REF => {
188+
let key_field = Arc::new(Field::new("key", key_type, false));
189+
let value_field = Arc::new(Field::new("value", value_type, true));
190+
Ok(DataType::Map(
191+
Arc::new(Field::new_struct(
192+
"entries",
193+
[key_field, value_field],
194+
false, // The inner map field is always non-nullable (Arrow #1697),
195+
)),
196+
false, // whether keys are sorted
197+
))
198+
}
199+
DICTIONARY_MAP_TYPE_VARIATION_REF => Ok(DataType::Dictionary(
200+
Box::new(key_type),
201+
Box::new(value_type),
195202
)),
196-
false, // whether keys are sorted
197-
))
203+
v => not_impl_err!(
204+
"Unsupported Substrait type variation {v} of type {s_kind:?}"
205+
),
206+
}
198207
}
199208
r#type::Kind::Decimal(d) => match d.type_variation_reference {
200209
DECIMAL_128_TYPE_VARIATION_REF => {

datafusion/substrait/src/logical_plan/producer/types.rs

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ use crate::variation_const::{
2121
DATE_32_TYPE_VARIATION_REF, DATE_64_TYPE_VARIATION_REF,
2222
DECIMAL_128_TYPE_VARIATION_REF, DECIMAL_256_TYPE_VARIATION_REF,
2323
DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_INTERVAL_DAY_TYPE_VARIATION_REF,
24-
DEFAULT_TYPE_VARIATION_REF, DURATION_INTERVAL_DAY_TYPE_VARIATION_REF,
24+
DEFAULT_MAP_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF,
25+
DICTIONARY_MAP_TYPE_VARIATION_REF, DURATION_INTERVAL_DAY_TYPE_VARIATION_REF,
2526
LARGE_CONTAINER_TYPE_VARIATION_REF, TIME_32_TYPE_VARIATION_REF,
2627
TIME_64_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF,
2728
VIEW_CONTAINER_TYPE_VARIATION_REF,
@@ -276,13 +277,25 @@ pub(crate) fn to_substrait_type(
276277
kind: Some(r#type::Kind::Map(Box::new(r#type::Map {
277278
key: Some(Box::new(key_type)),
278279
value: Some(Box::new(value_type)),
279-
type_variation_reference: DEFAULT_CONTAINER_TYPE_VARIATION_REF,
280+
type_variation_reference: DEFAULT_MAP_TYPE_VARIATION_REF,
280281
nullability,
281282
}))),
282283
})
283284
}
284285
_ => plan_err!("Map fields must contain a Struct with exactly 2 fields"),
285286
},
287+
DataType::Dictionary(key_type, value_type) => {
288+
let key_type = to_substrait_type(key_type, nullable)?;
289+
let value_type = to_substrait_type(value_type, nullable)?;
290+
Ok(substrait::proto::Type {
291+
kind: Some(r#type::Kind::Map(Box::new(r#type::Map {
292+
key: Some(Box::new(key_type)),
293+
value: Some(Box::new(value_type)),
294+
type_variation_reference: DICTIONARY_MAP_TYPE_VARIATION_REF,
295+
nullability,
296+
}))),
297+
})
298+
}
286299
DataType::Struct(fields) => {
287300
let field_types = fields
288301
.iter()
@@ -407,6 +420,10 @@ mod tests {
407420
.into(),
408421
false,
409422
))?;
423+
round_trip_type(DataType::Dictionary(
424+
Box::new(DataType::Utf8),
425+
Box::new(DataType::Int32),
426+
))?;
410427

411428
round_trip_type(DataType::Struct(
412429
vec![

datafusion/substrait/src/variation_const.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ pub const TIME_64_TYPE_VARIATION_REF: u32 = 1;
5555
pub const DEFAULT_CONTAINER_TYPE_VARIATION_REF: u32 = 0;
5656
pub const LARGE_CONTAINER_TYPE_VARIATION_REF: u32 = 1;
5757
pub const VIEW_CONTAINER_TYPE_VARIATION_REF: u32 = 2;
58+
pub const DEFAULT_MAP_TYPE_VARIATION_REF: u32 = 0;
59+
pub const DICTIONARY_MAP_TYPE_VARIATION_REF: u32 = 1;
5860
pub const DECIMAL_128_TYPE_VARIATION_REF: u32 = 0;
5961
pub const DECIMAL_256_TYPE_VARIATION_REF: u32 = 1;
6062
/// Used for the arrow type [`DataType::Interval`] with [`IntervalUnit::DayTime`].

0 commit comments

Comments
 (0)