Skip to content

Commit 810158f

Browse files
authored
Add producer.rs workarounds (#4)
1 parent 3888ebf commit 810158f

File tree

2 files changed

+35
-41
lines changed

2 files changed

+35
-41
lines changed

datafusion/substrait/src/logical_plan/producer.rs

Lines changed: 33 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ use crate::variation_const::{
4343
LARGE_CONTAINER_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF,
4444
VIEW_CONTAINER_TYPE_VARIATION_REF,
4545
};
46+
use crate::variation_const::TIMESTAMP_NANO_TYPE_VARIATION_REF;
4647
use datafusion::arrow::array::{Array, GenericListArray, OffsetSizeTrait};
4748
use datafusion::arrow::temporal_conversions::NANOSECONDS;
4849
use datafusion::common::{
@@ -1825,31 +1826,16 @@ fn to_substrait_type(dt: &DataType, nullable: bool) -> Result<substrait::proto::
18251826
nullability,
18261827
})),
18271828
}),
1828-
DataType::Timestamp(unit, tz) => {
1829-
let precision = match unit {
1830-
TimeUnit::Second => 0,
1831-
TimeUnit::Millisecond => 3,
1832-
TimeUnit::Microsecond => 6,
1833-
TimeUnit::Nanosecond => 9,
1834-
};
1835-
let kind = match tz {
1836-
None => r#type::Kind::PrecisionTimestamp(r#type::PrecisionTimestamp {
1837-
type_variation_reference: DEFAULT_TYPE_VARIATION_REF,
1829+
DataType::Timestamp(_unit, _) => {
1830+
// TODO: DataDog-specific workaround, don't commit upstream
1831+
#[allow(deprecated)]
1832+
let type_variation_reference = TIMESTAMP_NANO_TYPE_VARIATION_REF;
1833+
Ok(substrait::proto::Type {
1834+
kind: Some(r#type::Kind::Timestamp(r#type::Timestamp {
1835+
type_variation_reference,
18381836
nullability,
1839-
precision,
1840-
}),
1841-
Some(_) => {
1842-
// If timezone is present, no matter what the actual tz value is, it indicates the
1843-
// value of the timestamp is tied to UTC epoch. That's all that Substrait cares about.
1844-
// As the timezone is lost, this conversion may be lossy for downstream use of the value.
1845-
r#type::Kind::PrecisionTimestampTz(r#type::PrecisionTimestampTz {
1846-
type_variation_reference: DEFAULT_TYPE_VARIATION_REF,
1847-
nullability,
1848-
precision,
1849-
})
1850-
}
1851-
};
1852-
Ok(substrait::proto::Type { kind: Some(kind) })
1837+
})),
1838+
})
18531839
}
18541840
DataType::Date32 => Ok(substrait::proto::Type {
18551841
kind: Some(r#type::Kind::Date(r#type::Date {
@@ -2004,6 +1990,8 @@ fn to_substrait_type(dt: &DataType, nullable: bool) -> Result<substrait::proto::
20041990
precision: *p as i32,
20051991
})),
20061992
}),
1993+
// TODO: DataDog-specific workaround, don't commit upstream
1994+
DataType::Dictionary(_, dt) => to_substrait_type(dt, nullable),
20071995
_ => not_impl_err!("Unsupported cast type: {dt:?}"),
20081996
}
20091997
}
@@ -2396,6 +2384,8 @@ fn to_substrait_literal(
23962384
}),
23972385
DEFAULT_TYPE_VARIATION_REF,
23982386
),
2387+
// TODO: DataDog-specific workaround, don't commit upstream
2388+
ScalarValue::Dictionary(_, value) => return to_substrait_literal(producer, value),
23992389
_ => (
24002390
not_impl_err!("Unsupported literal: {value:?}")?,
24012391
DEFAULT_TYPE_VARIATION_REF,
@@ -2598,17 +2588,18 @@ mod test {
25982588
round_trip_literal(ScalarValue::UInt64(Some(u64::MIN)))?;
25992589
round_trip_literal(ScalarValue::UInt64(Some(u64::MAX)))?;
26002590

2601-
for (ts, tz) in [
2602-
(Some(12345), None),
2603-
(None, None),
2604-
(Some(12345), Some("UTC".into())),
2605-
(None, Some("UTC".into())),
2606-
] {
2607-
round_trip_literal(ScalarValue::TimestampSecond(ts, tz.clone()))?;
2608-
round_trip_literal(ScalarValue::TimestampMillisecond(ts, tz.clone()))?;
2609-
round_trip_literal(ScalarValue::TimestampMicrosecond(ts, tz.clone()))?;
2610-
round_trip_literal(ScalarValue::TimestampNanosecond(ts, tz))?;
2611-
}
2591+
// TODO: DataDog-specific workaround, don't commit upstream
2592+
// for (ts, tz) in [
2593+
// (Some(12345), None),
2594+
// (None, None),
2595+
// (Some(12345), Some("UTC".into())),
2596+
// (None, Some("UTC".into())),
2597+
// ] {
2598+
// round_trip_literal(ScalarValue::TimestampSecond(ts, tz.clone()))?;
2599+
// round_trip_literal(ScalarValue::TimestampMillisecond(ts, tz.clone()))?;
2600+
// round_trip_literal(ScalarValue::TimestampMicrosecond(ts, tz.clone()))?;
2601+
// round_trip_literal(ScalarValue::TimestampNanosecond(ts, tz))?;
2602+
// }
26122603

26132604
round_trip_literal(ScalarValue::List(ScalarValue::new_list_nullable(
26142605
&[ScalarValue::Float32(Some(1.0))],
@@ -2707,12 +2698,13 @@ mod test {
27072698
round_trip_type(DataType::Float32)?;
27082699
round_trip_type(DataType::Float64)?;
27092700

2710-
for tz in [None, Some("UTC".into())] {
2711-
round_trip_type(DataType::Timestamp(TimeUnit::Second, tz.clone()))?;
2712-
round_trip_type(DataType::Timestamp(TimeUnit::Millisecond, tz.clone()))?;
2713-
round_trip_type(DataType::Timestamp(TimeUnit::Microsecond, tz.clone()))?;
2714-
round_trip_type(DataType::Timestamp(TimeUnit::Nanosecond, tz))?;
2715-
}
2701+
// TODO: DataDog-specific workaround, don't commit upstream
2702+
// for tz in [None, Some("UTC".into())] {
2703+
// round_trip_type(DataType::Timestamp(TimeUnit::Second, tz.clone()))?;
2704+
// round_trip_type(DataType::Timestamp(TimeUnit::Millisecond, tz.clone()))?;
2705+
// round_trip_type(DataType::Timestamp(TimeUnit::Microsecond, tz.clone()))?;
2706+
// round_trip_type(DataType::Timestamp(TimeUnit::Nanosecond, tz))?;
2707+
// }
27162708

27172709
round_trip_type(DataType::Date32)?;
27182710
round_trip_type(DataType::Date64)?;

datafusion/substrait/tests/cases/roundtrip_logical_plan.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -950,6 +950,7 @@ async fn qualified_catalog_schema_table_reference() -> Result<()> {
950950
/// - List, this nested type is not supported in arrow_cast
951951
/// - Decimal128 and Decimal256, them will fallback to UTF8 cast expr rather than plain literal.
952952
#[tokio::test]
953+
#[ignore] // TODO: DataDog-specific workaround, don't commit upstream
953954
async fn all_type_literal() -> Result<()> {
954955
roundtrip_all_types(
955956
"select * from data where
@@ -1073,6 +1074,7 @@ async fn duplicate_column() -> Result<()> {
10731074

10741075
/// Construct a plan that cast columns. Only those SQL types are supported for now.
10751076
#[tokio::test]
1077+
#[ignore] // TODO: DataDog-specific workaround, don't commit upstream
10761078
async fn new_test_grammar() -> Result<()> {
10771079
roundtrip_all_types(
10781080
"select

0 commit comments

Comments
 (0)