Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize make_date (#9089) #9600

Merged
merged 3 commits into from
Mar 17, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 55 additions & 63 deletions datafusion/physical-expr/src/datetime_expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -303,44 +303,10 @@ pub fn make_date(args: &[ColumnarValue]) -> Result<ColumnarValue> {
ColumnarValue::Array(a) => Some(a.len()),
});

let is_scalar = len.is_none();
let array_size = if is_scalar { 1 } else { len.unwrap() };

let years = args[0].cast_to(&DataType::Int32, None)?;
let months = args[1].cast_to(&DataType::Int32, None)?;
let days = args[2].cast_to(&DataType::Int32, None)?;

// since the epoch for the date32 datatype is the unix epoch
// we need to subtract the unix epoch from the current date
// note this can result in a negative value
let unix_days_from_ce = NaiveDate::from_ymd_opt(1970, 1, 1)
.unwrap()
.num_days_from_ce();

let mut builder: PrimitiveBuilder<Date32Type> = PrimitiveArray::builder(array_size);

let construct_date_fn = |builder: &mut PrimitiveBuilder<Date32Type>,
year: i32,
month: i32,
day: i32,
unix_days_from_ce: i32|
-> Result<()> {
let Ok(m) = u32::try_from(month) else {
return exec_err!("Month value '{month:?}' is out of range");
};
let Ok(d) = u32::try_from(day) else {
return exec_err!("Day value '{day:?}' is out of range");
};

let date = NaiveDate::from_ymd_opt(year, m, d);

match date {
Some(d) => builder.append_value(d.num_days_from_ce() - unix_days_from_ce),
None => return exec_err!("Unable to parse date from {year}, {month}, {day}"),
};
Ok(())
};

let scalar_value_fn = |col: &ColumnarValue| -> Result<i32> {
let ColumnarValue::Scalar(s) = col else {
return exec_err!("Expected scalar value");
Expand All @@ -351,51 +317,77 @@ pub fn make_date(args: &[ColumnarValue]) -> Result<ColumnarValue> {
Ok(*i)
};

// For scalar only columns the operation is faster without using the PrimitiveArray
if is_scalar {
construct_date_fn(
&mut builder,
scalar_value_fn(&years)?,
scalar_value_fn(&months)?,
scalar_value_fn(&days)?,
unix_days_from_ce,
)?;
} else {
let to_primitive_array = |col: &ColumnarValue,
scalar_count: usize|
-> Result<PrimitiveArray<Int32Type>> {
let value = if let Some(array_size) = len {
let to_primitive_array_fn = |col: &ColumnarValue| -> PrimitiveArray<Int32Type> {
match col {
ColumnarValue::Array(a) => Ok(a.as_primitive::<Int32Type>().to_owned()),
ColumnarValue::Array(a) => a.as_primitive::<Int32Type>().to_owned(),
_ => {
let v = scalar_value_fn(col).unwrap();
Ok(PrimitiveArray::<Int32Type>::from_value(v, scalar_count))
PrimitiveArray::<Int32Type>::from_value(v, array_size)
}
}
};

let years = to_primitive_array(&years, array_size).unwrap();
let months = to_primitive_array(&months, array_size).unwrap();
let days = to_primitive_array(&days, array_size).unwrap();
let years = to_primitive_array_fn(&years);
let months = to_primitive_array_fn(&months);
let days = to_primitive_array_fn(&days);

let mut builder: PrimitiveBuilder<Date32Type> =
PrimitiveArray::builder(array_size);
for i in 0..array_size {
construct_date_fn(
&mut builder,
process_date(
years.value(i),
months.value(i),
days.value(i),
unix_days_from_ce,
|days: i32| builder.append_value(days),
)?;
}
}

let arr = builder.finish();
let arr = builder.finish();

if is_scalar {
// If all inputs are scalar, keeps output as scalar
Ok(ColumnarValue::Scalar(ScalarValue::Date32(Some(
arr.value(0),
))))
ColumnarValue::Array(Arc::new(arr))
} else {
// For scalar only columns the operation is faster without using the PrimitiveArray.
// Also, keep the output as scalar since all inputs are scalar.
let mut value = 0;
process_date(
scalar_value_fn(&years)?,
scalar_value_fn(&months)?,
scalar_value_fn(&days)?,
|days: i32| value = days,
)?;

ColumnarValue::Scalar(ScalarValue::Date32(Some(value)))
};

Ok(value)
}

fn process_date(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please have more intuitive name for the function.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps something like make_date_inner

Suggested change
fn process_date(
/// Converts the year/month/day fields to a `i32`
/// representing the days from the unix epoch
/// and invokes date_consumer_fn with the value
fn make_date_inner(

year: i32,
month: i32,
day: i32,
mut date_consumer_fn: impl FnMut(i32),
) -> Result<()> {
let Ok(m) = u32::try_from(month) else {
return exec_err!("Month value '{month:?}' is out of range");
};
let Ok(d) = u32::try_from(day) else {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there is u32 from i32, perhaps its better to align types?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not quite sure what you are suggesting here @comphead I think this code is moved from above

The fact that arrow uses i32 for the subfields rather than u32 is somewhat non ideal I agree, but this code I believe just follows the standard

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

u32:: because NaiveDate::from_ymd_opt(year, m, d) requires that for m & d.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks folks, my point was prob to have the method signature to receive u32 instead of i32, the method works with dates and u32 more natural for the date imho

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree u32 would be more natural. I think the i32 is used because that is what the underlying APIs require

return exec_err!("Day value '{day:?}' is out of range");
};

if let Some(date) = NaiveDate::from_ymd_opt(year, m, d) {
// The number of days until the start of the unix epoch in the proleptic Gregorian calendar
// (with January 1, Year 1 (CE) as day 1). See [Datelike::num_days_from_ce].
const UNIX_DAYS_FROM_CE: i32 = 719_163;

// since the epoch for the date32 datatype is the unix epoch
// we need to subtract the unix epoch from the current date
// note that this can result in a negative value
date_consumer_fn(date.num_days_from_ce() - UNIX_DAYS_FROM_CE);
Ok(())
} else {
Ok(ColumnarValue::Array(Arc::new(arr)))
exec_err!("Unable to parse date from {year}, {month}, {day}")
}
}

Expand Down
Loading