Skip to content

Commit 274b222

Browse files
authored
Deprecate invoke and invoke_no_args in favor of invoke_batch (#13174)
* Deprecate invoke and invoke_no_args in favor of invoke_batch

  `invoke_batch` covers all needs, so let's deprecate and eventually remove the redundant variants.
* Migrate test_function to invoke_batch
* Migrate regexpcount tests to invoke_batch
* Migrate log tests to invoke_batch
* Migrate tests to use invoke_batch
* Migrate ToUnixtimeFunc to implement invoke_batch
* Suppress deprecation warnings in tests

  To be followed up on.
* Migrate random benchmark to invoke_batch
* fixup! Suppress deprecation warnings in tests
* Fix docstring
1 parent 2482ff4 commit 274b222

File tree

21 files changed

+199
-102
lines changed

21 files changed

+199
-102
lines changed

datafusion/expr/src/udf.rs

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,7 @@ impl ScalarUDF {
195195
/// See [`ScalarUDFImpl::invoke`] for more details.
196196
#[deprecated(since = "42.1.0", note = "Use `invoke_batch` instead")]
197197
pub fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
198+
#[allow(deprecated)]
198199
self.inner.invoke(args)
199200
}
200201

@@ -218,6 +219,7 @@ impl ScalarUDF {
218219
/// See [`ScalarUDFImpl::invoke_no_args`] for more details.
219220
#[deprecated(since = "42.1.0", note = "Use `invoke_batch` instead")]
220221
pub fn invoke_no_args(&self, number_rows: usize) -> Result<ColumnarValue> {
222+
#[allow(deprecated)]
221223
self.inner.invoke_no_args(number_rows)
222224
}
223225

@@ -226,6 +228,7 @@ impl ScalarUDF {
226228
#[deprecated(since = "42.0.0", note = "Use `invoke_batch` instead")]
227229
pub fn fun(&self) -> ScalarFunctionImplementation {
228230
let captured = Arc::clone(&self.inner);
231+
#[allow(deprecated)]
229232
Arc::new(move |args| captured.invoke(args))
230233
}
231234

@@ -480,6 +483,7 @@ pub trait ScalarUDFImpl: Debug + Send + Sync {
480483
/// to arrays, which will likely be simpler code, but be slower.
481484
///
482485
/// [invoke_no_args]: ScalarUDFImpl::invoke_no_args
486+
#[deprecated(since = "42.1.0", note = "Use `invoke_batch` instead")]
483487
fn invoke(&self, _args: &[ColumnarValue]) -> Result<ColumnarValue> {
484488
not_impl_err!(
485489
"Function {} does not implement invoke but called",
@@ -489,19 +493,40 @@ pub trait ScalarUDFImpl: Debug + Send + Sync {
489493

490494
/// Invoke the function with `args` and the number of rows,
491495
/// returning the appropriate result.
496+
///
497+
/// The function will be invoked with the slice of [`ColumnarValue`]
498+
/// (either scalar or array).
499+
///
500+
/// # Performance
501+
///
502+
/// For the best performance, the implementations should handle the common case
503+
/// when one or more of their arguments are constant values (aka
504+
/// [`ColumnarValue::Scalar`]).
505+
///
506+
/// [`ColumnarValue::values_to_arrays`] can be used to convert the arguments
507+
/// to arrays, which will likely be simpler code, but be slower.
492508
fn invoke_batch(
493509
&self,
494510
args: &[ColumnarValue],
495511
number_rows: usize,
496512
) -> Result<ColumnarValue> {
497513
match args.is_empty() {
498-
true => self.invoke_no_args(number_rows),
499-
false => self.invoke(args),
514+
true =>
515+
{
516+
#[allow(deprecated)]
517+
self.invoke_no_args(number_rows)
518+
}
519+
false =>
520+
{
521+
#[allow(deprecated)]
522+
self.invoke(args)
523+
}
500524
}
501525
}
502526

503527
/// Invoke the function without `args` (only the number of rows is provided),
504528
/// returning the appropriate result.
529+
#[deprecated(since = "42.1.0", note = "Use `invoke_batch` instead")]
505530
fn invoke_no_args(&self, _number_rows: usize) -> Result<ColumnarValue> {
506531
not_impl_err!(
507532
"Function {} does not implement invoke_no_args but called",
@@ -725,10 +750,12 @@ impl ScalarUDFImpl for AliasedScalarUDFImpl {
725750
}
726751

727752
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
753+
#[allow(deprecated)]
728754
self.inner.invoke(args)
729755
}
730756

731757
fn invoke_no_args(&self, number_rows: usize) -> Result<ColumnarValue> {
758+
#[allow(deprecated)]
732759
self.inner.invoke_no_args(number_rows)
733760
}
734761

datafusion/functions/benches/random.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ fn criterion_benchmark(c: &mut Criterion) {
2929
c.bench_function("random_1M_rows_batch_8192", |b| {
3030
b.iter(|| {
3131
for _ in 0..iterations {
32-
black_box(random_func.invoke_no_args(8192).unwrap());
32+
black_box(random_func.invoke_batch(&[], 8192).unwrap());
3333
}
3434
})
3535
});
@@ -39,7 +39,7 @@ fn criterion_benchmark(c: &mut Criterion) {
3939
c.bench_function("random_1M_rows_batch_128", |b| {
4040
b.iter(|| {
4141
for _ in 0..iterations_128 {
42-
black_box(random_func.invoke_no_args(128).unwrap());
42+
black_box(random_func.invoke_batch(&[], 128).unwrap());
4343
}
4444
})
4545
});

datafusion/functions/src/datetime/date_bin.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,7 @@ mod tests {
491491
use chrono::TimeDelta;
492492

493493
#[test]
494+
#[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch
494495
fn test_date_bin() {
495496
let res = DateBinFunc::new().invoke(&[
496497
ColumnarValue::Scalar(ScalarValue::IntervalDayTime(Some(IntervalDayTime {
@@ -781,6 +782,7 @@ mod tests {
781782
.map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
782783
.collect::<TimestampNanosecondArray>()
783784
.with_timezone_opt(tz_opt.clone());
785+
#[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch
784786
let result = DateBinFunc::new()
785787
.invoke(&[
786788
ColumnarValue::Scalar(ScalarValue::new_interval_dt(1, 0)),

datafusion/functions/src/datetime/date_trunc.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -724,6 +724,7 @@ mod tests {
724724
.map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
725725
.collect::<TimestampNanosecondArray>()
726726
.with_timezone_opt(tz_opt.clone());
727+
#[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch
727728
let result = DateTruncFunc::new()
728729
.invoke(&[
729730
ColumnarValue::Scalar(ScalarValue::from("day")),
@@ -882,6 +883,7 @@ mod tests {
882883
.map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
883884
.collect::<TimestampNanosecondArray>()
884885
.with_timezone_opt(tz_opt.clone());
886+
#[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch
885887
let result = DateTruncFunc::new()
886888
.invoke(&[
887889
ColumnarValue::Scalar(ScalarValue::from("hour")),

datafusion/functions/src/datetime/from_unixtime.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@ mod test {
162162
fn test_without_timezone() {
163163
let args = [ColumnarValue::Scalar(Int64(Some(1729900800)))];
164164

165+
#[allow(deprecated)] // TODO use invoke_batch
165166
let result = FromUnixtimeFunc::new().invoke(&args).unwrap();
166167

167168
match result {
@@ -181,6 +182,7 @@ mod test {
181182
))),
182183
];
183184

185+
#[allow(deprecated)] // TODO use invoke_batch
184186
let result = FromUnixtimeFunc::new().invoke(&args).unwrap();
185187

186188
match result {

datafusion/functions/src/datetime/make_date.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,7 @@ mod tests {
234234

235235
#[test]
236236
fn test_make_date() {
237+
#[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch
237238
let res = MakeDateFunc::new()
238239
.invoke(&[
239240
ColumnarValue::Scalar(ScalarValue::Int32(Some(2024))),
@@ -248,6 +249,7 @@ mod tests {
248249
panic!("Expected a scalar value")
249250
}
250251

252+
#[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch
251253
let res = MakeDateFunc::new()
252254
.invoke(&[
253255
ColumnarValue::Scalar(ScalarValue::Int64(Some(2024))),
@@ -262,6 +264,7 @@ mod tests {
262264
panic!("Expected a scalar value")
263265
}
264266

267+
#[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch
265268
let res = MakeDateFunc::new()
266269
.invoke(&[
267270
ColumnarValue::Scalar(ScalarValue::Utf8(Some("2024".to_string()))),
@@ -279,6 +282,7 @@ mod tests {
279282
let years = Arc::new((2021..2025).map(Some).collect::<Int64Array>());
280283
let months = Arc::new((1..5).map(Some).collect::<Int32Array>());
281284
let days = Arc::new((11..15).map(Some).collect::<UInt32Array>());
285+
#[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch
282286
let res = MakeDateFunc::new()
283287
.invoke(&[
284288
ColumnarValue::Array(years),
@@ -304,6 +308,7 @@ mod tests {
304308
//
305309

306310
// invalid number of arguments
311+
#[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch
307312
let res = MakeDateFunc::new()
308313
.invoke(&[ColumnarValue::Scalar(ScalarValue::Int32(Some(1)))]);
309314
assert_eq!(
@@ -312,6 +317,7 @@ mod tests {
312317
);
313318

314319
// invalid type
320+
#[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch
315321
let res = MakeDateFunc::new().invoke(&[
316322
ColumnarValue::Scalar(ScalarValue::IntervalYearMonth(Some(1))),
317323
ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1), None)),
@@ -323,6 +329,7 @@ mod tests {
323329
);
324330

325331
// overflow of month
332+
#[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch
326333
let res = MakeDateFunc::new().invoke(&[
327334
ColumnarValue::Scalar(ScalarValue::Int32(Some(2023))),
328335
ColumnarValue::Scalar(ScalarValue::UInt64(Some(u64::MAX))),
@@ -334,6 +341,7 @@ mod tests {
334341
);
335342

336343
// overflow of day
344+
#[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch
337345
let res = MakeDateFunc::new().invoke(&[
338346
ColumnarValue::Scalar(ScalarValue::Int32(Some(2023))),
339347
ColumnarValue::Scalar(ScalarValue::Int32(Some(22))),

datafusion/functions/src/datetime/to_char.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,7 @@ mod tests {
384384
];
385385

386386
for (value, format, expected) in scalar_data {
387+
#[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch
387388
let result = ToCharFunc::new()
388389
.invoke(&[ColumnarValue::Scalar(value), ColumnarValue::Scalar(format)])
389390
.expect("that to_char parsed values without error");
@@ -458,6 +459,7 @@ mod tests {
458459
];
459460

460461
for (value, format, expected) in scalar_array_data {
462+
#[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch
461463
let result = ToCharFunc::new()
462464
.invoke(&[
463465
ColumnarValue::Scalar(value),
@@ -583,6 +585,7 @@ mod tests {
583585
];
584586

585587
for (value, format, expected) in array_scalar_data {
588+
#[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch
586589
let result = ToCharFunc::new()
587590
.invoke(&[
588591
ColumnarValue::Array(value as ArrayRef),
@@ -599,6 +602,7 @@ mod tests {
599602
}
600603

601604
for (value, format, expected) in array_array_data {
605+
#[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch
602606
let result = ToCharFunc::new()
603607
.invoke(&[
604608
ColumnarValue::Array(value),
@@ -619,6 +623,7 @@ mod tests {
619623
//
620624

621625
// invalid number of arguments
626+
#[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch
622627
let result = ToCharFunc::new()
623628
.invoke(&[ColumnarValue::Scalar(ScalarValue::Int32(Some(1)))]);
624629
assert_eq!(
@@ -627,6 +632,7 @@ mod tests {
627632
);
628633

629634
// invalid type
635+
#[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch
630636
let result = ToCharFunc::new().invoke(&[
631637
ColumnarValue::Scalar(ScalarValue::Int32(Some(1))),
632638
ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(Some(1), None)),

datafusion/functions/src/datetime/to_date.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,7 @@ mod tests {
213213
}
214214

215215
fn test_scalar(sv: ScalarValue, tc: &TestCase) {
216+
#[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch
216217
let to_date_result = ToDateFunc::new().invoke(&[ColumnarValue::Scalar(sv)]);
217218

218219
match to_date_result {
@@ -233,6 +234,7 @@ mod tests {
233234
A: From<Vec<&'static str>> + Array + 'static,
234235
{
235236
let date_array = A::from(vec![tc.date_str]);
237+
#[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch
236238
let to_date_result =
237239
ToDateFunc::new().invoke(&[ColumnarValue::Array(Arc::new(date_array))]);
238240

@@ -323,6 +325,7 @@ mod tests {
323325
fn test_scalar(sv: ScalarValue, tc: &TestCase) {
324326
let format_scalar = ScalarValue::Utf8(Some(tc.format_str.to_string()));
325327

328+
#[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch
326329
let to_date_result = ToDateFunc::new().invoke(&[
327330
ColumnarValue::Scalar(sv),
328331
ColumnarValue::Scalar(format_scalar),
@@ -347,6 +350,7 @@ mod tests {
347350
let date_array = A::from(vec![tc.formatted_date]);
348351
let format_array = A::from(vec![tc.format_str]);
349352

353+
#[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch
350354
let to_date_result = ToDateFunc::new().invoke(&[
351355
ColumnarValue::Array(Arc::new(date_array)),
352356
ColumnarValue::Array(Arc::new(format_array)),
@@ -382,6 +386,7 @@ mod tests {
382386
let format1_scalar = ScalarValue::Utf8(Some("%Y-%m-%d".into()));
383387
let format2_scalar = ScalarValue::Utf8(Some("%Y/%m/%d".into()));
384388

389+
#[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch
385390
let to_date_result = ToDateFunc::new().invoke(&[
386391
ColumnarValue::Scalar(formatted_date_scalar),
387392
ColumnarValue::Scalar(format1_scalar),
@@ -410,6 +415,7 @@ mod tests {
410415
for date_str in test_cases {
411416
let formatted_date_scalar = ScalarValue::Utf8(Some(date_str.into()));
412417

418+
#[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch
413419
let to_date_result =
414420
ToDateFunc::new().invoke(&[ColumnarValue::Scalar(formatted_date_scalar)]);
415421

@@ -428,6 +434,7 @@ mod tests {
428434
let date_str = "20241231";
429435
let date_scalar = ScalarValue::Utf8(Some(date_str.into()));
430436

437+
#[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch
431438
let to_date_result =
432439
ToDateFunc::new().invoke(&[ColumnarValue::Scalar(date_scalar)]);
433440

@@ -449,6 +456,7 @@ mod tests {
449456
let date_str = "202412311";
450457
let date_scalar = ScalarValue::Utf8(Some(date_str.into()));
451458

459+
#[allow(deprecated)] // TODO migrate UDF invoke to invoke_batch
452460
let to_date_result =
453461
ToDateFunc::new().invoke(&[ColumnarValue::Scalar(date_scalar)]);
454462

datafusion/functions/src/datetime/to_local_time.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -558,7 +558,7 @@ mod tests {
558558

559559
fn test_to_local_time_helper(input: ScalarValue, expected: ScalarValue) {
560560
let res = ToLocalTimeFunc::new()
561-
.invoke(&[ColumnarValue::Scalar(input)])
561+
.invoke_batch(&[ColumnarValue::Scalar(input)], 1)
562562
.unwrap();
563563
match res {
564564
ColumnarValue::Scalar(res) => {
@@ -616,8 +616,9 @@ mod tests {
616616
.iter()
617617
.map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
618618
.collect::<TimestampNanosecondArray>();
619+
let batch_size = input.len();
619620
let result = ToLocalTimeFunc::new()
620-
.invoke(&[ColumnarValue::Array(Arc::new(input))])
621+
.invoke_batch(&[ColumnarValue::Array(Arc::new(input))], batch_size)
621622
.unwrap();
622623
if let ColumnarValue::Array(result) = result {
623624
assert_eq!(

datafusion/functions/src/datetime/to_timestamp.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -636,7 +636,6 @@ mod tests {
636636
use arrow::array::{ArrayRef, Int64Array, StringBuilder};
637637
use arrow::datatypes::TimeUnit;
638638
use chrono::Utc;
639-
640639
use datafusion_common::{assert_contains, DataFusionError, ScalarValue};
641640
use datafusion_expr::ScalarFunctionImplementation;
642641

@@ -1011,7 +1010,7 @@ mod tests {
10111010
assert!(matches!(rt, Timestamp(_, Some(_))));
10121011

10131012
let res = udf
1014-
.invoke(&[array.clone()])
1013+
.invoke_batch(&[array.clone()], 1)
10151014
.expect("that to_timestamp parsed values without error");
10161015
let array = match res {
10171016
ColumnarValue::Array(res) => res,
@@ -1054,7 +1053,7 @@ mod tests {
10541053
assert!(matches!(rt, Timestamp(_, None)));
10551054

10561055
let res = udf
1057-
.invoke(&[array.clone()])
1056+
.invoke_batch(&[array.clone()], 1)
10581057
.expect("that to_timestamp parsed values without error");
10591058
let array = match res {
10601059
ColumnarValue::Array(res) => res,

0 commit comments

Comments
 (0)