From 48415da57c8555c9a1a039278f8d61f6f8bded2d Mon Sep 17 00:00:00 2001 From: Billy Lanchantin Date: Tue, 29 Aug 2023 14:03:00 -0400 Subject: [PATCH] Additional temporal arithmetic (#696) --- lib/explorer/backend/lazy_series.ex | 84 +---- lib/explorer/data_frame.ex | 5 + lib/explorer/duration.ex | 3 + lib/explorer/polars_backend/expression.ex | 1 + lib/explorer/polars_backend/native.ex | 1 + lib/explorer/polars_backend/series.ex | 47 ++- lib/explorer/series.ex | 119 +++--- lib/explorer/shared.ex | 45 +++ native/explorer/src/expressions.rs | 25 +- native/explorer/src/lib.rs | 1 + test/explorer/series/duration_test.exs | 438 +++++++++++++++++++++- test/explorer/series_test.exs | 2 +- 12 files changed, 627 insertions(+), 144 deletions(-) diff --git a/lib/explorer/backend/lazy_series.ex b/lib/explorer/backend/lazy_series.ex index 858779985..9451ee59e 100644 --- a/lib/explorer/backend/lazy_series.ex +++ b/lib/explorer/backend/lazy_series.ex @@ -135,7 +135,8 @@ defmodule Explorer.Backend.LazySeries do @comparison_operations [:equal, :not_equal, :greater, :greater_equal, :less, :less_equal] - @arithmetic_operations [:pow, :quotient, :remainder] + @basic_arithmetic_operations [:add, :subtract, :multiply, :divide] + @other_arithmetic_operations [:pow, :quotient, :remainder] @aggregation_operations [ :sum, @@ -192,38 +193,6 @@ defmodule Explorer.Backend.LazySeries do Backend.Series.new(data, dtype) end - @impl true - def add(left, right) do - args = [data!(left), data!(right)] - data = new(:add, args, aggregations?(args)) - dtype = resolve_numeric_temporal_dtype(:add, left, right) - Backend.Series.new(data, dtype) - end - - @impl true - def subtract(left, right) do - args = [data!(left), data!(right)] - data = new(:subtract, args, aggregations?(args)) - dtype = resolve_numeric_temporal_dtype(:subtract, left, right) - Backend.Series.new(data, dtype) - end - - @impl true - def multiply(left, right) do - args = [data!(left), data!(right)] - data = new(:multiply, args, 
aggregations?(args)) - dtype = resolve_numeric_temporal_dtype(:multiply, left, right) - Backend.Series.new(data, dtype) - end - - @impl true - def divide(left, right) do - args = [data!(left), data!(right)] - data = new(:divide, args, aggregations?(args)) - dtype = resolve_numeric_temporal_dtype(:divide, left, right) - Backend.Series.new(data, dtype) - end - @impl true def from_list(list, dtype) when is_list(list) and dtype in @valid_dtypes do data = new(:from_list, [list, dtype], false) @@ -412,7 +381,19 @@ defmodule Explorer.Backend.LazySeries do end end - for op <- @arithmetic_operations do + for op <- @basic_arithmetic_operations do + @impl true + def unquote(op)(%Series{} = left, %Series{} = right) do + dtype = Explorer.Shared.cast_to_arithmetic(unquote(op), dtype(left), dtype(right)) + + args = [data!(left), data!(right)] + data = new(unquote(op), args, aggregations?(args)) + + Backend.Series.new(data, dtype) + end + end + + for op <- @other_arithmetic_operations do @impl true def unquote(op)(left, right) do dtype = resolve_numeric_dtype([left, right]) @@ -654,41 +635,6 @@ defmodule Explorer.Backend.LazySeries do defp resolve_numeric_dtype(:window_mean, _items), do: :float defp resolve_numeric_dtype(_op, items), do: resolve_numeric_dtype(items) - defp resolve_numeric_temporal_dtype(op, %Series{dtype: ldt} = left, %Series{dtype: rdt} = right) do - case {op, ldt, rdt} do - {:add, {:datetime, ltu}, {:duration, rtu}} -> {:datetime, highest_precision(ltu, rtu)} - {:add, {:duration, ltu}, {:datetime, rtu}} -> {:datetime, highest_precision(ltu, rtu)} - {:add, {:duration, ltu}, {:duration, rtu}} -> {:duration, highest_precision(ltu, rtu)} - {:subtract, {:datetime, ltu}, {:datetime, rtu}} -> {:duration, highest_precision(ltu, rtu)} - {:subtract, {:datetime, ltu}, {:duration, rtu}} -> {:datetime, highest_precision(ltu, rtu)} - {:subtract, {:duration, ltu}, {:duration, rtu}} -> {:duration, highest_precision(ltu, rtu)} - {:multiply, :integer, {:duration, tu}} -> 
{:duration, tu} - {:multiply, {:duration, tu}, :integer} -> {:duration, tu} - {:divide, {:duration, tu}, :integer} -> {:duration, tu} - {:divide, _, {:duration, _}} -> raise("cannot divide by duration") - {:divide, _, _} -> :float - _ -> resolve_numeric_dtype([left, right]) - end - end - - defp resolve_numeric_temporal_dtype(op, left, right) do - case op do - :divide -> :float - _ -> resolve_numeric_dtype([left, right]) - end - end - - defp highest_precision(left_timeunit, right_timeunit) do - # Higher precision wins, otherwise information is lost. - case {left_timeunit, right_timeunit} do - {equal, equal} -> equal - {:nanosecond, _} -> :nanosecond - {_, :nanosecond} -> :nanosecond - {:microsecond, _} -> :microsecond - {_, :microsecond} -> :microsecond - end - end - # Returns the inner `data` if it's a lazy series. Otherwise raises an error. defp lazy_series!(series) do case series do diff --git a/lib/explorer/data_frame.ex b/lib/explorer/data_frame.ex index 855b91e70..98afa1bdc 100644 --- a/lib/explorer/data_frame.ex +++ b/lib/explorer/data_frame.ex @@ -2813,6 +2813,11 @@ defmodule Explorer.DataFrame do Explorer.Backend.Series.new(lazy_s, {:datetime, :microsecond}) + duration = %Explorer.Duration{} -> + lazy_s = LazySeries.new(:to_lazy, [duration]) + + Explorer.Backend.Series.new(lazy_s, {:duration, duration.precision}) + other -> raise ArgumentError, "expecting a lazy series or scalar value, but instead got #{inspect(other)}" diff --git a/lib/explorer/duration.ex b/lib/explorer/duration.ex index f80662397..e86b70cfc 100644 --- a/lib/explorer/duration.ex +++ b/lib/explorer/duration.ex @@ -6,6 +6,9 @@ defmodule Explorer.Duration do @enforce_keys [:value, :precision] defstruct [:value, :precision] + @type precision :: :millisecond | :microsecond | :nanosecond + @type t :: %__MODULE__{value: integer(), precision: precision()} + # Nanosecond constants @us_ns 1_000 @ms_ns 1_000 * @us_ns diff --git a/lib/explorer/polars_backend/expression.ex 
b/lib/explorer/polars_backend/expression.ex index 3eecf5901..403bf7fa2 100644 --- a/lib/explorer/polars_backend/expression.ex +++ b/lib/explorer/polars_backend/expression.ex @@ -265,6 +265,7 @@ defmodule Explorer.PolarsBackend.Expression do def to_expr(number) when is_float(number), do: Native.expr_float(number) def to_expr(%Date{} = date), do: Native.expr_date(date) def to_expr(%NaiveDateTime{} = datetime), do: Native.expr_datetime(datetime) + def to_expr(%Explorer.Duration{} = duration), do: Native.expr_duration(duration) def to_expr(%PolarsSeries{} = polars_series), do: Native.expr_series(polars_series) # Used by Explorer.PolarsBackend.DataFrame diff --git a/lib/explorer/polars_backend/native.ex b/lib/explorer/polars_backend/native.ex index f7cae3b69..046d714a6 100644 --- a/lib/explorer/polars_backend/native.ex +++ b/lib/explorer/polars_backend/native.ex @@ -163,6 +163,7 @@ defmodule Explorer.PolarsBackend.Native do def expr_boolean(_bool), do: err() def expr_date(_date), do: err() def expr_datetime(_datetime), do: err() + def expr_duration(_duration), do: err() def expr_describe_filter_plan(_df, _expr), do: err() def expr_float(_number), do: err() def expr_integer(_number), do: err() diff --git a/lib/explorer/polars_backend/series.ex b/lib/explorer/polars_backend/series.ex index 0d2ae90d0..f82ca3da8 100644 --- a/lib/explorer/polars_backend/series.ex +++ b/lib/explorer/polars_backend/series.ex @@ -279,20 +279,55 @@ defmodule Explorer.PolarsBackend.Series do # Arithmetic @impl true - def add(left, right), - do: Shared.apply_series(matching_size!(left, right), :s_add, [right.data]) + def add(left, right) do + left = matching_size!(left, right) + + # `duration + date` is not supported by polars for some reason. + # `date + duration` is, so we're swapping arguments as a work around. 
+ [left, right] = + case {dtype(left), dtype(right)} do + {{:duration, _}, :date} -> [right, left] + _ -> [left, right] + end + + Shared.apply_series(left, :s_add, [right.data]) + end @impl true def subtract(left, right), do: Shared.apply_series(matching_size!(left, right), :s_subtract, [right.data]) @impl true - def multiply(left, right), - do: Shared.apply_series(matching_size!(left, right), :s_multiply, [right.data]) + def multiply(left, right) do + result = Shared.apply_series(matching_size!(left, right), :s_multiply, [right.data]) + expected_dtype = Explorer.Shared.cast_to_arithmetic(:multiply, dtype(left), dtype(right)) + + # Polars currently returns inconsistent dtypes, e.g.: + # * `integer * duration -> duration` when `integer` is a scalar + # * `integer * duration -> integer` when `integer` is a series + # We need to return duration in these cases, so we need an additional cast. + if match?({:duration, _}, expected_dtype) and expected_dtype != dtype(result) do + cast(result, expected_dtype) + else + result + end + end @impl true - def divide(left, right), - do: Shared.apply_series(matching_size!(left, right), :s_divide, [right.data]) + def divide(left, right) do + result = Shared.apply_series(matching_size!(left, right), :s_divide, [right.data]) + expected_dtype = Explorer.Shared.cast_to_arithmetic(:divide, dtype(left), dtype(right)) + + # Polars currently returns inconsistent dtypes, e.g.: + # * `duration / integer -> duration` when `integer` is a scalar + # * `duration / integer -> integer` when `integer` is a series + # We need to return duration in these cases, so we need an additional cast. 
+ if match?({:duration, _}, expected_dtype) and expected_dtype != dtype(result) do + cast(result, expected_dtype) + else + result + end + end @impl true def quotient(left, right), diff --git a/lib/explorer/series.ex b/lib/explorer/series.ex index 357d15480..e82823d4b 100644 --- a/lib/explorer/series.ex +++ b/lib/explorer/series.ex @@ -59,6 +59,7 @@ defmodule Explorer.Series do alias __MODULE__, as: Series alias Kernel, as: K + alias Explorer.Duration alias Explorer.Shared @valid_dtypes Explorer.Shared.dtypes() @@ -2498,10 +2499,21 @@ defmodule Explorer.Series do # Arithmetic - defp enforce_highest_precision( + defp cast_for_arithmetic(function, [_, _] = args) do + args + |> case do + [%Series{}, %Series{}] -> args + [left, %Series{} = right] -> [from_list([left]), right] + [%Series{} = left, right] -> [left, from_list([right])] + [left, right] -> no_series_error(function, left, right) + end + |> enforce_highest_precision() + end + + defp enforce_highest_precision([ %Series{dtype: {left_base, left_timeunit}} = left, %Series{dtype: {right_base, right_timeunit}} = right - ) do + ]) do # Higher precision wins, otherwise information is lost. case {left_timeunit, right_timeunit} do {equal, equal} -> [left, right] @@ -2512,6 +2524,8 @@ defmodule Explorer.Series do end end + defp enforce_highest_precision(args), do: args + @doc """ Adds right to left, element-wise. 
@@ -2554,25 +2568,18 @@ defmodule Explorer.Series do """ @doc type: :element_wise @spec add( - left :: Series.t() | number() | NaiveDateTime.t(), - right :: Series.t() | number() | NaiveDateTime.t() + left :: Series.t() | number() | Date.t() | NaiveDateTime.t() | Duration.t(), + right :: Series.t() | number() | Date.t() | NaiveDateTime.t() | Duration.t() ) :: Series.t() - def add(%NaiveDateTime{} = left, %Series{dtype: {:duration, timeunit}} = right), - do: apply_series_list(:add, [from_same_value(right, left, {:datetime, timeunit}), right]) - - def add(%Series{dtype: {:duration, timeunit}} = left, %NaiveDateTime{} = right), - do: apply_series_list(:add, [left, from_same_value(left, right, {:datetime, timeunit})]) - - def add(%Series{dtype: {:datetime, _}} = left, %Series{dtype: {:duration, _}} = right), - do: apply_series_list(:add, enforce_highest_precision(left, right)) - - def add(%Series{dtype: {:duration, _}} = left, %Series{dtype: {:datetime, _}} = right), - do: apply_series_list(:add, enforce_highest_precision(left, right)) + def add(left, right) do + [left, right] = cast_for_arithmetic("add/2", [left, right]) - def add(%Series{dtype: {:duration, _}} = left, %Series{dtype: {:duration, _}} = right), - do: apply_series_list(:add, enforce_highest_precision(left, right)) - - def add(left, right), do: basic_numeric_operation(:add, left, right) + if _dtype = Shared.cast_to_arithmetic(:add, dtype(left), dtype(right)) do + apply_series_list(:add, [left, right]) + else + dtype_mismatch_error("add/2", left, right) + end + end @doc """ Subtracts right from left, element-wise. 
@@ -2616,28 +2623,18 @@ defmodule Explorer.Series do """ @doc type: :element_wise @spec subtract( - left :: Series.t() | number() | NaiveDateTime.t(), - right :: Series.t() | number() | NaiveDateTime.t() + left :: Series.t() | number() | Date.t() | NaiveDateTime.t() | Duration.t(), + right :: Series.t() | number() | Date.t() | NaiveDateTime.t() | Duration.t() ) :: Series.t() - def subtract(%NaiveDateTime{} = left, %Series{dtype: {:datetime, timeunit}} = right), - do: apply_series_list(:subtract, [from_list([left], dtype: {:datetime, timeunit}), right]) - - def subtract(%Series{dtype: {:datetime, timeunit}} = left, %NaiveDateTime{} = right), - do: apply_series_list(:subtract, [left, from_list([right], dtype: {:datetime, timeunit})]) - - def subtract(%NaiveDateTime{} = left, %Series{dtype: {:duration, timeunit}} = right), - do: apply_series_list(:subtract, [from_list([left], dtype: {:datetime, timeunit}), right]) - - def subtract(%Series{dtype: {:datetime, _}} = left, %Series{dtype: {:datetime, _}} = right), - do: apply_series_list(:subtract, enforce_highest_precision(left, right)) + def subtract(left, right) do + [left, right] = cast_for_arithmetic("subtract/2", [left, right]) - def subtract(%Series{dtype: {:datetime, _}} = left, %Series{dtype: {:duration, _}} = right), - do: apply_series_list(:subtract, enforce_highest_precision(left, right)) - - def subtract(%Series{dtype: {:duration, _}} = left, %Series{dtype: {:duration, _}} = right), - do: apply_series_list(:subtract, enforce_highest_precision(left, right)) - - def subtract(left, right), do: basic_numeric_operation(:subtract, left, right) + if _dtype = Shared.cast_to_arithmetic(:subtract, dtype(left), dtype(right)) do + apply_series_list(:subtract, [left, right]) + else + dtype_mismatch_error("subtract/2", left, right) + end + end @doc """ Multiplies left and right, element-wise. 
@@ -2671,8 +2668,19 @@ defmodule Explorer.Series do > """ @doc type: :element_wise - @spec multiply(left :: Series.t() | number(), right :: Series.t() | number()) :: Series.t() - def multiply(left, right), do: basic_numeric_operation(:multiply, left, right) + @spec multiply( + left :: Series.t() | number() | Duration.t(), + right :: Series.t() | number() | Duration.t() + ) :: Series.t() + def multiply(left, right) do + [left, right] = cast_for_arithmetic("multiply/2", [left, right]) + + if _dtype = Shared.cast_to_arithmetic(:multiply, dtype(left), dtype(right)) do + apply_series_list(:multiply, [left, right]) + else + dtype_mismatch_error("multiply/2", left, right) + end + end @doc """ Divides left by right, element-wise. @@ -2721,11 +2729,22 @@ defmodule Explorer.Series do > """ @doc type: :element_wise - @spec divide(left :: Series.t() | number(), right :: Series.t() | number()) :: Series.t() - def divide(_, %Series{dtype: {:duration, _}}), - do: raise(ArgumentError, "cannot divide by duration") + @spec divide( + left :: Series.t() | number() | Duration.t(), + right :: Series.t() | number() + ) :: Series.t() + def divide(left, right) do + [left, right] = cast_for_arithmetic("divide/2", [left, right]) - def divide(left, right), do: basic_numeric_operation(:divide, left, right) + if _dtype = Shared.cast_to_arithmetic(:divide, dtype(left), dtype(right)) do + apply_series_list(:divide, [left, right]) + else + case dtype(right) do + {:duration, _} -> raise(ArgumentError, "cannot divide by duration") + _ -> dtype_mismatch_error("divide/2", left, right) + end + end + end @doc """ Raises a numeric series to the power of the exponent. 
@@ -3123,10 +3142,13 @@ defmodule Explorer.Series do do: dtype_error("#{operation}/#{length(args) + 2}", dtype, [:integer, :float]) defp basic_numeric_operation(operation, left, right, args) - when K.and(is_numeric(left), is_numeric(right)) do + when K.and(is_numeric(left), is_numeric(right)), + do: no_series_error("#{operation}/#{length(args) + 2}", left, right) + + defp no_series_error(function, left, right) do raise ArgumentError, - "#{operation}/#{length(args) + 2} expect a series as one of its arguments, " <> - "instead got two numbers: #{inspect(left)} and #{inspect(right)}" + "#{function} expects a series as one of its arguments, " <> + "instead got two scalars: #{inspect(left)} and #{inspect(right)}" end # Comparisons @@ -5025,6 +5047,7 @@ defmodule Explorer.Series do ) end + @spec dtype_mismatch_error(String.t(), any(), any(), [any()]) :: no_return() defp dtype_mismatch_error(function, left, right, valid) do left_series? = match?(%Series{}, left) right_series? = match?(%Series{}, right) diff --git a/lib/explorer/shared.ex b/lib/explorer/shared.ex index c250a5502..cf4ff2947 100644 --- a/lib/explorer/shared.ex +++ b/lib/explorer/shared.ex @@ -217,6 +217,51 @@ defmodule Explorer.Shared do defp type(item, _type) when is_nil(item), do: nil defp type(item, _type), do: raise(ArgumentError, "unsupported datatype: #{inspect(item)}") + @doc """ + The return dtype for the basic arithmetic operations: add, subtract, multiply, and divide. + + This function assumes that the inputs have already had the highest precision enforced. 
+ """ + def cast_to_arithmetic(:add, :integer, :integer), do: :integer + def cast_to_arithmetic(:add, :integer, :float), do: :float + def cast_to_arithmetic(:add, :float, :integer), do: :float + def cast_to_arithmetic(:add, :float, :float), do: :float + def cast_to_arithmetic(:add, :date, {:duration, _}), do: :date + def cast_to_arithmetic(:add, {:duration, _}, :date), do: :date + def cast_to_arithmetic(:add, {:datetime, p}, {:duration, p}), do: {:datetime, p} + def cast_to_arithmetic(:add, {:duration, p}, {:datetime, p}), do: {:datetime, p} + def cast_to_arithmetic(:add, {:duration, p}, {:duration, p}), do: {:duration, p} + def cast_to_arithmetic(:add, _, _), do: nil + + def cast_to_arithmetic(:subtract, :integer, :integer), do: :integer + def cast_to_arithmetic(:subtract, :integer, :float), do: :float + def cast_to_arithmetic(:subtract, :float, :integer), do: :float + def cast_to_arithmetic(:subtract, :float, :float), do: :float + def cast_to_arithmetic(:subtract, :date, :date), do: {:duration, :millisecond} + def cast_to_arithmetic(:subtract, :date, {:duration, _}), do: :date + def cast_to_arithmetic(:subtract, {:datetime, p}, {:datetime, p}), do: {:duration, p} + def cast_to_arithmetic(:subtract, {:datetime, p}, {:duration, p}), do: {:datetime, p} + def cast_to_arithmetic(:subtract, {:duration, p}, {:duration, p}), do: {:duration, p} + def cast_to_arithmetic(:subtract, _, _), do: nil + + def cast_to_arithmetic(:multiply, :integer, :integer), do: :integer + def cast_to_arithmetic(:multiply, :integer, :float), do: :float + def cast_to_arithmetic(:multiply, :float, :integer), do: :float + def cast_to_arithmetic(:multiply, :float, :float), do: :float + def cast_to_arithmetic(:multiply, :integer, {:duration, p}), do: {:duration, p} + def cast_to_arithmetic(:multiply, {:duration, p}, :integer), do: {:duration, p} + def cast_to_arithmetic(:multiply, :float, {:duration, p}), do: {:duration, p} + def cast_to_arithmetic(:multiply, {:duration, p}, :float), do: {:duration, 
p} + def cast_to_arithmetic(:multiply, _, _), do: nil + + def cast_to_arithmetic(:divide, :integer, :integer), do: :float + def cast_to_arithmetic(:divide, :integer, :float), do: :float + def cast_to_arithmetic(:divide, :float, :integer), do: :float + def cast_to_arithmetic(:divide, :float, :float), do: :float + def cast_to_arithmetic(:divide, {:duration, p}, :integer), do: {:duration, p} + def cast_to_arithmetic(:divide, {:duration, p}, :float), do: {:duration, p} + def cast_to_arithmetic(:divide, _, _), do: nil + @doc """ Downcasts lists of mixed numeric types (float and int) to float. """ diff --git a/native/explorer/src/expressions.rs b/native/explorer/src/expressions.rs index 96b1eca75..32d9fb312 100644 --- a/native/explorer/src/expressions.rs +++ b/native/explorer/src/expressions.rs @@ -10,7 +10,8 @@ use polars::prelude::{ }; use polars::prelude::{AnyValue, DataType, Expr, Literal, StrptimeOptions, TimeUnit}; -use crate::datatypes::{ExDate, ExDateTime}; +use crate::atoms::{microsecond, millisecond, nanosecond}; +use crate::datatypes::{ExDate, ExDateTime, ExDuration}; use crate::series::{cast_str_to_dtype, cast_str_to_f64, ewm_opts, rolling_opts}; use crate::{ExDataFrame, ExExpr, ExSeries}; @@ -66,6 +67,28 @@ pub fn expr_datetime(datetime: ExDateTime) -> ExExpr { ExExpr::new(expr) } +#[rustler::nif] +pub fn expr_duration(duration: ExDuration) -> ExExpr { + // Note: it's tempting to use `.lit()` on a `chrono::Duration` struct in this function, but + // doing so will lose precision information as `chrono::Duration`s have no time units. 
+ let time_unit = time_unit_of_ex_duration(duration); + let expr = Expr::Literal(LiteralValue::Duration(duration.value, time_unit)); + ExExpr::new(expr) +} + +fn time_unit_of_ex_duration(duration: ExDuration) -> TimeUnit { + let precision = duration.precision; + if precision == millisecond() { + TimeUnit::Milliseconds + } else if precision == microsecond() { + TimeUnit::Microseconds + } else if precision == nanosecond() { + TimeUnit::Nanoseconds + } else { + panic!("unrecognized precision: {precision:?}") + } +} + #[rustler::nif] pub fn expr_series(series: ExSeries) -> ExExpr { let series = series.clone_inner(); diff --git a/native/explorer/src/lib.rs b/native/explorer/src/lib.rs index 707801562..db73a0053 100644 --- a/native/explorer/src/lib.rs +++ b/native/explorer/src/lib.rs @@ -145,6 +145,7 @@ rustler::init!( expr_column, expr_date, expr_datetime, + expr_duration, expr_day_of_week, expr_month, expr_year, diff --git a/test/explorer/series/duration_test.exs b/test/explorer/series/duration_test.exs index 4c2ea723f..849827c40 100644 --- a/test/explorer/series/duration_test.exs +++ b/test/explorer/series/duration_test.exs @@ -4,8 +4,13 @@ defmodule Explorer.Series.DurationTest do alias Explorer.Duration alias Explorer.Series + @aug_20 ~D[2023-08-20] + @aug_21 ~D[2023-08-21] + @one_hour_ms 3600 * 1_000 @one_hour_us 3600 * 1_000_000 + @one_hour_duration_ms %Duration{value: @one_hour_ms, precision: :millisecond} @one_hour_duration_us %Duration{value: @one_hour_us, precision: :microsecond} + @one_day_duration_ms %Duration{value: 24 * @one_hour_ms, precision: :millisecond} describe "list" do test "from a list of integers" do @@ -134,6 +139,121 @@ defmodule Explorer.Series.DurationTest do end describe "add" do + # Duration only + + test "duration[μs] + duration[μs]" do + one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) + two_hour_s = Series.from_list([2 * @one_hour_us], dtype: {:duration, :microsecond}) + sum_s = Series.add(one_hour_s, 
two_hour_s) + + three_hour_duration_us = %Duration{value: 3 * @one_hour_us, precision: :microsecond} + assert sum_s.dtype == {:duration, :microsecond} + assert Series.to_list(sum_s) == [three_hour_duration_us] + end + + test "duration[ms] + duration[μs] (different precisions)" do + one_hour_ms_s = Series.from_list([@one_hour_duration_ms]) + one_hour_us_s = Series.from_list([@one_hour_duration_us]) + sum_s = Series.add(one_hour_ms_s, one_hour_us_s) + + # Since we added a duration with :millisecond precision to a duration with :microsecond + # precision, the resulting sum has :microsecond precision since that was the highest + # precision present in the operation. + assert one_hour_ms_s.dtype == {:duration, :millisecond} + assert one_hour_us_s.dtype == {:duration, :microsecond} + assert sum_s.dtype == {:duration, :microsecond} + + two_hour_duration_us = %Duration{value: 2 * @one_hour_us, precision: :microsecond} + assert Series.to_list(sum_s) == [two_hour_duration_us] + end + + # Date + + test "date + duration[μs]" do + aug_20_s = Series.from_list([@aug_20]) + + # Adding a duration less than a day results in the same date. + one_hour_s = Series.from_list([@one_hour_duration_us]) + sum_s = Series.add(aug_20_s, one_hour_s) + + assert sum_s.dtype == :date + assert Series.to_list(sum_s) == [@aug_20] + + # Adding a duration at least a day results in the next date. + one_day_s = Series.from_list([24 * @one_hour_us], dtype: {:duration, :microsecond}) + sum_s = Series.add(aug_20_s, one_day_s) + + assert sum_s.dtype == :date + assert Series.to_list(sum_s) == [@aug_21] + end + + test "duration[μs] + date" do + aug_20_s = Series.from_list([@aug_20]) + + # Adding a duration less than a day results in the same date. + one_hour_s = Series.from_list([@one_hour_duration_us]) + sum_s = Series.add(one_hour_s, aug_20_s) + + assert sum_s.dtype == :date + assert Series.to_list(sum_s) == [@aug_20] + + # Adding a duration at least a day results in the next date. 
+ one_day_s = Series.from_list([24 * @one_hour_us], dtype: {:duration, :microsecond}) + sum_s = Series.add(one_day_s, aug_20_s) + + assert sum_s.dtype == :date + assert Series.to_list(sum_s) == [@aug_21] + end + + test "Date + duration[μs]" do + # Adding a duration less than a day results in the same date. + one_hour_s = Series.from_list([@one_hour_duration_us]) + sum_s = Series.add(@aug_20, one_hour_s) + + assert sum_s.dtype == :date + assert Series.to_list(sum_s) == [@aug_20] + + # Adding a duration at least a day results in the next date. + one_day_s = Series.from_list([24 * @one_hour_us], dtype: {:duration, :microsecond}) + sum_s = Series.add(@aug_20, one_day_s) + + assert sum_s.dtype == :date + assert Series.to_list(sum_s) == [@aug_21] + end + + test "duration[μs] + Date" do + # Adding a duration less than a day results in the same date. + one_hour_s = Series.from_list([@one_hour_duration_us]) + sum_s = Series.add(one_hour_s, @aug_20) + + assert sum_s.dtype == :date + assert Series.to_list(sum_s) == [@aug_20] + + # Adding a duration at least a day results in the next date. 
+ one_day_s = Series.from_list([24 * @one_hour_us], dtype: {:duration, :microsecond}) + sum_s = Series.add(one_day_s, @aug_20) + + assert sum_s.dtype == :date + assert Series.to_list(sum_s) == [@aug_21] + end + + test "Date + Date raises ArgumentError" do + assert_raise ArgumentError, + "add/2 expects a series as one of its arguments, instead got two scalars: ~D[2023-08-20] and ~D[2023-08-21]", + fn -> Series.add(@aug_20, @aug_21) end + end + + test "date + date raises ArgumentError" do + aug_20_s = Series.from_list([@aug_20]) + aug_21_s = Series.from_list([@aug_21]) + + assert_raise ArgumentError, + "cannot invoke Explorer.Series.add/2 with mismatched dtypes: :date and :date", + fn -> Series.add(aug_20_s, aug_21_s) end + end + + # Datetime + test "datetime[μs] + duration[μs]" do one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) eleven_s = Series.from_list([~N[2023-08-20 11:00:00.0000000]]) @@ -154,16 +274,6 @@ defmodule Explorer.Series.DurationTest do assert Series.to_list(sum_s) == [twelve_ndt] end - test "duration[μs] + duration[μs]" do - one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) - two_hour_s = Series.from_list([2 * @one_hour_us], dtype: {:duration, :microsecond}) - sum_s = Series.add(one_hour_s, two_hour_s) - - three_hour_duration_us = %Duration{value: 3 * @one_hour_us, precision: :microsecond} - assert sum_s.dtype == {:duration, :microsecond} - assert Series.to_list(sum_s) == [three_hour_duration_us] - end - test "NaiveDateTime + duration[μs]" do eleven = ~N[2023-08-20 11:00:00.0000000] one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) @@ -206,6 +316,84 @@ defmodule Explorer.Series.DurationTest do end describe "subtract" do + # Duration only + + test "duration[μs] - duration[μs]" do + one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) + two_hour_s = Series.from_list([2 * @one_hour_us], dtype: {:duration, :microsecond}) + diff_s = 
Series.subtract(two_hour_s, one_hour_s) + + assert diff_s.dtype == {:duration, :microsecond} + assert Series.to_list(diff_s) == [@one_hour_duration_us] + end + + test "duration[ms] - duration[μs] (different precisions)" do + two_hour_us_s = Series.from_list([2 * @one_hour_us], dtype: {:duration, :microsecond}) + one_hour_ms_s = Series.from_list([@one_hour_duration_ms]) + diff_s = Series.subtract(two_hour_us_s, one_hour_ms_s) + + # Since we subtracted a duration with :millisecond precision from a duration with :microsecond + # precision, the resulting difference has :microsecond precision since that was the highest + # precision present in the operation. + assert two_hour_us_s.dtype == {:duration, :microsecond} + assert one_hour_ms_s.dtype == {:duration, :millisecond} + assert diff_s.dtype == {:duration, :microsecond} + assert Series.to_list(diff_s) == [@one_hour_duration_us] + end + + # Date + + test "date - date" do + aug_20_s = Series.from_list([@aug_20]) + aug_21_s = Series.from_list([@aug_21]) + diff_s = Series.subtract(aug_21_s, aug_20_s) + + assert diff_s.dtype == {:duration, :millisecond} + assert Series.to_list(diff_s) == [@one_day_duration_ms] + end + + test "Date - date" do + aug_20_s = Series.from_list([@aug_20]) + diff_s = Series.subtract(@aug_21, aug_20_s) + + assert diff_s.dtype == {:duration, :millisecond} + assert Series.to_list(diff_s) == [@one_day_duration_ms] + end + + test "date - Date" do + aug_21_s = Series.from_list([@aug_21]) + diff_s = Series.subtract(aug_21_s, @aug_20) + + assert diff_s.dtype == {:duration, :millisecond} + assert Series.to_list(diff_s) == [@one_day_duration_ms] + end + + test "Date - Date raises ArgumentError" do + assert_raise ArgumentError, + "subtract/2 expects a series as one of its arguments, instead got two scalars: ~D[2023-08-21] and ~D[2023-08-20]", + fn -> Series.subtract(@aug_21, @aug_20) end + end + + test "date - duration[ms]" do + aug_21_s = Series.from_list([@aug_21]) + + # Subtracting a duration less than a 
day results in the previous date. + one_hour_s = Series.from_list([@one_hour_duration_ms]) + diff_s = Series.subtract(aug_21_s, one_hour_s) + + assert diff_s.dtype == :date + assert Series.to_list(diff_s) == [@aug_20] + + # Subtracting a duration at least a day results in the previous date. + one_day_s = Series.from_list([@one_day_duration_ms]) + diff_s = Series.subtract(aug_21_s, one_day_s) + + assert diff_s.dtype == :date + assert Series.to_list(diff_s) == [@aug_20] + end + + # Datetime + test "datetime[μs] - datetime[μs]" do eleven_s = Series.from_list([~N[2023-08-20 11:00:00.0000000]]) twelve_s = Series.from_list([~N[2023-08-20 12:00:00.0000000]]) @@ -224,15 +412,6 @@ defmodule Explorer.Series.DurationTest do assert Series.to_list(diff_s) == [~N[2023-08-20 11:00:00.0000000]] end - test "duration[μs] - duration[μs]" do - one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) - two_hour_s = Series.from_list([2 * @one_hour_us], dtype: {:duration, :microsecond}) - diff_s = Series.subtract(two_hour_s, one_hour_s) - - assert diff_s.dtype == {:duration, :microsecond} - assert Series.to_list(diff_s) == [@one_hour_duration_us] - end - test "NaiveDateTime - datetime[μs]" do eleven_s = Series.from_list([~N[2023-08-20 11:00:00.0000000]]) twelve = ~N[2023-08-20 12:00:00.0000000] @@ -284,6 +463,116 @@ defmodule Explorer.Series.DurationTest do end describe "multiply" do + # Integer + + test "integer * duration[μs]" do + one_hour_us_s = Series.from_list([@one_hour_duration_us]) + two_s = Series.from_list([2]) + product_s = Series.multiply(two_s, one_hour_us_s) + + assert product_s.dtype == {:duration, :microsecond} + two_hour_duration_s = %Duration{value: 2 * @one_hour_us, precision: :microsecond} + assert Series.to_list(product_s) == [two_hour_duration_s] + end + + test "duration[μs] * integer" do + one_hour_us_s = Series.from_list([@one_hour_duration_us]) + two_s = Series.from_list([2]) + product_s = Series.multiply(one_hour_us_s, two_s) + + assert 
product_s.dtype == {:duration, :microsecond} + two_hour_duration_s = %Duration{value: 2 * @one_hour_us, precision: :microsecond} + assert Series.to_list(product_s) == [two_hour_duration_s] + end + + test "Integer * duration[μs]" do + one_hour_us_s = Series.from_list([@one_hour_duration_us]) + product_s = Series.multiply(2, one_hour_us_s) + + assert product_s.dtype == {:duration, :microsecond} + two_hour_duration_s = %Duration{value: 2 * @one_hour_us, precision: :microsecond} + assert Series.to_list(product_s) == [two_hour_duration_s] + end + + test "duration[μs] * Integer" do + one_hour_us_s = Series.from_list([@one_hour_duration_us]) + product_s = Series.multiply(one_hour_us_s, 2) + + assert product_s.dtype == {:duration, :microsecond} + two_hour_duration_s = %Duration{value: 2 * @one_hour_us, precision: :microsecond} + assert Series.to_list(product_s) == [two_hour_duration_s] + end + + # Float + + test "float * duration[μs]" do + one_hour_us_s = Series.from_list([@one_hour_duration_us]) + two_s = Series.from_list([2.0]) + product_s = Series.multiply(two_s, one_hour_us_s) + + assert product_s.dtype == {:duration, :microsecond} + two_hour_duration_s = %Duration{value: 2 * @one_hour_us, precision: :microsecond} + assert Series.to_list(product_s) == [two_hour_duration_s] + end + + test "duration[μs] * float" do + one_hour_us_s = Series.from_list([@one_hour_duration_us]) + two_s = Series.from_list([2.0]) + product_s = Series.multiply(one_hour_us_s, two_s) + + assert product_s.dtype == {:duration, :microsecond} + two_hour_duration_s = %Duration{value: 2 * @one_hour_us, precision: :microsecond} + assert Series.to_list(product_s) == [two_hour_duration_s] + end + + test "Float * duration[μs]" do + one_hour_us_s = Series.from_list([@one_hour_duration_us]) + product_s = Series.multiply(2.0, one_hour_us_s) + + assert product_s.dtype == {:duration, :microsecond} + two_hour_duration_s = %Duration{value: 2 * @one_hour_us, precision: :microsecond} + assert 
Series.to_list(product_s) == [two_hour_duration_s] + end + + test "duration[μs] * Float" do + one_hour_us_s = Series.from_list([@one_hour_duration_us]) + product_s = Series.multiply(one_hour_us_s, 2.0) + + assert product_s.dtype == {:duration, :microsecond} + two_hour_duration_s = %Duration{value: 2 * @one_hour_us, precision: :microsecond} + assert Series.to_list(product_s) == [two_hour_duration_s] + end + + test "fractional parts of floats work (roughly) as expected" do + # This test is not exhaustive. Rather, its purpose is to give us a reasonable confidence that + # multiplying durations by floats is fairly accurate. + # + # The exact answers we see here are subject to implementation details outside our control. + # If we find that this test breaks unexpectedly (e.g. from a dependency update), then we may + # wish to remove it. + one_s = 1 / 3_600 + one_ms = 1 / 3_600_000 + one_us = 1 / 3_600_000_000 + one_ns = 1 / 3_600_000_000_000 + + float_string_pairs = [ + {3 / 4, "45m"}, + {3 / 2, "1h 30m"}, + {1.0 + one_s, "1h 1s"}, + # Float rounding issue (but only off by one). 
+ {1.0 + one_ms, "1h 999us 999ns"}, + {1.0 + one_us, "1h 1us"}, + {1.0 + one_ns, "1h 1ns"} + ] + + one_hour_ns_s = Series.from_list([1_000 * @one_hour_us], dtype: {:duration, :nanosecond}) + + for {float, expected} <- float_string_pairs do + [duration] = one_hour_ns_s |> Series.multiply(float) |> Series.to_list() + assert to_string(duration) == expected + end + end + test "duration[μs] * duration[μs] raises ArgumentError" do one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) @@ -294,6 +583,64 @@ defmodule Explorer.Series.DurationTest do end describe "divide" do + # Integer + + test "duration[μs] / integer" do + one_hour_us_s = Series.from_list([@one_hour_duration_us]) + two_s = Series.from_list([2]) + quotient_s = Series.divide(one_hour_us_s, two_s) + + assert quotient_s.dtype == {:duration, :microsecond} + thirty_min_duration_s = %Duration{value: div(@one_hour_us, 2), precision: :microsecond} + assert Series.to_list(quotient_s) == [thirty_min_duration_s] + end + + test "duration[μs] / Integer" do + one_hour_us_s = Series.from_list([@one_hour_duration_us]) + quotient_s = Series.divide(one_hour_us_s, 2) + + assert quotient_s.dtype == {:duration, :microsecond} + thirty_min_duration_s = %Duration{value: div(@one_hour_us, 2), precision: :microsecond} + assert Series.to_list(quotient_s) == [thirty_min_duration_s] + end + + test "Integer / duration[μs] raises ArgumentError" do + one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) + + assert_raise ArgumentError, + "cannot divide by duration", + fn -> Series.divide(2, one_hour_s) end + end + + # Float + + test "duration[μs] / float" do + one_hour_us_s = Series.from_list([@one_hour_duration_us]) + two_s = Series.from_list([2.0]) + quotient_s = Series.divide(one_hour_us_s, two_s) + + assert quotient_s.dtype == {:duration, :microsecond} + thirty_min_duration_s = %Duration{value: div(@one_hour_us, 2), precision: :microsecond} + assert Series.to_list(quotient_s) == 
[thirty_min_duration_s] + end + + test "duration[μs] / Float" do + one_hour_us_s = Series.from_list([@one_hour_duration_us]) + quotient_s = Series.divide(one_hour_us_s, 2.0) + + assert quotient_s.dtype == {:duration, :microsecond} + thirty_min_duration_s = %Duration{value: div(@one_hour_us, 2), precision: :microsecond} + assert Series.to_list(quotient_s) == [thirty_min_duration_s] + end + + test "Float / duration[μs] raises ArgumentError" do + one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) + + assert_raise ArgumentError, + "cannot divide by duration", + fn -> Series.divide(2.0, one_hour_s) end + end + test "duration[μs] / duration[μs] raises ArgumentError" do one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) @@ -322,5 +669,58 @@ defmodule Explorer.Series.DurationTest do >\ """ end + + test "mutate/2 with scalar Duration" do + require Explorer.DataFrame + alias Explorer.DataFrame, as: DF + + ms = %Duration{value: 1_000, precision: :millisecond} + us = %Duration{value: 1_000, precision: :microsecond} + ns = %Duration{value: 1_000, precision: :nanosecond} + + df = DF.new([]) + + df = DF.mutate(df, ms: ^ms) + assert df["ms"].dtype == {:duration, :millisecond} + assert Series.to_list(df["ms"]) == [ms] + + df = DF.mutate(df, us: ^us) + assert df["us"].dtype == {:duration, :microsecond} + assert Series.to_list(df["us"]) == [us] + + df = DF.mutate(df, ns: ^ns) + assert df["ns"].dtype == {:duration, :nanosecond} + assert Series.to_list(df["ns"]) == [ns] + end + + # There is currently an issue with Polars where `duration + date` is not supported but + # `date + duration` is. There is a workaround in `Series.add/2` where we swap the args, but + # that workaround does not extend to expressions. This test contains scenarios that a solution + # which extends to expressions will need to cover. 
+ @tag :skip + test "mutate/2 with duration + date" do + require Explorer.DataFrame + alias Explorer.DataFrame, as: DF + + aug_20 = Series.from_list([~D[2023-08-20]]) + aug_21 = Series.from_list([~D[2023-08-21]]) + df = DF.new(aug_20: aug_20, aug_21: aug_21, sub: Series.subtract(aug_21, aug_20)) + + df1 = DF.mutate(df, add1: sub + aug_20) + assert df1["add1"].dtype == :date + assert Series.to_list(df1["add1"]) == [~D[2023-08-21]] + + df2 = DF.mutate(df, add2: sub + ^df["aug_20"]) + assert df2["add2"].dtype == :date + assert Series.to_list(df2["add2"]) == [~D[2023-08-21]] + + df3 = DF.mutate(df, add3: sub + aug_20 + sub) + assert df3["add3"].dtype == :date + assert Series.to_list(df3["add3"]) == [~D[2023-08-22]] + + df4 = DF.mutate(df, add4: sub + (aug_20 + sub)) + assert df4["add4"].dtype == :date + assert Series.to_list(df4["add4"]) == [~D[2023-08-22]] + end end end diff --git a/test/explorer/series_test.exs b/test/explorer/series_test.exs index 88d4a9635..a194d29a9 100644 --- a/test/explorer/series_test.exs +++ b/test/explorer/series_test.exs @@ -1405,7 +1405,7 @@ defmodule Explorer.SeriesTest do test "adding two numbers" do assert_raise ArgumentError, - "add/2 expect a series as one of its arguments, instead got two numbers: 1 and 2", + "add/2 expects a series as one of its arguments, instead got two scalars: 1 and 2", fn -> Series.add(1, 2) end