diff --git a/lib/explorer/backend/lazy_series.ex b/lib/explorer/backend/lazy_series.ex index 1d43beced..c8957a2a0 100644 --- a/lib/explorer/backend/lazy_series.ex +++ b/lib/explorer/backend/lazy_series.ex @@ -857,7 +857,7 @@ defmodule Explorer.Backend.LazySeries do @impl true def strptime(%Series{} = series, format_string) do - dtype = {:datetime, :microsecond} + dtype = {:naive_datetime, :microsecond} data = new(:strptime, [lazy_series!(series), format_string], dtype) Backend.Series.new(data, dtype) diff --git a/lib/explorer/data_frame.ex b/lib/explorer/data_frame.ex index 4fa5a25ca..3474ff005 100644 --- a/lib/explorer/data_frame.ex +++ b/lib/explorer/data_frame.ex @@ -2857,7 +2857,10 @@ defmodule Explorer.DataFrame do date = %Date{} -> LazySeries.new(:lazy, [date], :date) - datetime = %NaiveDateTime{} -> + naive_datetime = %NaiveDateTime{} -> + LazySeries.new(:lazy, [naive_datetime], {:naive_datetime, :nanosecond}) + + datetime = %DateTime{} -> LazySeries.new(:lazy, [datetime], {:datetime, :nanosecond}) duration = %Explorer.Duration{precision: precision} -> @@ -2949,10 +2952,10 @@ defmodule Explorer.DataFrame do in microseconds from the Unix epoch: iex> df = Explorer.DataFrame.new([]) - iex> Explorer.DataFrame.put(df, :a, Nx.tensor([1, 2, 3]), dtype: {:datetime, :microsecond}) + iex> Explorer.DataFrame.put(df, :a, Nx.tensor([1, 2, 3]), dtype: {:naive_datetime, :microsecond}) #Explorer.DataFrame< Polars[3 x 1] - a datetime[μs] [1970-01-01 00:00:00.000001, 1970-01-01 00:00:00.000002, 1970-01-01 00:00:00.000003] + a naive_datetime[μs] [1970-01-01 00:00:00.000001, 1970-01-01 00:00:00.000002, 1970-01-01 00:00:00.000003] > If there is already a column where we want to place the tensor, @@ -2964,7 +2967,7 @@ defmodule Explorer.DataFrame do iex> Explorer.DataFrame.put(df, :a, Nx.tensor(529550625987654)) #Explorer.DataFrame< Polars[1 x 1] - a datetime[μs] [1986-10-13 01:23:45.987654] + a naive_datetime[μs] [1986-10-13 01:23:45.987654] > This is particularly useful for 
categorical columns: @@ -5749,7 +5752,7 @@ defmodule Explorer.DataFrame do stat_cols = df.names percentiles = process_percentiles(opts[:percentiles]) numeric_types = Shared.numeric_types() - datetime_types = Shared.datetime_types() + naive_datetime_types = Shared.naive_datetime_types() duration_types = Shared.duration_types() metrics_df = @@ -5757,7 +5760,7 @@ defmodule Explorer.DataFrame do Enum.flat_map(stat_cols, fn c -> dt = x[c].dtype numeric? = dt in numeric_types - min_max? = numeric? or dt in datetime_types or dt in duration_types + min_max? = numeric? or dt in naive_datetime_types or dt in duration_types [ {"count:#{c}", Series.count(x[c])}, diff --git a/lib/explorer/polars_backend/native.ex b/lib/explorer/polars_backend/native.ex index aa5462147..ba9238e1d 100644 --- a/lib/explorer/polars_backend/native.ex +++ b/lib/explorer/polars_backend/native.ex @@ -342,7 +342,8 @@ defmodule Explorer.PolarsBackend.Native do def s_from_list_bool(_name, _val), do: err() def s_from_list_date(_name, _val), do: err() def s_from_list_time(_name, _val), do: err() - def s_from_list_datetime(_name, _val, _precision), do: err() + def s_from_list_naive_datetime(_name, _val, _precision), do: err() + def s_from_list_datetime(_name, _val, _precision, _tz), do: err() def s_from_list_duration(_name, _val, _precision), do: err() def s_from_list_f32(_name, _val), do: err() def s_from_list_f64(_name, _val), do: err() diff --git a/lib/explorer/polars_backend/shared.ex b/lib/explorer/polars_backend/shared.ex index cab2f2615..b5f4ac0c2 100644 --- a/lib/explorer/polars_backend/shared.ex +++ b/lib/explorer/polars_backend/shared.ex @@ -187,8 +187,9 @@ defmodule Explorer.PolarsBackend.Shared do :category -> Native.s_from_list_categories(name, list) :date -> Native.s_from_list_date(name, list) :time -> Native.s_from_list_time(name, list) - {:datetime, precision} -> Native.s_from_list_datetime(name, list, Atom.to_string(precision)) - {:duration, precision} -> Native.s_from_list_duration(name, 
list, Atom.to_string(precision)) + {:naive_datetime, precision} -> Native.s_from_list_naive_datetime(name, list, precision) + {:datetime, precision, tz} -> Native.s_from_list_datetime(name, list, precision, tz) + {:duration, precision} -> Native.s_from_list_duration(name, list, precision) :binary -> Native.s_from_list_binary(name, list) :null -> Native.s_from_list_null(name, length(list)) end @@ -205,13 +206,13 @@ defmodule Explorer.PolarsBackend.Shared do :time -> Native.s_from_binary_s64(name, binary) |> Native.s_cast(dtype) |> ok() - {:datetime, :millisecond} -> + {:naive_datetime, :millisecond} -> Native.s_from_binary_s64(name, binary) |> Native.s_cast(dtype) |> ok() - {:datetime, :microsecond} -> + {:naive_datetime, :microsecond} -> Native.s_from_binary_s64(name, binary) |> Native.s_cast(dtype) |> ok() - {:datetime, :nanosecond} -> + {:naive_datetime, :nanosecond} -> Native.s_from_binary_s64(name, binary) |> Native.s_cast(dtype) |> ok() {:duration, :millisecond} -> diff --git a/lib/explorer/series.ex b/lib/explorer/series.ex index 158647a4d..1bf611cd8 100644 --- a/lib/explorer/series.ex +++ b/lib/explorer/series.ex @@ -8,8 +8,10 @@ defmodule Explorer.Series do * `:boolean` - Boolean * `:category` - Strings but represented internally as integers * `:date` - Date type that unwraps to `Elixir.Date` - * `{:datetime, precision}` - DateTime type with millisecond/microsecond/nanosecond + * `{:naive_datetime, precision}` - Naive DateTime type with millisecond/microsecond/nanosecond precision that unwraps to `Elixir.NaiveDateTime` + * `{:datetime, precision, time_zone}` - DateTime type with millisecond/microsecond/nanosecond + precision that unwraps to `Elixir.DateTime` * `{:duration, precision}` - Duration type with millisecond/microsecond/nanosecond precision that unwraps to `Explorer.Duration` * `{:f, size}` - a 64-bit or 32-bit floating point number @@ -119,13 +121,13 @@ defmodule Explorer.Series do alias Explorer.Duration alias Explorer.Shared - @datetime_dtypes 
Explorer.Shared.datetime_types() + @naive_datetime_dtypes Explorer.Shared.naive_datetime_types() @duration_dtypes Explorer.Shared.duration_types() @float_dtypes Explorer.Shared.float_types() @integer_types Explorer.Shared.integer_types() - @date_or_datetime_dtypes [:date | @datetime_dtypes] - @temporal_dtypes [:time | @date_or_datetime_dtypes ++ @duration_dtypes] + @date_or_naive_datetime_dtypes [:date | @naive_datetime_dtypes] + @temporal_dtypes [:time | @date_or_naive_datetime_dtypes ++ @duration_dtypes] @numeric_dtypes Explorer.Shared.numeric_types() @numeric_or_temporal_dtypes @numeric_dtypes ++ @temporal_dtypes @@ -139,6 +141,7 @@ defmodule Explorer.Series do | :date | :time | :string + | naive_datetime_dtype | datetime_dtype | duration_dtype | float_dtype @@ -148,7 +151,9 @@ defmodule Explorer.Series do | unsigned_integer_dtype @type time_unit :: :nanosecond | :microsecond | :millisecond - @type datetime_dtype :: {:datetime, time_unit} + @type time_zone :: String.t() + @type naive_datetime_dtype :: {:naive_datetime, time_unit} + @type datetime_dtype :: {:datetime, time_unit, time_zone} @type duration_dtype :: {:duration, time_unit} @type list_dtype :: {:list, dtype()} @type struct_dtype :: {:struct, [{String.t(), dtype()}]} @@ -173,6 +178,7 @@ defmodule Explorer.Series do | Date.t() | Time.t() | NaiveDateTime.t() + | DateTime.t() @doc false @enforce_keys [:data, :dtype] @@ -190,8 +196,49 @@ defmodule Explorer.Series do defguardp is_numeric_or_bool_dtype(dtype) when K.in(dtype, [:boolean | @numeric_dtypes]) + defguardp is_precision(precision) + when K.in(precision, [:millisecond, :microsecond, :nanosecond]) + + defguardp is_duration_dtype(dtype) + when is_tuple(dtype) + |> K.and(tuple_size(dtype) == 2) + |> K.and(elem(dtype, 0) == :duration) + |> K.and(elem(dtype, 1) |> is_precision()) + + defguardp is_naive_datetime_dtype(dtype) + when is_tuple(dtype) + |> K.and(tuple_size(dtype) == 2) + |> K.and(elem(dtype, 0) == :naive_datetime) + |> K.and(elem(dtype, 1) |> 
is_precision()) + + defguardp is_datetime_dtype(dtype) + when is_tuple(dtype) + |> K.and(tuple_size(dtype) == 3) + |> K.and(elem(dtype, 0) == :datetime) + |> K.and(elem(dtype, 1) |> is_precision()) + |> K.and(elem(dtype, 2) |> is_binary()) + + defguardp is_datetime_like_dtype(dtype) + when is_datetime_dtype(dtype) + |> K.or(is_naive_datetime_dtype(dtype)) + + defguardp is_date_like_dtype(dtype) + when (dtype == :date) + |> K.or(dtype |> is_datetime_like_dtype()) + + defguardp is_time_like_dtype(dtype) + when (dtype == :time) + |> K.or(dtype |> is_datetime_like_dtype()) + + defguardp is_temporal_dtype(dtype) + when (dtype == :date) + |> K.or(dtype == :time) + |> K.or(dtype |> is_datetime_like_dtype()) + |> K.or(dtype |> is_duration_dtype()) + defguardp is_numeric_or_temporal_dtype(dtype) - when K.in(dtype, @numeric_or_temporal_dtypes) + when is_numeric_dtype(dtype) + |> K.or(dtype |> is_temporal_dtype()) @impl true def fetch(series, idx) when is_integer(idx), do: {:ok, fetch!(series, idx)} @@ -320,9 +367,10 @@ defmodule Explorer.Series do null [nil, nil] > - A list of `Date`, `Time`, `NaiveDateTime`, and `Explorer.Duration` structs - are also supported, and they will become series with the respective dtypes: - `:date`, `:time`, `{:datetime, :microsecond}`, and `{:duration, precision}`. + A list of `Date`, `Time`, `NaiveDateTime`, `DateTime`, and + `Explorer.Duration` structs are also supported, and they will become series + with the respective dtypes: `:date`, `:time`, `{:naive_datetime, :microsecond}`, + and `{:duration, precision}`. For example: iex> Explorer.Series.from_list([~D[0001-01-01], ~D[1970-01-01], ~D[1986-10-13]]) @@ -385,10 +433,10 @@ defmodule Explorer.Series do It is possible to create a series of `:datetime` from a list of microseconds since Unix Epoch. 
- iex> Explorer.Series.from_list([1649883642 * 1_000 * 1_000], dtype: {:datetime, :microsecond}) + iex> Explorer.Series.from_list([1649883642 * 1_000 * 1_000], dtype: {:naive_datetime, :microsecond}) #Explorer.Series< Polars[1] - datetime[μs] [2022-04-13 21:00:42.000000] + naive_datetime[μs] [2022-04-13 21:00:42.000000] > It is possible to create a series of `:time` from a list of nanoseconds since midnight. @@ -486,10 +534,10 @@ defmodule Explorer.Series do Datetimes are encoded as s64 representing microseconds from the Unix epoch (1970-01-01): iex> binary = <<0::signed-64-native, 529550625987654::signed-64-native>> - iex> Explorer.Series.from_binary(binary, {:datetime, :microsecond}) + iex> Explorer.Series.from_binary(binary, {:naive_datetime, :microsecond}) #Explorer.Series< Polars[2] - datetime[μs] [1970-01-01 00:00:00.000000, 1986-10-13 01:23:45.987654] + naive_datetime[μs] [1970-01-01 00:00:00.000000, 1986-10-13 01:23:45.987654] > """ @@ -544,7 +592,7 @@ defmodule Explorer.Series do * `{:u, 8}` tensor as a `:boolean` series. * `{:s, 32}` tensor as a `:date` series. * `{:s, 64}` tensor as a `:time` series. - * `{:s, 64}` tensor as a `{:datetime, unit}` or `{:duration, unit}` series. + * `{:s, 64}` tensor as a `{:naive_datetime, unit}` or `{:duration, unit}` series. 
## Examples @@ -600,10 +648,10 @@ defmodule Explorer.Series do Datetimes are signed 64-bit and therefore must have their dtype explicitly given: iex> tensor = Nx.tensor([0, 529550625987654]) - iex> Explorer.Series.from_tensor(tensor, dtype: {:datetime, :microsecond}) + iex> Explorer.Series.from_tensor(tensor, dtype: {:naive_datetime, :microsecond}) #Explorer.Series< Polars[2] - datetime[μs] [1970-01-01 00:00:00.000000, 1986-10-13 01:23:45.987654] + naive_datetime[μs] [1970-01-01 00:00:00.000000, 1986-10-13 01:23:45.987654] > """ @doc type: :conversion @@ -924,17 +972,17 @@ defmodule Explorer.Series do Note that `datetime` is represented as an integer of microseconds since Unix Epoch (1970-01-01 00:00:00). iex> s = Explorer.Series.from_list([1, 2, 3]) - iex> Explorer.Series.cast(s, {:datetime, :microsecond}) + iex> Explorer.Series.cast(s, {:naive_datetime, :microsecond}) #Explorer.Series< Polars[3] - datetime[μs] [1970-01-01 00:00:00.000001, 1970-01-01 00:00:00.000002, 1970-01-01 00:00:00.000003] + naive_datetime[μs] [1970-01-01 00:00:00.000001, 1970-01-01 00:00:00.000002, 1970-01-01 00:00:00.000003] > iex> s = Explorer.Series.from_list([1649883642 * 1_000 * 1_000]) - iex> Explorer.Series.cast(s, {:datetime, :microsecond}) + iex> Explorer.Series.cast(s, {:naive_datetime, :microsecond}) #Explorer.Series< Polars[1] - datetime[μs] [2022-04-13 21:00:42.000000] + naive_datetime[μs] [2022-04-13 21:00:42.000000] > You can also use `cast/2` to categorise a string: @@ -983,7 +1031,7 @@ defmodule Explorer.Series do iex> Explorer.Series.strptime(s, "%Y-%m-%d %H:%M:%S") #Explorer.Series< Polars[3] - datetime[μs] [2023-01-05 12:34:56.000000, nil, nil] + naive_datetime[μs] [2023-01-05 12:34:56.000000, nil, nil] > """ @doc type: :element_wise @@ -1013,11 +1061,11 @@ defmodule Explorer.Series do """ @doc type: :element_wise @spec strftime(series :: Series.t(), format_string :: String.t()) :: Series.t() - def strftime(%Series{dtype: dtype} = series, format_string) when K.in(dtype, 
@datetime_dtypes),
+  def strftime(%Series{dtype: dtype} = series, format_string) when is_datetime_like_dtype(dtype),
     do: apply_series(series, :strftime, [format_string])
 
   def strftime(%Series{dtype: dtype}, _format_string),
-    do: dtype_error("strftime/2", dtype, @datetime_dtypes)
+    do: super_dtype_error("strftime/2", dtype, [:datetime, :naive_datetime])
 
   @doc """
   Clip (or clamp) the values in a series.
@@ -2315,11 +2363,11 @@ defmodule Explorer.Series do
 
       iex> s = Explorer.Series.from_list(["a", "b", "c"])
       iex> Explorer.Series.min(s)
-      ** (ArgumentError) Explorer.Series.min/1 not implemented for dtype :string. Valid dtypes are :date, :time, {:datetime, :microsecond}, {:datetime, :millisecond}, {:datetime, :nanosecond}, {:duration, :microsecond}, {:duration, :millisecond}, {:duration, :nanosecond}, {:f, 32}, {:f, 64}, {:s, 8}, {:s, 16}, {:s, 32}, {:s, 64}, {:u, 8}, {:u, 16}, {:u, 32} and {:u, 64}
+      ** (ArgumentError) Explorer.Series.min/1 not implemented for dtype :string. Valid dtypes are :date, :time, {:duration, :microsecond}, {:duration, :millisecond}, {:duration, :nanosecond}, {:f, 32}, {:f, 64}, {:naive_datetime, :microsecond}, {:naive_datetime, :millisecond}, {:naive_datetime, :nanosecond}, {:s, 8}, {:s, 16}, {:s, 32}, {:s, 64}, {:u, 8}, {:u, 16}, {:u, 32} and {:u, 64}
   """
   @doc type: :aggregation
   @spec min(series :: Series.t()) ::
-          number() | non_finite() | Date.t() | Time.t() | NaiveDateTime.t() | nil
+          number() | non_finite() | Date.t() | Time.t() | DateTime.t() | NaiveDateTime.t() | nil
   def min(%Series{dtype: dtype} = series) when is_numeric_or_temporal_dtype(dtype),
     do: apply_series(series, :min)
 
@@ -2334,6 +2382,7 @@ defmodule Explorer.Series do
     * integers: #{Shared.inspect_dtypes(@integer_types, backsticks: true)}
     * `:date`
     * `:time`
+    * `:naive_datetime`
     * `:datetime`
     * `:duration`
 
@@ -2361,7 +2410,7 @@ defmodule Explorer.Series do
 
       iex> s = Explorer.Series.from_list(["a", "b", "c"])
       iex> Explorer.Series.max(s)
-      ** (ArgumentError) Explorer.Series.max/1 not implemented for
dtype :string. Valid dtypes are :date, :time, {:datetime, :microsecond}, {:datetime, :millisecond}, {:datetime, :nanosecond}, {:duration, :microsecond}, {:duration, :millisecond}, {:duration, :nanosecond}, {:f, 32}, {:f, 64}, {:s, 8}, {:s, 16}, {:s, 32}, {:s, 64}, {:u, 8}, {:u, 16}, {:u, 32} and {:u, 64} + ** (ArgumentError) Explorer.Series.max/1 not implemented for dtype :string. Valid dtypes are :date, :time, {:duration, :microsecond}, {:duration, :millisecond}, {:duration, :nanosecond}, {:f, 32}, {:f, 64}, {:naive_datetime, :microsecond}, {:naive_datetime, :millisecond}, {:naive_datetime, :nanosecond}, {:s, 8}, {:s, 16}, {:s, 32}, {:s, 64}, {:u, 8}, {:u, 16}, {:u, 32} and {:u, 64} """ @doc type: :aggregation @spec max(series :: Series.t()) :: @@ -2411,7 +2460,7 @@ defmodule Explorer.Series do iex> s = Explorer.Series.from_list(["a", "b", "c"]) iex> Explorer.Series.argmax(s) - ** (ArgumentError) Explorer.Series.argmax/1 not implemented for dtype :string. Valid dtypes are :date, :time, {:datetime, :microsecond}, {:datetime, :millisecond}, {:datetime, :nanosecond}, {:duration, :microsecond}, {:duration, :millisecond}, {:duration, :nanosecond}, {:f, 32}, {:f, 64}, {:s, 8}, {:s, 16}, {:s, 32}, {:s, 64}, {:u, 8}, {:u, 16}, {:u, 32} and {:u, 64} + ** (ArgumentError) Explorer.Series.argmax/1 not implemented for dtype :string. Valid dtypes are :date, :time, {:duration, :microsecond}, {:duration, :millisecond}, {:duration, :nanosecond}, {:f, 32}, {:f, 64}, {:naive_datetime, :microsecond}, {:naive_datetime, :millisecond}, {:naive_datetime, :nanosecond}, {:s, 8}, {:s, 16}, {:s, 32}, {:s, 64}, {:u, 8}, {:u, 16}, {:u, 32} and {:u, 64} """ @doc type: :aggregation @spec argmax(series :: Series.t()) :: number() | non_finite() | nil @@ -2469,7 +2518,7 @@ defmodule Explorer.Series do iex> s = Explorer.Series.from_list(["a", "b", "c"]) iex> Explorer.Series.argmin(s) - ** (ArgumentError) Explorer.Series.argmin/1 not implemented for dtype :string. 
Valid dtypes are :date, :time, {:datetime, :microsecond}, {:datetime, :millisecond}, {:datetime, :nanosecond}, {:duration, :microsecond}, {:duration, :millisecond}, {:duration, :nanosecond}, {:f, 32}, {:f, 64}, {:s, 8}, {:s, 16}, {:s, 32}, {:s, 64}, {:u, 8}, {:u, 16}, {:u, 32} and {:u, 64} + ** (ArgumentError) Explorer.Series.argmin/1 not implemented for dtype :string. Valid dtypes are :date, :time, {:duration, :microsecond}, {:duration, :millisecond}, {:duration, :nanosecond}, {:f, 32}, {:f, 64}, {:naive_datetime, :microsecond}, {:naive_datetime, :millisecond}, {:naive_datetime, :nanosecond}, {:s, 8}, {:s, 16}, {:s, 32}, {:s, 64}, {:u, 8}, {:u, 16}, {:u, 32} and {:u, 64} """ @doc type: :aggregation @spec argmin(series :: Series.t()) :: number() | non_finite() | nil @@ -2606,7 +2655,7 @@ defmodule Explorer.Series do iex> s = Explorer.Series.from_list([~N[2021-01-01 00:00:00], ~N[1999-12-31 00:00:00]]) iex> Explorer.Series.variance(s) - ** (ArgumentError) Explorer.Series.variance/1 not implemented for dtype {:datetime, :microsecond}. Valid dtypes are {:f, 32}, {:f, 64}, {:s, 8}, {:s, 16}, {:s, 32}, {:s, 64}, {:u, 8}, {:u, 16}, {:u, 32} and {:u, 64} + ** (ArgumentError) Explorer.Series.variance/1 not implemented for dtype {:naive_datetime, :microsecond}. Valid dtypes are {:f, 32}, {:f, 64}, {:s, 8}, {:s, 16}, {:s, 32}, {:s, 64}, {:u, 8}, {:u, 16}, {:u, 32} and {:u, 64} """ @doc type: :aggregation @spec variance(series :: Series.t(), ddof :: non_neg_integer()) :: float() | non_finite() | nil @@ -2723,7 +2772,7 @@ defmodule Explorer.Series do iex> s = Explorer.Series.from_list([true, false, true]) iex> Explorer.Series.quantile(s, 0.5) - ** (ArgumentError) Explorer.Series.quantile/2 not implemented for dtype :boolean. 
Valid dtypes are :date, :time, {:datetime, :microsecond}, {:datetime, :millisecond}, {:datetime, :nanosecond}, {:duration, :microsecond}, {:duration, :millisecond}, {:duration, :nanosecond}, {:f, 32}, {:f, 64}, {:s, 8}, {:s, 16}, {:s, 32}, {:s, 64}, {:u, 8}, {:u, 16}, {:u, 32} and {:u, 64}
+      ** (ArgumentError) Explorer.Series.quantile/2 not implemented for dtype :boolean. Valid dtypes are any subtype of [:u, :s, :f, :date, :time, :naive_datetime, :datetime, :duration]
   """
   @doc type: :aggregation
   @spec quantile(series :: Series.t(), quantile :: float()) :: any()
@@ -2731,8 +2780,10 @@ defmodule Explorer.Series do
       when is_numeric_or_temporal_dtype(dtype),
       do: apply_series(series, :quantile, [quantile])
 
-  def quantile(%Series{dtype: dtype}, _),
-    do: dtype_error("quantile/2", dtype, @numeric_or_temporal_dtypes)
+  def quantile(%Series{dtype: dtype}, _) do
+    super_dtypes = [:u, :s, :f, :date, :time, :naive_datetime, :datetime, :duration]
+    super_dtype_error("quantile/2", dtype, super_dtypes)
+  end
 
   @doc """
   Compute the sample skewness of a series.
@@ -3212,22 +3263,33 @@ defmodule Explorer.Series do
 
   # TODO: maybe we can move this casting to Rust.
   defp enforce_highest_precision([
-         %Series{dtype: {left_base, left_timeunit}} = left,
-         %Series{dtype: {right_base, right_timeunit}} = right
+         %Series{dtype: dtype_left} = left,
+         %Series{dtype: dtype_right} = right
        ])
-       when K.and(is_atom(left_timeunit), is_atom(right_timeunit)) do
+       when K.and(
+              K.or(is_datetime_like_dtype(dtype_left), is_duration_dtype(dtype_left)),
+              K.or(is_datetime_like_dtype(dtype_right), is_duration_dtype(dtype_right))
+            ) do
     # Higher precision wins, otherwise information is lost.
-    case {left_timeunit, right_timeunit} do
+    case {extract_precision(dtype_left), extract_precision(dtype_right)} do
       {equal, equal} -> [left, right]
-      {:nanosecond, _} -> [left, cast(right, {right_base, :nanosecond})]
-      {_, :nanosecond} -> [cast(left, {left_base, :nanosecond}), right]
-      {:microsecond, _} -> [left, cast(right, {right_base, :microsecond})]
-      {_, :microsecond} -> [cast(left, {left_base, :microsecond}), right]
+      {:nanosecond, _} -> [left, cast(right, update_precision(dtype_right, :nanosecond))]
+      {_, :nanosecond} -> [cast(left, update_precision(dtype_left, :nanosecond)), right]
+      {:microsecond, _} -> [left, cast(right, update_precision(dtype_right, :microsecond))]
+      {_, :microsecond} -> [cast(left, update_precision(dtype_left, :microsecond)), right]
     end
   end
 
   defp enforce_highest_precision(args), do: args
 
+  defp extract_precision({:naive_datetime, precision}), do: precision
+  defp extract_precision({:datetime, precision, _time_zone}), do: precision
+  defp extract_precision({:duration, precision}), do: precision
+
+  defp update_precision({:naive_datetime, _old}, new), do: {:naive_datetime, new}
+  defp update_precision({:datetime, _old, time_zone}, new), do: {:datetime, new, time_zone}
+  defp update_precision({:duration, _old}, new), do: {:duration, new}
+
   @doc """
   Adds right to left, element-wise.
 
@@ -3270,8 +3332,10 @@ defmodule Explorer.Series do """ @doc type: :element_wise @spec add( - left :: Series.t() | number() | Date.t() | NaiveDateTime.t() | Duration.t(), - right :: Series.t() | number() | Date.t() | NaiveDateTime.t() | Duration.t() + left :: + Series.t() | number() | Date.t() | DateTime.t() | NaiveDateTime.t() | Duration.t(), + right :: + Series.t() | number() | Date.t() | DateTime.t() | NaiveDateTime.t() | Duration.t() ) :: Series.t() def add(left, right) do [left, right] = cast_for_arithmetic("add/2", [left, right]) @@ -3285,8 +3349,10 @@ defmodule Explorer.Series do defp cast_to_add(:date, {:duration, _}), do: :date defp cast_to_add({:duration, _}, :date), do: :date - defp cast_to_add({:datetime, p}, {:duration, p}), do: {:datetime, p} - defp cast_to_add({:duration, p}, {:datetime, p}), do: {:datetime, p} + defp cast_to_add({:naive_datetime, p}, {:duration, p}), do: {:naive_datetime, p} + defp cast_to_add({:duration, p}, {:naive_datetime, p}), do: {:naive_datetime, p} + defp cast_to_add({:datetime, p, tz}, {:duration, p}), do: {:datetime, p, tz} + defp cast_to_add({:duration, p}, {:datetime, p, tz}), do: {:datetime, p, tz} defp cast_to_add({:duration, p}, {:duration, p}), do: {:duration, p} defp cast_to_add(left, right), do: Shared.merge_numeric_dtype(left, right) @@ -3334,8 +3400,10 @@ defmodule Explorer.Series do """ @doc type: :element_wise @spec subtract( - left :: Series.t() | number() | Date.t() | NaiveDateTime.t() | Duration.t(), - right :: Series.t() | number() | Date.t() | NaiveDateTime.t() | Duration.t() + left :: + Series.t() | number() | Date.t() | DateTime.t() | NaiveDateTime.t() | Duration.t(), + right :: + Series.t() | number() | Date.t() | DateTime.t() | NaiveDateTime.t() | Duration.t() ) :: Series.t() def subtract(left, right) do [left, right] = cast_for_arithmetic("subtract/2", [left, right]) @@ -3349,8 +3417,10 @@ defmodule Explorer.Series do defp cast_to_subtract(:date, :date), do: {:duration, :millisecond} defp 
cast_to_subtract(:date, {:duration, _}), do: :date - defp cast_to_subtract({:datetime, p}, {:datetime, p}), do: {:duration, p} - defp cast_to_subtract({:datetime, p}, {:duration, p}), do: {:datetime, p} + defp cast_to_subtract({:naive_datetime, p}, {:naive_datetime, p}), do: {:duration, p} + defp cast_to_subtract({:naive_datetime, p}, {:duration, p}), do: {:naive_datetime, p} + defp cast_to_subtract({:datetime, p, tz}, {:datetime, p, tz}), do: {:duration, p} + defp cast_to_subtract({:datetime, p, tz}, {:duration, p}), do: {:datetime, p, tz} defp cast_to_subtract({:duration, p}, {:duration, p}), do: {:duration, p} defp cast_to_subtract(left, right), do: Shared.merge_numeric_dtype(left, right) @@ -4343,8 +4413,8 @@ defmodule Explorer.Series do defp cast_to_ordered_series(:date, %Date{}), do: :date defp cast_to_ordered_series(:time, %Time{}), do: :time - defp cast_to_ordered_series({:datetime, _}, %NaiveDateTime{}), - do: {:datetime, :microsecond} + defp cast_to_ordered_series({:naive_datetime, _}, %NaiveDateTime{}), + do: {:naive_datetime, :microsecond} defp cast_to_ordered_series({:duration, _}, value) when is_integer(value), @@ -6095,11 +6165,11 @@ defmodule Explorer.Series do """ @doc type: :datetime_wise @spec month(Series.t()) :: Series.t() - def month(%Series{dtype: dtype} = series) when K.in(dtype, @date_or_datetime_dtypes), + def month(%Series{dtype: dtype} = series) when is_date_like_dtype(dtype), do: apply_series_list(:month, [series]) def month(%Series{dtype: dtype}), - do: dtype_error("month/1", dtype, @date_or_datetime_dtypes) + do: super_dtype_error("month/1", dtype, [:date, :datetime, :naive_datetime]) @doc """ Returns the year number in the calendar date. 
@@ -6124,11 +6194,11 @@ defmodule Explorer.Series do """ @doc type: :datetime_wise @spec year(Series.t()) :: Series.t() - def year(%Series{dtype: dtype} = series) when K.in(dtype, @date_or_datetime_dtypes), + def year(%Series{dtype: dtype} = series) when is_date_like_dtype(dtype), do: apply_series_list(:year, [series]) def year(%Series{dtype: dtype}), - do: dtype_error("year/1", dtype, @date_or_datetime_dtypes) + do: super_dtype_error("year/1", dtype, [:date, :datetime, :naive_datetime]) @doc """ Returns the hour number from 0 to 23. @@ -6144,11 +6214,11 @@ defmodule Explorer.Series do """ @doc type: :datetime_wise @spec hour(Series.t()) :: Series.t() - def hour(%Series{dtype: dtype} = series) when K.in(dtype, @datetime_dtypes), + def hour(%Series{dtype: dtype} = series) when is_time_like_dtype(dtype), do: apply_series_list(:hour, [series]) def hour(%Series{dtype: dtype}), - do: dtype_error("hour/1", dtype, @datetime_dtypes) + do: super_dtype_error("hour/1", dtype, [:time, :datetime, :naive_datetime]) @doc """ Returns the minute number from 0 to 59. @@ -6164,11 +6234,11 @@ defmodule Explorer.Series do """ @doc type: :datetime_wise @spec minute(Series.t()) :: Series.t() - def minute(%Series{dtype: dtype} = series) when K.in(dtype, @datetime_dtypes), + def minute(%Series{dtype: dtype} = series) when is_time_like_dtype(dtype), do: apply_series_list(:minute, [series]) def minute(%Series{dtype: dtype}), - do: dtype_error("minute/1", dtype, @datetime_dtypes) + do: super_dtype_error("minute/1", dtype, [:time, :datetime, :naive_datetime]) @doc """ Returns the second number from 0 to 59. 
@@ -6184,11 +6254,11 @@ defmodule Explorer.Series do
   """
   @doc type: :datetime_wise
   @spec second(Series.t()) :: Series.t()
-  def second(%Series{dtype: dtype} = series) when K.in(dtype, @datetime_dtypes),
+  def second(%Series{dtype: dtype} = series) when is_time_like_dtype(dtype),
     do: apply_series_list(:second, [series])
 
   def second(%Series{dtype: dtype}),
-    do: dtype_error("minute/1", dtype, @datetime_dtypes)
+    do: super_dtype_error("second/1", dtype, [:time, :datetime, :naive_datetime])
 
   @doc """
   Returns a day-of-week number starting from Monday = 1. (ISO 8601 weekday number)
@@ -6214,11 +6284,11 @@ defmodule Explorer.Series do
 
   @doc type: :datetime_wise
   @spec day_of_week(Series.t()) :: Series.t()
-  def day_of_week(%Series{dtype: dtype} = series) when K.in(dtype, @date_or_datetime_dtypes),
+  def day_of_week(%Series{dtype: dtype} = series) when is_date_like_dtype(dtype),
     do: apply_series_list(:day_of_week, [series])
 
   def day_of_week(%Series{dtype: dtype}),
-    do: dtype_error("day_of_week/1", dtype, @date_or_datetime_dtypes)
+    do: super_dtype_error("day_of_week/1", dtype, [:date, :datetime, :naive_datetime])
 
   @doc """
   Returns the day-of-year number starting from 1.
@@ -6247,11 +6317,11 @@ defmodule Explorer.Series do
   """
   @doc type: :datetime_wise
   @spec day_of_year(Series.t()) :: Series.t()
-  def day_of_year(%Series{dtype: dtype} = series) when K.in(dtype, @date_or_datetime_dtypes),
+  def day_of_year(%Series{dtype: dtype} = series) when is_date_like_dtype(dtype),
     do: apply_series_list(:day_of_year, [series])
 
   def day_of_year(%Series{dtype: dtype}),
-    do: dtype_error("day_of_year/1", dtype, @date_or_datetime_dtypes)
+    do: super_dtype_error("day_of_year/1", dtype, [:date, :datetime, :naive_datetime])
 
   @doc """
   Returns the week-of-year number.
@@ -6283,27 +6353,27 @@ defmodule Explorer.Series do """ @doc type: :datetime_wise @spec week_of_year(Series.t()) :: Series.t() - def week_of_year(%Series{dtype: dtype} = series) when K.in(dtype, @date_or_datetime_dtypes), + def week_of_year(%Series{dtype: dtype} = series) when is_date_like_dtype(dtype), do: apply_series_list(:week_of_year, [series]) def week_of_year(%Series{dtype: dtype}), - do: dtype_error("week_of_year/1", dtype, @date_or_datetime_dtypes) + do: super_dtype_error("week_of_year/1", dtype, [:date, :datetime, :naive_datetime]) @deprecated "Use cast(:date) instead" @doc type: :deprecated - def to_date(%Series{dtype: dtype} = series) when K.in(dtype, @datetime_dtypes), + def to_date(%Series{dtype: dtype} = series) when is_datetime_like_dtype(dtype), do: cast(series, :date) def to_date(%Series{dtype: dtype}), - do: dtype_error("to_date/1", dtype, @datetime_dtypes) + do: super_dtype_error("to_date/1", dtype, [:datetime, :naive_datetime]) @deprecated "Use cast(:time) instead" @doc type: :deprecated - def to_time(%Series{dtype: dtype} = series) when K.in(dtype, @datetime_dtypes), + def to_time(%Series{dtype: dtype} = series) when is_datetime_like_dtype(dtype), do: cast(series, :time) def to_time(%Series{dtype: dtype}), - do: dtype_error("to_time/1", dtype, @datetime_dtypes) + do: super_dtype_error("to_time/1", dtype, [:datetime, :naive_datetime]) @doc """ Join all string items in a sublist and place a separator between them. @@ -6550,14 +6620,23 @@ defmodule Explorer.Series do :"#{backend}.Series" end - defp dtype_error(function, dtype, valid_dtypes) when is_list(valid_dtypes) do + defp super_dtype_error(function, dtype, valid_super_dtypes) do raise( ArgumentError, "Explorer.Series.#{function} not implemented for dtype #{inspect(dtype)}.
" <> - "Valid " <> Shared.inspect_dtypes(valid_dtypes, with_prefix: true) + "Valid dtypes are any subtype of #{inspect(valid_super_dtypes)}" ) end + defp dtype_error(function, dtype, valid_dtypes) when is_list(valid_dtypes) do + raise ArgumentError, dtype_error_message(function, dtype, valid_dtypes) + end + + defp dtype_error_message(function, dtype, valid_dtypes) when is_list(valid_dtypes) do + "Explorer.Series.#{function} not implemented for dtype #{inspect(dtype)}. " <> + "Valid " <> Shared.inspect_dtypes(valid_dtypes, with_prefix: true) + end + @spec dtype_mismatch_error(String.t(), any(), any(), [any()]) :: no_return() defp dtype_mismatch_error(function, left, right, valid) do left_series? = match?(%Series{}, left) diff --git a/lib/explorer/shared.ex b/lib/explorer/shared.ex index e406c0917..d179faf88 100644 --- a/lib/explorer/shared.ex +++ b/lib/explorer/shared.ex @@ -13,7 +13,14 @@ defmodule Explorer.Shared do {:u, 64} ] + @precisions [:millisecond, :microsecond, :nanosecond] + + @precision_types for d <- [:naive_datetime, :duration], + p <- @precisions, + do: {d, p} + @scalar_types @integer_types ++ + @precision_types ++ [ :null, :binary, @@ -23,13 +30,7 @@ defmodule Explorer.Shared do {:f, 32}, {:f, 64}, :string, - :time, - {:datetime, :microsecond}, - {:datetime, :millisecond}, - {:datetime, :nanosecond}, - {:duration, :microsecond}, - {:duration, :millisecond}, - {:duration, :nanosecond} + :time ] @doc """ @@ -71,6 +72,9 @@ defmodule Explorer.Shared do end) end + def normalise_dtype({:datetime, p, tz} = dtype) when p in @precisions and is_binary(tz), + do: dtype + def normalise_dtype(dtype) when dtype in @scalar_types, do: dtype def normalise_dtype(dtype) when dtype in [:float, :f64], do: {:f, 64} def normalise_dtype(dtype) when dtype in [:integer, :s64], do: {:s, 64} @@ -98,16 +102,14 @@ defmodule Explorer.Shared do end @doc """ - Supported datetime dtypes. + Supported naive datetime dtypes. 
""" - def datetime_types, - do: [{:datetime, :nanosecond}, {:datetime, :microsecond}, {:datetime, :millisecond}] + def naive_datetime_types, do: for(p <- @precisions, do: {:naive_datetime, p}) @doc """ Supported duration dtypes. """ - def duration_types, - do: [{:duration, :nanosecond}, {:duration, :microsecond}, {:duration, :millisecond}] + def duration_types, do: for(p <- @precisions, do: {:duration, p}) @doc """ Supported float dtypes. @@ -311,8 +313,10 @@ defmodule Explorer.Shared do defp infer_type(%Date{} = _item), do: :date defp infer_type(%Time{} = _item), do: :time - defp infer_type(%NaiveDateTime{} = _item), do: {:datetime, :microsecond} + defp infer_type(%DateTime{time_zone: tz} = _item), do: {:datetime, :microsecond, tz} + defp infer_type(%NaiveDateTime{} = _item), do: {:naive_datetime, :microsecond} defp infer_type(%Explorer.Duration{precision: precision} = _item), do: {:duration, precision} + defp infer_type(%_{} = item), do: raise(ArgumentError, "unsupported datatype: #{inspect(item)}") defp infer_type(item) when is_integer(item), do: {:s, 64} defp infer_type(item) when is_float(item) or item in @non_finite, do: {:f, 64} defp infer_type(item) when is_boolean(item), do: :boolean @@ -528,7 +532,8 @@ defmodule Explorer.Shared do :boolean -> {:u, 8} :date -> {:s, 32} :time -> {:s, 64} - {:datetime, _} -> {:s, 64} + {:naive_datetime, _} -> {:s, 64} + {:datetime, _, _} -> {:s, 64} {:duration, _} -> {:s, 64} _ -> :none end @@ -559,12 +564,9 @@ defmodule Explorer.Shared do @doc """ Converts dtype to its string representation. 
""" - def dtype_to_string({:datetime, :millisecond}), do: "datetime[ms]" - def dtype_to_string({:datetime, :microsecond}), do: "datetime[μs]" - def dtype_to_string({:datetime, :nanosecond}), do: "datetime[ns]" - def dtype_to_string({:duration, :millisecond}), do: "duration[ms]" - def dtype_to_string({:duration, :microsecond}), do: "duration[μs]" - def dtype_to_string({:duration, :nanosecond}), do: "duration[ns]" + def dtype_to_string({:naive_datetime, p}), do: "naive_datetime[#{precision_string(p)}]" + def dtype_to_string({:datetime, p, tz}), do: "datetime[#{precision_string(p)}, #{tz}]" + def dtype_to_string({:duration, p}), do: "duration[#{precision_string(p)}]" def dtype_to_string({:list, dtype}), do: "list[" <> dtype_to_string(dtype) <> "]" def dtype_to_string({:struct, fields}), do: "struct[#{length(fields)}]" def dtype_to_string({:f, size}), do: "f" <> Integer.to_string(size) @@ -572,6 +574,10 @@ defmodule Explorer.Shared do def dtype_to_string({:u, size}), do: "u" <> Integer.to_string(size) def dtype_to_string(other) when is_atom(other), do: Atom.to_string(other) + defp precision_string(:millisecond), do: "ms" + defp precision_string(:microsecond), do: "μs" + defp precision_string(:nanosecond), do: "ns" + @threshold 0.77 @max_suggestions 5 diff --git a/lib/explorer/tensor_frame.ex b/lib/explorer/tensor_frame.ex index 104b6296d..c9c122c70 100644 --- a/lib/explorer/tensor_frame.ex +++ b/lib/explorer/tensor_frame.ex @@ -91,9 +91,9 @@ if Code.ensure_loaded?(Nx) do * `{:f, 64}` * `:boolean` * `:date` - * `{:datetime, :millisecond}` - * `{:datetime, :microsecond}` - * `{:datetime, :nanosecond}` + * `{:naive_datetime, :millisecond}` + * `{:naive_datetime, :microsecond}` + * `{:naive_datetime, :nanosecond}` See `Explorer.Series.to_iovec/1` and `Explorer.Series.to_tensor/1` to learn more about their internal representation. 
diff --git a/mix.exs b/mix.exs index 511f65e20..eee72db7c 100644 --- a/mix.exs +++ b/mix.exs @@ -51,6 +51,7 @@ defmodule Explorer.MixProject do ## Test {:bypass, "~> 2.1", only: :test}, {:stream_data, "~> 0.6", only: :test}, + {:tz, "~> 0.26", only: :test}, ## Dev {:ex_doc, "~> 0.24", only: :dev}, diff --git a/mix.lock b/mix.lock index a9fa52964..d5889d9e7 100644 --- a/mix.lock +++ b/mix.lock @@ -35,4 +35,5 @@ "table_rex": {:hex, :table_rex, "4.0.0", "3c613a68ebdc6d4d1e731bc973c233500974ec3993c99fcdabb210407b90959b", [:mix], [], "hexpm", "c35c4d5612ca49ebb0344ea10387da4d2afe278387d4019e4d8111e815df8f55"}, "telemetry": {:hex, :telemetry, "1.2.1", "68fdfe8d8f05a8428483a97d7aab2f268aaff24b49e0f599faa091f1d4e7f61c", [:rebar3], [], "hexpm", "dad9ce9d8effc621708f99eac538ef1cbe05d6a874dd741de2e689c47feafed5"}, "toml": {:hex, :toml, "0.7.0", "fbcd773caa937d0c7a02c301a1feea25612720ac3fa1ccb8bfd9d30d822911de", [:mix], [], "hexpm", "0690246a2478c1defd100b0c9b89b4ea280a22be9a7b313a8a058a2408a2fa70"}, + "tz": {:hex, :tz, "0.26.5", "bfe8efa345670f90351c5c31d22455d0307c5d9895fbdede7deeb215a7b60dbe", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:mint, "~> 1.5", [hex: :mint, repo: "hexpm", optional: true]}], "hexpm", "c4f9392d710582c7108b6b8c635f4981120ec4b2072adbd242290fc842338183"}, } diff --git a/native/explorer/Cargo.lock b/native/explorer/Cargo.lock index df9189b07..67d03a394 100644 --- a/native/explorer/Cargo.lock +++ b/native/explorer/Cargo.lock @@ -283,7 +283,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d59ae0466b83e838b81a54256c39d5d7c20b9d7daa10510a242d9b75abd5936e" dependencies = [ "chrono", - "chrono-tz-build", + "chrono-tz-build 0.2.1", + "phf", +] + +[[package]] +name = "chrono-tz" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93698b29de5e97ad0ae26447b344c482a7284c737d9ddc5f9e52b74a336671bb" +dependencies = [ + "chrono", + 
"chrono-tz-build 0.3.0", "phf", ] @@ -298,6 +309,17 @@ dependencies = [ "phf_codegen", ] +[[package]] +name = "chrono-tz-build" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c088aee841df9c3041febbb73934cfc39708749bf96dc827e3359cd39ef11b1" +dependencies = [ + "parse-zoneinfo", + "phf", + "phf_codegen", +] + [[package]] name = "cmake" version = "0.1.50" @@ -452,6 +474,7 @@ version = "0.1.0" dependencies = [ "anyhow", "chrono", + "chrono-tz 0.9.0", "either", "mimalloc", "object_store", @@ -1346,7 +1369,7 @@ dependencies = [ "atoi_simd", "bytemuck", "chrono", - "chrono-tz", + "chrono-tz 0.8.6", "dyn-clone", "either", "ethnum", @@ -1407,7 +1430,7 @@ dependencies = [ "bitflags 2.5.0", "bytemuck", "chrono", - "chrono-tz", + "chrono-tz 0.8.6", "either", "hashbrown", "indexmap", @@ -1452,6 +1475,7 @@ dependencies = [ "atoi_simd", "bytes", "chrono", + "chrono-tz 0.8.6", "fast-float", "flate2", "futures", @@ -1542,7 +1566,7 @@ dependencies = [ "base64", "bytemuck", "chrono", - "chrono-tz", + "chrono-tz 0.8.6", "either", "hashbrown", "hex", @@ -1627,7 +1651,7 @@ checksum = "ff48362bd1b078bbbec7e7ba9ec01fea58fee2887db22a8e3deaf78f322fa3c4" dependencies = [ "ahash", "bytemuck", - "chrono-tz", + "chrono-tz 0.8.6", "futures", "once_cell", "percent-encoding", @@ -1684,7 +1708,7 @@ checksum = "86eb74ea6ddfe675aa5c3f33c00dadbe2b85f0e8e3887b85db1fd5a3397267fd" dependencies = [ "atoi", "chrono", - "chrono-tz", + "chrono-tz 0.8.6", "now", "once_cell", "polars-arrow", diff --git a/native/explorer/Cargo.toml b/native/explorer/Cargo.toml index b101e1e0b..2fed20cf6 100644 --- a/native/explorer/Cargo.toml +++ b/native/explorer/Cargo.toml @@ -14,6 +14,7 @@ crate-type = ["cdylib"] [dependencies] anyhow = "1" chrono = "0.4" +chrono-tz = "0.9" rand = { version = "0.8", features = ["alloc"] } rand_pcg = "0.3" rustler = { version = "0.32", default-features = false, features = ["derive"] } @@ -43,44 +44,45 @@ features = [ "abs", 
"checked_arithmetic", "concat_str", + "cov", "cross_join", + "csv", "cum_agg", "cutqcut", - "csv", - "cov", "decompress-fast", "describe", "dtype-full", + "ewma", + "extract_groups", + "extract_jsonpath", "group_by_list", - "ipc", "ipc_streaming", + "ipc", + "is_in", "lazy", - "regex", "log", "mode", + "moment", "parquet", + "peaks", "performant", "pivot", + "product", + "propagate_nans", "random", + "range", + "rank", + "regex", "rolling_window", + "round_series", "rows", "simd", + "streaming", + "strings", "temporal", + "timezones", "to_dummies", "trigonometry", - "is_in", - "streaming", - "strings", - "round_series", - "ewma", - "product", - "peaks", - "moment", - "range", - "rank", - "propagate_nans", - "extract_jsonpath", - "extract_groups", ] [dependencies.polars-ops] @@ -90,7 +92,14 @@ features = ["abs", "ewma", "cum_agg", "cov"] [features] default = ["ndjson", "cloud", "nif_version_2_15"] -cloud = ["object_store", "tokio", "tokio-util", "aws", "polars/cloud", "polars/cloud_write"] +cloud = [ + "object_store", + "tokio", + "tokio-util", + "aws", + "polars/cloud", + "polars/cloud_write", +] ndjson = ["polars/json"] aws = ["object_store/aws", "polars/async", "polars/aws"] diff --git a/native/explorer/src/datatypes.rs b/native/explorer/src/datatypes.rs index 619bfdd66..e9b515161 100644 --- a/native/explorer/src/datatypes.rs +++ b/native/explorer/src/datatypes.rs @@ -17,6 +17,12 @@ use std::str::FromStr; #[cfg(feature = "aws")] use polars::prelude::cloud::AmazonS3ConfigKey as S3Key; +// TODO: we'll need these again when we resolve the lifetime issues with +// `ExDateTime` below. 
+// use chrono_tz::OffsetComponents; +// use chrono_tz::OffsetName; +use chrono_tz::Tz; + pub use ex_dtypes::*; pub struct ExDataFrameRef(pub DataFrame); @@ -257,22 +263,20 @@ fn time_unit_of_ex_duration(duration: &ExDuration) -> TimeUnit { #[derive(NifStruct, Copy, Clone, Debug)] #[module = "NaiveDateTime"] -pub struct ExDateTime { +pub struct ExNaiveDateTime { pub calendar: Atom, pub day: u32, - pub month: u32, - pub year: i32, pub hour: u32, + pub microsecond: (u32, u32), pub minute: u32, + pub month: u32, pub second: u32, - pub microsecond: (u32, u32), + pub year: i32, } pub use polars::export::arrow::temporal_conversions::date32_to_date as days_to_date; -/// Converts a microsecond i64 to a `NaiveDateTime`. -/// This is because when getting a timestamp, it might have negative values. -pub fn timestamp_to_datetime(microseconds: i64) -> NaiveDateTime { +pub fn timestamp_to_datetime_utc(microseconds: i64) -> DateTime { let sign = microseconds.signum(); let seconds = match sign { -1 => microseconds / 1_000_000 - 1, @@ -285,7 +289,12 @@ pub fn timestamp_to_datetime(microseconds: i64) -> NaiveDateTime { let nanoseconds = remainder.abs() * 1_000; DateTime::::from_timestamp(seconds, nanoseconds.try_into().unwrap()) .expect("construct a UTC") - .naive_utc() +} + +/// Converts a microsecond i64 to a `NaiveDateTime`. +/// This is because when getting a timestamp, it might have negative values. +pub fn timestamp_to_datetime(microseconds: i64) -> NaiveDateTime { + timestamp_to_datetime_utc(microseconds).naive_utc() } // Limit the number of digits in the microsecond part of a timestamp to 6. 
@@ -299,14 +308,14 @@ fn microseconds_six_digits(microseconds: u32) -> u32 { } } -impl From for ExDateTime { +impl From for ExNaiveDateTime { fn from(microseconds: i64) -> Self { timestamp_to_datetime(microseconds).into() } } -impl From for i64 { - fn from(dt: ExDateTime) -> i64 { +impl From for i64 { + fn from(dt: ExNaiveDateTime) -> i64 { let duration = NaiveDate::from_ymd_opt(dt.year, dt.month, dt.day) .unwrap() .and_hms_micro_opt(dt.hour, dt.minute, dt.second, dt.microsecond.0) @@ -325,8 +334,8 @@ impl From for i64 { } } -impl From for NaiveDateTime { - fn from(dt: ExDateTime) -> NaiveDateTime { +impl From for NaiveDateTime { + fn from(dt: ExNaiveDateTime) -> NaiveDateTime { NaiveDate::from_ymd_opt(dt.year, dt.month, dt.day) .unwrap() .and_hms_micro_opt(dt.hour, dt.minute, dt.second, dt.microsecond.0) @@ -334,9 +343,9 @@ impl From for NaiveDateTime { } } -impl From for ExDateTime { +impl From for ExNaiveDateTime { fn from(dt: NaiveDateTime) -> Self { - ExDateTime { + ExNaiveDateTime { calendar: atoms::calendar_iso_module(), day: dt.day(), month: dt.month(), @@ -352,12 +361,107 @@ impl From for ExDateTime { } } -impl Literal for ExDateTime { +impl Literal for ExNaiveDateTime { fn lit(self) -> Expr { NaiveDateTime::from(self).lit() } } +#[derive(NifStruct, Copy, Clone, Debug)] +#[module = "DateTime"] +pub struct ExDateTime<'a> { + pub calendar: Atom, + pub day: u32, + pub hour: u32, + pub microsecond: (u32, u32), + pub minute: u32, + pub month: u32, + pub second: u32, + pub std_offset: i64, + pub time_zone: &'a str, + pub utc_offset: i64, + pub year: i32, + pub zone_abbr: &'a str, +} + +// impl From for ExDateTime<'_> { +// fn from(microseconds: i64) -> Self { +// timestamp_to_datetime_utc(microseconds).into() +// } +// } + +impl From> for i64 { + fn from(dt: ExDateTime<'_>) -> i64 { + let duration = NaiveDate::from_ymd_opt(dt.year, dt.month, dt.day) + .unwrap() + .and_hms_micro_opt(dt.hour, dt.minute, dt.second, dt.microsecond.0) + .unwrap() + 
.signed_duration_since( + NaiveDate::from_ymd_opt(1970, 1, 1) + .unwrap() + .and_hms_opt(0, 0, 0) + .unwrap(), + ); + + match duration.num_microseconds() { + Some(us) => us, + None => duration.num_milliseconds() * 1_000, + } + } +} + +// TODO: resolve the lifetime issues for `time_zone` and `zone_abbr`. +// +// impl<'a> From> for ExDateTime<'a> { +// fn from(dt_tz: DateTime) -> ExDateTime<'a> { +// let & time_zone = dt_tz.offset().tz_id(); +// let & zone_abbr = dt_tz.offset().abbreviation(); +// +// ExDateTime { +// calendar: atoms::calendar_iso_module(), +// day: dt_tz.day(), +// hour: dt_tz.hour(), +// microsecond: (microseconds_six_digits(dt_tz.timestamp_subsec_micros()), 6), +// minute: dt_tz.minute(), +// month: dt_tz.month(), +// second: dt_tz.second(), +// std_offset: dt_tz.offset().dst_offset().num_seconds(), +// time_zone: time_zone, +// utc_offset: dt_tz.offset().base_utc_offset().num_seconds(), +// year: dt_tz.year(), +// zone_abbr: zone_abbr, +// } +// } +// } + +impl From> for DateTime { + fn from(ex_dt: ExDateTime<'_>) -> DateTime { + let time_zone = ex_dt.time_zone.parse::().unwrap(); + + // Best approach I could find to avoid warning: + // https://github.com/chronotope/chrono/issues/873#issuecomment-1333716953 + let dt_tz_without_micro = time_zone + .with_ymd_and_hms( + ex_dt.year, + ex_dt.month, + ex_dt.day, + ex_dt.hour, + ex_dt.minute, + ex_dt.second, + ) + .unwrap(); + let micro = chrono::Duration::microseconds(ex_dt.microsecond.0.into()); + dt_tz_without_micro + micro + } +} + +// TODO: Polars doesn't provide a default `Literal` impl. Find out why. 
+// impl Literal for ExDateTime<'_> { +// fn lit(self) -> Expr { +// DateTime::from(self).lit() +// } +// } + #[derive(NifStruct, Copy, Clone, Debug)] #[module = "Time"] pub struct ExTime { @@ -433,7 +537,7 @@ pub enum ExValidValue<'a> { Str(&'a str), Date(ExDate), Time(ExTime), - DateTime(ExDateTime), + DateTime(ExNaiveDateTime), Duration(ExDuration), } @@ -476,7 +580,7 @@ impl<'a> rustler::Decoder<'a> for ExValidValue<'a> { Ok(ExValidValue::Date(date)) } else if let Ok(time) = term.decode::() { Ok(ExValidValue::Time(time)) - } else if let Ok(datetime) = term.decode::() { + } else if let Ok(datetime) = term.decode::() { Ok(ExValidValue::DateTime(datetime)) } else if let Ok(duration) = term.decode::() { Ok(ExValidValue::Duration(duration)) diff --git a/native/explorer/src/datatypes/ex_dtypes.rs b/native/explorer/src/datatypes/ex_dtypes.rs index b52cd8cc1..87249bfb5 100644 --- a/native/explorer/src/datatypes/ex_dtypes.rs +++ b/native/explorer/src/datatypes/ex_dtypes.rs @@ -40,6 +40,16 @@ impl TryFrom<&ExTimeUnit> for TimeUnit { } } +impl From<&TimeUnit> for ExTimeUnit { + fn from(value: &TimeUnit) -> ExTimeUnit { + match value { + TimeUnit::Milliseconds => ExTimeUnit::Millisecond, + TimeUnit::Microseconds => ExTimeUnit::Microsecond, + TimeUnit::Nanoseconds => ExTimeUnit::Nanosecond, + } + } +} + #[derive(NifTaggedEnum)] pub enum ExSeriesDtype { Null, @@ -52,7 +62,8 @@ pub enum ExSeriesDtype { U(u8), String, Time, - Datetime(ExTimeUnit), + NaiveDatetime(ExTimeUnit), + Datetime(ExTimeUnit, String), Duration(ExTimeUnit), List(Box), Struct(Vec<(String, ExSeriesDtype)>), @@ -82,25 +93,11 @@ impl TryFrom<&DataType> for ExSeriesDtype { DataType::Time => Ok(ExSeriesDtype::Time), DataType::String => Ok(ExSeriesDtype::String), - DataType::Datetime(TimeUnit::Nanoseconds, _) => { - Ok(ExSeriesDtype::Datetime(ExTimeUnit::Nanosecond)) - } - DataType::Datetime(TimeUnit::Microseconds, _) => { - Ok(ExSeriesDtype::Datetime(ExTimeUnit::Microsecond)) - } - 
DataType::Datetime(TimeUnit::Milliseconds, _) => { - Ok(ExSeriesDtype::Datetime(ExTimeUnit::Millisecond)) - } - - DataType::Duration(TimeUnit::Nanoseconds) => { - Ok(ExSeriesDtype::Duration(ExTimeUnit::Nanosecond)) - } - DataType::Duration(TimeUnit::Microseconds) => { - Ok(ExSeriesDtype::Duration(ExTimeUnit::Microsecond)) - } - DataType::Duration(TimeUnit::Milliseconds) => { - Ok(ExSeriesDtype::Duration(ExTimeUnit::Millisecond)) + DataType::Datetime(tu, None) => Ok(ExSeriesDtype::NaiveDatetime(tu.into())), + DataType::Datetime(tu, Some(tz)) => { + Ok(ExSeriesDtype::Datetime(tu.into(), tz.to_string())) } + DataType::Duration(tu) => Ok(ExSeriesDtype::Duration(tu.into())), DataType::List(inner) => Ok(ExSeriesDtype::List(Box::new(Self::try_from( inner.as_ref(), @@ -158,24 +155,14 @@ impl TryFrom<&ExSeriesDtype> for DataType { ))), ExSeriesDtype::String => Ok(DataType::String), ExSeriesDtype::Time => Ok(DataType::Time), - ExSeriesDtype::Datetime(ExTimeUnit::Nanosecond) => { - Ok(DataType::Datetime(TimeUnit::Nanoseconds, None)) - } - ExSeriesDtype::Datetime(ExTimeUnit::Microsecond) => { - Ok(DataType::Datetime(TimeUnit::Microseconds, None)) - } - ExSeriesDtype::Datetime(ExTimeUnit::Millisecond) => { - Ok(DataType::Datetime(TimeUnit::Milliseconds, None)) - } - ExSeriesDtype::Duration(ExTimeUnit::Nanosecond) => { - Ok(DataType::Duration(TimeUnit::Nanoseconds)) - } - ExSeriesDtype::Duration(ExTimeUnit::Microsecond) => { - Ok(DataType::Duration(TimeUnit::Microseconds)) - } - ExSeriesDtype::Duration(ExTimeUnit::Millisecond) => { - Ok(DataType::Duration(TimeUnit::Milliseconds)) + ExSeriesDtype::NaiveDatetime(ex_timeunit) => { + Ok(DataType::Datetime(ex_timeunit.try_into()?, None)) } + ExSeriesDtype::Datetime(ex_timeunit, tz_option) => Ok(DataType::Datetime( + ex_timeunit.try_into()?, + Some(tz_option.clone()), + )), + ExSeriesDtype::Duration(ex_timeunit) => Ok(DataType::Duration(ex_timeunit.try_into()?)), ExSeriesDtype::List(inner) => { 
Ok(DataType::List(Box::new(Self::try_from(inner.as_ref())?))) } diff --git a/native/explorer/src/encoding.rs b/native/explorer/src/encoding.rs index 88ac2372e..6145cae30 100644 --- a/native/explorer/src/encoding.rs +++ b/native/explorer/src/encoding.rs @@ -4,9 +4,13 @@ use rustler::{Encoder, Env, NewBinary, OwnedBinary, ResourceArc, Term}; use std::collections::HashMap; use std::{mem, slice}; +use chrono_tz::OffsetComponents; +use chrono_tz::OffsetName; +use chrono_tz::Tz; + use crate::atoms::{ self, calendar, day, hour, infinity, microsecond, millisecond, minute, month, nan, nanosecond, - neg_infinity, precision, second, value, year, + neg_infinity, precision, second, std_offset, time_zone, utc_offset, value, year, zone_abbr, }; use crate::datatypes::{ days_to_date, time64ns_to_time, timestamp_to_datetime, ExSeries, ExSeriesRef, @@ -103,7 +107,7 @@ fn date_series_to_list<'b>(s: &Series, env: Env<'b>) -> Result, Explore )) } -macro_rules! unsafe_encode_datetime { +macro_rules! unsafe_encode_naive_datetime { ($v: expr, $naive_datetime_struct_keys: ident, $calendar_iso_module: ident, $naive_datetime_module: ident, $env: ident) => {{ let dt = timestamp_to_datetime($v); let microseconds = dt.and_utc().timestamp_subsec_micros(); @@ -158,7 +162,7 @@ fn naive_datetime_struct_keys(env: Env) -> [NIF_TERM; 9] { } #[inline] -fn datetime_to_microseconds(v: i64, time_unit: TimeUnit) -> i64 { +fn naive_datetime_to_microseconds(v: i64, time_unit: TimeUnit) -> i64 { match time_unit { TimeUnit::Milliseconds => v * 1000, TimeUnit::Microseconds => v, @@ -167,13 +171,13 @@ fn datetime_to_microseconds(v: i64, time_unit: TimeUnit) -> i64 { } #[inline] -pub fn encode_datetime(v: i64, time_unit: TimeUnit, env: Env) -> Result { +pub fn encode_naive_datetime(v: i64, time_unit: TimeUnit, env: Env) -> Result { let naive_datetime_struct_keys = &naive_datetime_struct_keys(env); let calendar_iso_module = atoms::calendar_iso_module().encode(env).as_c_arg(); let naive_datetime_module = 
atoms::naive_datetime_module().encode(env).as_c_arg(); - let microseconds_time = datetime_to_microseconds(v, time_unit); + let microseconds_time = naive_datetime_to_microseconds(v, time_unit); - Ok(unsafe_encode_datetime!( + Ok(unsafe_encode_naive_datetime!( microseconds_time, naive_datetime_struct_keys, calendar_iso_module, @@ -183,7 +187,7 @@ pub fn encode_datetime(v: i64, time_unit: TimeUnit, env: Env) -> Result( +fn naive_datetime_series_to_list<'b>( s: &Series, time_unit: TimeUnit, env: Env<'b>, @@ -196,9 +200,9 @@ fn datetime_series_to_list<'b>( env, s.datetime()?.into_iter().map(|option| option .map(|v| { - let microseconds_time = datetime_to_microseconds(v, time_unit); + let microseconds_time = naive_datetime_to_microseconds(v, time_unit); - unsafe_encode_datetime!( + unsafe_encode_naive_datetime!( microseconds_time, naive_datetime_struct_keys, calendar_iso_module, @@ -210,6 +214,134 @@ fn datetime_series_to_list<'b>( )) } +macro_rules! unsafe_encode_datetime { + ( + $v: expr, + $time_zone: expr, + $datetime_struct_keys: ident, + $calendar_iso_module: ident, + $datetime_module: ident, + $env: ident + ) => {{ + let ndt = timestamp_to_datetime($v); + let microseconds = ndt.and_utc().timestamp_subsec_micros(); + let tz = $time_zone.parse::().unwrap(); + let dt_tz = tz.from_local_datetime(&ndt).unwrap(); + let tz_offset = dt_tz.offset(); + + // Limit the number of digits in the microsecond part of a timestamp to 6. + // This is necessary because the microsecond part of Elixir is only 6 digits. 
+ let limited_ms = if microseconds > 999_999 { + 999_999 + } else { + microseconds + }; + + unsafe { + Term::new( + $env, + map::make_map_from_arrays( + $env.as_c_arg(), + $datetime_struct_keys, + &[ + $datetime_module, + $calendar_iso_module, + dt_tz.day().encode($env).as_c_arg(), + dt_tz.hour().encode($env).as_c_arg(), + (limited_ms, 6).encode($env).as_c_arg(), + dt_tz.minute().encode($env).as_c_arg(), + dt_tz.month().encode($env).as_c_arg(), + dt_tz.second().encode($env).as_c_arg(), + tz_offset.dst_offset().num_seconds().encode($env).as_c_arg(), + $time_zone.to_string().encode($env).as_c_arg(), + tz_offset + .base_utc_offset() + .num_seconds() + .encode($env) + .as_c_arg(), + dt_tz.year().encode($env).as_c_arg(), + tz_offset.abbreviation().encode($env).as_c_arg(), + ], + ) + .unwrap(), + ) + } + }}; +} + +// Here we build the DateTime struct manually, as it's much faster than using NifStruct +// This is because we already have the keys (we know this at compile time), and the types, +// so we can build the struct directly. 
+fn datetime_struct_keys(env: Env) -> [NIF_TERM; 13] { + return [ + atom::__struct__().encode(env).as_c_arg(), + calendar().encode(env).as_c_arg(), + day().encode(env).as_c_arg(), + hour().encode(env).as_c_arg(), + microsecond().encode(env).as_c_arg(), + minute().encode(env).as_c_arg(), + month().encode(env).as_c_arg(), + second().encode(env).as_c_arg(), + std_offset().encode(env).as_c_arg(), + time_zone().encode(env).as_c_arg(), + utc_offset().encode(env).as_c_arg(), + year().encode(env).as_c_arg(), + zone_abbr().encode(env).as_c_arg(), + ]; +} + +#[inline] +pub fn encode_datetime( + v: i64, + time_unit: TimeUnit, + time_zone: String, + env: Env, +) -> Result { + let datetime_struct_keys = &datetime_struct_keys(env); + let calendar_iso_module = atoms::calendar_iso_module().encode(env).as_c_arg(); + let datetime_module = atoms::datetime_module().encode(env).as_c_arg(); + let microseconds_time = naive_datetime_to_microseconds(v, time_unit); + + Ok(unsafe_encode_datetime!( + microseconds_time, + time_zone, + datetime_struct_keys, + calendar_iso_module, + datetime_module, + env + )) +} + +#[inline] +fn datetime_series_to_list<'b>( + s: &Series, + time_unit: TimeUnit, + time_zone: String, + env: Env<'b>, +) -> Result, ExplorerError> { + let datetime_struct_keys = &datetime_struct_keys(env); + let calendar_iso_module = atoms::calendar_iso_module().encode(env).as_c_arg(); + let datetime_module = atoms::datetime_module().encode(env).as_c_arg(); + + Ok(unsafe_iterator_series_to_list!( + env, + s.datetime()?.into_iter().map(|option| option + .map(|v| { + let microseconds_time = naive_datetime_to_microseconds(v, time_unit); + + unsafe_encode_datetime!( + microseconds_time, + time_zone, + datetime_struct_keys, + calendar_iso_module, + datetime_module, + env + ) + }) + .encode(env)) + )) +} + fn time_unit_to_atom(time_unit: TimeUnit) -> atom::Atom { match time_unit { TimeUnit::Milliseconds => millisecond(), @@ -565,7 +697,10 @@ pub fn term_from_value<'b>(v: AnyValue, env: 
Env<'b>) -> Result, Explor AnyValue::Float64(v) => Ok(Some(term_from_float64(v, env)).encode(env)), AnyValue::Date(v) => encode_date(v, env), AnyValue::Time(v) => encode_time(v, env), - AnyValue::Datetime(v, time_unit, None) => encode_datetime(v, time_unit, env), + AnyValue::Datetime(v, time_unit, None) => encode_naive_datetime(v, time_unit, env), + AnyValue::Datetime(v, time_unit, Some(time_zone)) => { + encode_datetime(v, time_unit, time_zone.to_string(), env) + } AnyValue::Duration(v, time_unit) => encode_duration(v, time_unit, env), AnyValue::Categorical(idx, mapping, _) => Ok(mapping.get(idx).encode(env)), AnyValue::List(series) => list_from_series(ExSeries::new(series), env), @@ -599,7 +734,10 @@ pub fn list_from_series(s: ExSeries, env: Env) -> Result { DataType::Date => date_series_to_list(&s, env), DataType::Time => time_series_to_list(&s, env), - DataType::Datetime(time_unit, None) => datetime_series_to_list(&s, *time_unit, env), + DataType::Datetime(time_unit, None) => naive_datetime_series_to_list(&s, *time_unit, env), + DataType::Datetime(time_unit, Some(time_zone)) => { + datetime_series_to_list(&s, *time_unit, time_zone.clone().to_string(), env) + } DataType::Duration(time_unit) => duration_series_to_list(&s, *time_unit, env), DataType::Binary => generic_binary_series_to_list(&s.resource, &s, env), DataType::String => generic_string_series_to_list(&s, env), diff --git a/native/explorer/src/expressions.rs b/native/explorer/src/expressions.rs index 47d43b9bb..af1301d34 100644 --- a/native/explorer/src/expressions.rs +++ b/native/explorer/src/expressions.rs @@ -10,7 +10,8 @@ use polars::prelude::{GetOutput, IntoSeries, Utf8JsonPathImpl}; use polars::series::Series; use crate::datatypes::{ - ExCorrelationMethod, ExDate, ExDateTime, ExDuration, ExRankMethod, ExSeriesDtype, ExValidValue, + ExCorrelationMethod, ExDate, ExDuration, ExNaiveDateTime, ExRankMethod, ExSeriesDtype, + ExValidValue, }; use crate::series::{cast_str_to_f64, ewm_opts, rolling_opts}; 
use crate::{ExDataFrame, ExExpr, ExSeries}; @@ -71,7 +72,7 @@ pub fn expr_date(date: ExDate) -> ExExpr { } #[rustler::nif] -pub fn expr_datetime(datetime: ExDateTime) -> ExExpr { +pub fn expr_datetime(datetime: ExNaiveDateTime) -> ExExpr { ExExpr::new(datetime.lit()) } diff --git a/native/explorer/src/lib.rs b/native/explorer/src/lib.rs index 7f9b8cd13..db726001d 100644 --- a/native/explorer/src/lib.rs +++ b/native/explorer/src/lib.rs @@ -50,6 +50,7 @@ mod atoms { rustler::atoms! { calendar_iso_module = "Elixir.Calendar.ISO", date_module = "Elixir.Date", + datetime_module = "Elixir.DateTime", duration_module = "Elixir.Explorer.Duration", naive_datetime_module = "Elixir.NaiveDateTime", time_module = "Elixir.Time", @@ -67,7 +68,11 @@ mod atoms { calendar, nan, infinity, - neg_infinity + neg_infinity, + std_offset, + time_zone, + utc_offset, + zone_abbr, } } @@ -403,6 +408,7 @@ rustler::init!( s_from_list_bool, s_from_list_date, s_from_list_time, + s_from_list_naive_datetime, s_from_list_datetime, s_from_list_duration, s_from_list_f32, diff --git a/native/explorer/src/series.rs b/native/explorer/src/series.rs index 302d494bd..cad789b3f 100644 --- a/native/explorer/src/series.rs +++ b/native/explorer/src/series.rs @@ -1,13 +1,14 @@ use crate::{ atoms, datatypes::{ - ExCorrelationMethod, ExDate, ExDateTime, ExDuration, ExRankMethod, ExSeriesDtype, ExTime, - ExTimeUnit, ExValidValue, + ExCorrelationMethod, ExDate, ExDateTime, ExDuration, ExNaiveDateTime, ExRankMethod, + ExSeriesDtype, ExTime, ExTimeUnit, ExValidValue, }, encoding, ExDataFrame, ExSeries, ExplorerError, }; -use encoding::encode_datetime; +use encoding::encode_naive_datetime; +// use encoding::encode_datetime; use polars::prelude::*; use polars_ops::chunked_array::cov::{cov, pearson_corr}; @@ -106,20 +107,35 @@ pub fn s_from_list_date(name: &str, val: Vec>) -> ExSeries { ) } -// TODO: Phase out this function in favor of the `ExTimeUnit` enum. -// See `s_strptime` for an example. 
-fn precision_to_timeunit(precision: &str) -> TimeUnit { - match precision { - "millisecond" => TimeUnit::Milliseconds, - "microsecond" => TimeUnit::Microseconds, - "nanosecond" => TimeUnit::Nanoseconds, - _ => panic!("Unknown datetime precision"), - } +#[rustler::nif(schedule = "DirtyCpu")] +pub fn s_from_list_naive_datetime( + name: &str, + val: Vec>, + precision: ExTimeUnit, +) -> ExSeries { + let timeunit = TimeUnit::try_from(&precision).unwrap(); + + ExSeries::new( + Series::new( + name, + val.iter() + .map(|dt| dt.map(|dt| dt.into())) + .collect::>>(), + ) + .cast(&DataType::Datetime(timeunit, None)) + .unwrap(), + ) } #[rustler::nif(schedule = "DirtyCpu")] -pub fn s_from_list_datetime(name: &str, val: Vec>, precision: &str) -> ExSeries { - let timeunit = precision_to_timeunit(precision); +pub fn s_from_list_datetime( + name: &str, + val: Vec>, + precision: ExTimeUnit, + time_zone_str: Option<&str>, +) -> ExSeries { + let timeunit = TimeUnit::try_from(&precision).unwrap(); + let time_zone = time_zone_str.map(|s| s.to_string()); ExSeries::new( Series::new( @@ -128,14 +144,18 @@ pub fn s_from_list_datetime(name: &str, val: Vec>, precision: .map(|dt| dt.map(|dt| dt.into())) .collect::>>(), ) - .cast(&DataType::Datetime(timeunit, None)) + .cast(&DataType::Datetime(timeunit, time_zone)) .unwrap(), ) } #[rustler::nif(schedule = "DirtyCpu")] -pub fn s_from_list_duration(name: &str, val: Vec>, precision: &str) -> ExSeries { - let timeunit = precision_to_timeunit(precision); +pub fn s_from_list_duration( + name: &str, + val: Vec>, + precision: ExTimeUnit, +) -> ExSeries { + let timeunit = TimeUnit::try_from(&precision).unwrap(); ExSeries::new( Series::new( @@ -749,7 +769,7 @@ pub fn s_fill_missing_with_date(series: ExSeries, date: ExDate) -> Result Result { let s = series .datetime()? @@ -988,7 +1008,7 @@ pub fn s_min(env: Env, s: ExSeries) -> Result { DataType::Time => Ok(s.min::()?.map(ExTime::from).encode(env)), DataType::Datetime(unit, _) => Ok(s .min::()? 
- .map(|v| encode_datetime(v, *unit, env).unwrap()) + .map(|v| encode_naive_datetime(v, *unit, env).unwrap()) .encode(env)), dt => panic!("min/1 not implemented for {dt:?}"), } @@ -1010,7 +1030,7 @@ pub fn s_max(env: Env, s: ExSeries) -> Result { DataType::Time => Ok(s.max::()?.map(ExTime::from).encode(env)), DataType::Datetime(unit, _) => Ok(s .max::()? - .map(|v| encode_datetime(v, *unit, env).unwrap()) + .map(|v| encode_naive_datetime(v, *unit, env).unwrap()) .encode(env)), dt => panic!("max/1 not implemented for {dt:?}"), } @@ -1210,8 +1230,8 @@ pub fn s_quantile<'a>( Some(microseconds) => Ok(ExTime::from(microseconds as i64).encode(env)), }, DataType::Datetime(unit, None) => match s.datetime()?.quantile(quantile, strategy)? { - None => Ok(None::.encode(env)), - Some(time) => Ok(encode_datetime(time as i64, *unit, env) + None => Ok(None::.encode(env)), + Some(time) => Ok(encode_naive_datetime(time as i64, *unit, env) .unwrap() .encode(env)), }, @@ -1679,7 +1699,7 @@ pub fn s_strptime( ) -> Result { let timeunit = match precision { None => TimeUnit::Microseconds, - Some(precision) => TimeUnit::try_from(&precision)?, + Some(precision) => TimeUnit::try_from(&precision).unwrap(), }; let s1 = s diff --git a/test/explorer/data_frame/csv_test.exs b/test/explorer/data_frame/csv_test.exs index 1882fffa7..8e4fe5925 100644 --- a/test/explorer/data_frame/csv_test.exs +++ b/test/explorer/data_frame/csv_test.exs @@ -123,9 +123,9 @@ defmodule Explorer.DataFrame.CSVTest do assert_csv(:date, "1960-01-31", ~D[1960-01-31]) end - test "datetime" do + test "naive datetime" do assert_csv( - {:datetime, :microsecond}, + {:naive_datetime, :microsecond}, "2022-10-01T11:34:10.123456", ~N[2022-10-01 11:34:10.123456] ) @@ -192,7 +192,7 @@ defmodule Explorer.DataFrame.CSVTest do """) df = DF.from_csv!(csv, parse_dates: true) - assert %{"c" => {:datetime, :microsecond}} = Explorer.DataFrame.dtypes(df) + assert %{"c" => {:naive_datetime, :microsecond}} = Explorer.DataFrame.dtypes(df) assert 
DF.to_columns(df, atom_keys: true) == %{ a: [1, 3], diff --git a/test/explorer/data_frame/ipc_stream_test.exs b/test/explorer/data_frame/ipc_stream_test.exs index 71774d6e0..69b3c0798 100644 --- a/test/explorer/data_frame/ipc_stream_test.exs +++ b/test/explorer/data_frame/ipc_stream_test.exs @@ -100,15 +100,15 @@ defmodule Explorer.DataFrame.IPCStreamTest do assert_ipc_stream(:date, 0, ~D[1970-01-01]) end - test "datetime" do + test "naive datetime" do assert_ipc_stream( - {:datetime, :microsecond}, + {:naive_datetime, :microsecond}, ~N[2022-10-01 11:34:10.123456], ~N[2022-10-01 11:34:10.123456] ) assert_ipc_stream( - {:datetime, :microsecond}, + {:naive_datetime, :microsecond}, 0, ~N[1970-01-01 00:00:00.000000] ) diff --git a/test/explorer/data_frame/ipc_test.exs b/test/explorer/data_frame/ipc_test.exs index c55f6064e..e0e45d14a 100644 --- a/test/explorer/data_frame/ipc_test.exs +++ b/test/explorer/data_frame/ipc_test.exs @@ -101,11 +101,15 @@ defmodule Explorer.DataFrame.IPCTest do assert_ipc(:date, ~D[2022-12-01], ~D[2022-12-01]) end - test "datetime" do - assert_ipc({:datetime, :microsecond}, 1_664_624_050_123_456, ~N[2022-10-01 11:34:10.123456]) + test "naive datetime" do + assert_ipc( + {:naive_datetime, :microsecond}, + 1_664_624_050_123_456, + ~N[2022-10-01 11:34:10.123456] + ) assert_ipc( - {:datetime, :microsecond}, + {:naive_datetime, :microsecond}, ~N[2022-10-01 11:34:10.123456], ~N[2022-10-01 11:34:10.123456] ) diff --git a/test/explorer/data_frame/parquet_test.exs b/test/explorer/data_frame/parquet_test.exs index 565bfcf0f..1ac75e1f7 100644 --- a/test/explorer/data_frame/parquet_test.exs +++ b/test/explorer/data_frame/parquet_test.exs @@ -200,15 +200,15 @@ defmodule Explorer.DataFrame.ParquetTest do assert_parquet(:date, ~D[2022-12-01], ~D[2022-12-01]) end - test "datetime" do + test "naive datetime" do assert_parquet( - {:datetime, :microsecond}, + {:naive_datetime, :microsecond}, 1_664_624_050_123_456, ~N[2022-10-01 11:34:10.123456] ) 
assert_parquet( - {:datetime, :microsecond}, + {:naive_datetime, :microsecond}, ~N[2022-10-01 11:34:10.123456], ~N[2022-10-01 11:34:10.123456] ) diff --git a/test/explorer/data_frame_test.exs b/test/explorer/data_frame_test.exs index 66d07d3e0..0bda80d6d 100644 --- a/test/explorer/data_frame_test.exs +++ b/test/explorer/data_frame_test.exs @@ -861,7 +861,7 @@ defmodule Explorer.DataFrameTest do "g" => :string, "h" => :boolean, "i" => :date, - "j" => {:datetime, :nanosecond} + "j" => {:naive_datetime, :nanosecond} } end @@ -1879,7 +1879,7 @@ defmodule Explorer.DataFrameTest do } end - test "add columns with date and datetime operations" do + test "add columns with date and naive datetime operations" do df = DF.new( a: [~D[2023-01-15], ~D[2022-02-16], ~D[2021-03-20], nil], @@ -1933,7 +1933,7 @@ defmodule Explorer.DataFrameTest do assert df1.dtypes == %{ "a" => :date, - "b" => {:datetime, :microsecond}, + "b" => {:naive_datetime, :microsecond}, "c" => {:s, 8}, "d" => {:s, 8}, "e" => {:s, 8}, @@ -1997,7 +1997,7 @@ defmodule Explorer.DataFrameTest do assert DF.dtypes(df) == %{ "a" => {:list, :string}, "b" => {:list, {:s, 64}}, - "c" => {:list, {:datetime, :microsecond}}, + "c" => {:list, {:naive_datetime, :microsecond}}, "join" => :string, "lengths" => {:u, 32}, "member?" 
=> :boolean @@ -2545,7 +2545,7 @@ defmodule Explorer.DataFrameTest do | Explorer DataFrame: [rows: 1, columns: 3] | +----------------------------+----------------------------+----------------+ | datetime1 | datetime2 | duration | - | | | | + | | | | +============================+============================+================+ | 2023-09-14 00:00:00.000000 | 2023-09-14 01:00:00.000000 | 1h | +----------------------------+----------------------------+----------------+ @@ -3843,7 +3843,7 @@ defmodule Explorer.DataFrameTest do string: ["a", "b", "c", "nil"], date: [~D[2021-01-01], ~D[1999-12-31], nil, ~D[2023-01-01]], time: [~T[00:02:03.000212], ~T[00:05:04.000456], ~T[00:07:04.000776], nil], - datetime: [ + naive_datetime: [ nil, ~N[2021-01-01 00:00:00], ~N[1999-12-31 00:00:00], @@ -3857,11 +3857,11 @@ defmodule Explorer.DataFrameTest do ] ) - df = DF.mutate(df, duration: datetime - duration) + df = DF.mutate(df, duration: naive_datetime - duration) assert df.dtypes == %{ "date" => :date, - "datetime" => {:datetime, :microsecond}, + "naive_datetime" => {:naive_datetime, :microsecond}, "duration" => {:duration, :microsecond}, "list" => {:list, :null}, # "null" => :null, @@ -3874,7 +3874,7 @@ defmodule Explorer.DataFrameTest do assert describe_df.dtypes == %{ "date" => :string, - "datetime" => :string, + "naive_datetime" => :string, "describe" => :string, "duration" => :string, "list" => :string, @@ -3886,7 +3886,7 @@ defmodule Explorer.DataFrameTest do assert DF.to_columns(describe_df, atom_keys: true) == %{ date: ["3", "1", nil, nil, nil, nil, nil, nil, nil], - datetime: [ + naive_datetime: [ "3", "1", nil, @@ -4553,13 +4553,13 @@ defmodule Explorer.DataFrameTest do st: json_decode(st, {:struct, %{"n" => {:s, 64}}}), f: json_decode(f, {:f, 64}), l: json_decode(l, {:list, {:s, 64}}), - dt: json_decode(dt, {:datetime, :microsecond}) + dt: json_decode(dt, {:naive_datetime, :microsecond}) ) assert df.dtypes == %{"dt" => :string, "f" => :string, "l" => :string, "st" => 
:string} assert df1.dtypes == %{ - "dt" => {:datetime, :microsecond}, + "dt" => {:naive_datetime, :microsecond}, "f" => {:f, 64}, "l" => {:list, {:s, 64}}, "st" => {:struct, [{"n", {:s, 64}}]} diff --git a/test/explorer/series/datetime_test.exs b/test/explorer/series/datetime_test.exs index b3886d803..d48d300a5 100644 --- a/test/explorer/series/datetime_test.exs +++ b/test/explorer/series/datetime_test.exs @@ -47,4 +47,52 @@ defmodule Explorer.Series.DateTimeTest do assert Series.second(series) |> Series.to_list() == [35, 35, 35] end end + + describe "timezones" do + test "UTC" do + datetimes_in = [ + ~U[2024-01-01T12:00:00.000000Z], + ~U[2024-01-01T13:00:00.000000Z], + ~U[2024-01-01T14:00:00.000000Z] + ] + + datetimes_out = + datetimes_in + |> Series.from_list() + |> Series.to_list() + + assert datetimes_out == datetimes_in + end + + test "America/New_York" do + datetimes_in = + [ + ~U[2024-01-01T12:00:00.000000Z], + ~U[2024-01-01T13:00:00.000000Z], + ~U[2024-01-01T14:00:00.000000Z] + ] + |> Enum.map(&DateTime.shift_zone!(&1, "America/New_York")) + + datetimes_out = + datetimes_in + |> Series.from_list() + |> Series.to_list() + + assert datetimes_out == datetimes_in + end + + test "can't build a series from datetimes with non-matching timezones" do + datetimes_in = + [ + ~U[2024-01-01T12:00:00.000000Z], + ~U[2024-01-01T13:00:00.000000Z] |> DateTime.shift_zone!("America/New_York") + ] + + assert_raise( + ArgumentError, + "the value #DateTime<2024-01-01 08:00:00.000000-05:00 EST America/New_York> does not match the inferred dtype {:datetime, :microsecond, \"Etc/UTC\"}", + fn -> Series.from_list(datetimes_in) end + ) + end + end end diff --git a/test/explorer/series/duration_test.exs b/test/explorer/series/duration_test.exs index d13a3fca8..ec2489d9e 100644 --- a/test/explorer/series/duration_test.exs +++ b/test/explorer/series/duration_test.exs @@ -252,24 +252,24 @@ defmodule Explorer.Series.DurationTest do fn -> Series.add(aug_20_s, aug_21_s) end end - # Datetime + 
# Naive Datetime - test "datetime[μs] + duration[μs]" do + test "naive_datetime[μs] + duration[μs]" do one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) eleven_s = Series.from_list([~N[2023-08-20 11:00:00.0000000]]) sum_s = Series.add(eleven_s, one_hour_s) - assert sum_s.dtype == {:datetime, :microsecond} + assert sum_s.dtype == {:naive_datetime, :microsecond} twelve_ndt = ~N[2023-08-20 12:00:00.0000000] assert Series.to_list(sum_s) == [twelve_ndt] end - test "duration[μs] + datetime[μs]" do + test "duration[μs] + naive_datetime[μs]" do one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) eleven_s = Series.from_list([~N[2023-08-20 11:00:00.0000000]]) sum_s = Series.add(one_hour_s, eleven_s) - assert sum_s.dtype == {:datetime, :microsecond} + assert sum_s.dtype == {:naive_datetime, :microsecond} twelve_ndt = ~N[2023-08-20 12:00:00.0000000] assert Series.to_list(sum_s) == [twelve_ndt] end @@ -279,7 +279,7 @@ defmodule Explorer.Series.DurationTest do one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) sum_s = Series.add(eleven, one_hour_s) - assert sum_s.dtype == {:datetime, :microsecond} + assert sum_s.dtype == {:naive_datetime, :microsecond} assert Series.to_list(sum_s) == [~N[2023-08-20 12:00:00.0000000]] end @@ -288,11 +288,11 @@ defmodule Explorer.Series.DurationTest do one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) sum_s = Series.add(one_hour_s, eleven) - assert sum_s.dtype == {:datetime, :microsecond} + assert sum_s.dtype == {:naive_datetime, :microsecond} assert Series.to_list(sum_s) == [~N[2023-08-20 12:00:00.0000000]] end - test "datetime[μs] + duration[ns] (different precisions)" do + test "naive_datetime[μs] + duration[ns] (different precisions)" do one_hour_ns = 3600 * 1_000_000_000 one_hour_s = Series.from_list([one_hour_ns], dtype: {:duration, :nanosecond}) eleven_s = Series.from_list([~N[2023-08-20 11:00:00.0000000]]) @@ -301,16 +301,78 
@@ defmodule Explorer.Series.DurationTest do # Since we added a duration with :nanosecond precision from a datetime with :microsecond # precision, the resulting sum has :nanosecond precision since that was the highest # precision present in the operation. - assert sum_s.dtype == {:datetime, :nanosecond} + assert sum_s.dtype == {:naive_datetime, :nanosecond} assert Series.to_list(sum_s) == [~N[2023-08-20 12:00:00.0000000]] end - test "datetime[μs] + datetime[μs] raises ArgumentError" do + test "datetime[μs] + naive_datetime[μs] raises ArgumentError" do eleven_s = Series.from_list([~N[2023-08-20 11:00:00]]) twelve_s = Series.from_list([~N[2023-08-20 12:00:00]]) assert_raise ArgumentError, - "cannot invoke Explorer.Series.add/2 with mismatched dtypes: {:datetime, :microsecond} and {:datetime, :microsecond}", + "cannot invoke Explorer.Series.add/2 with mismatched dtypes: {:naive_datetime, :microsecond} and {:naive_datetime, :microsecond}", + fn -> Series.add(eleven_s, twelve_s) end + end + + # Datetime + + test "datetime[μs, Etc/UTC] + duration[μs]" do + one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) + eleven_s = Series.from_list([~U[2023-08-20 11:00:00.0000000Z]]) + sum_s = Series.add(eleven_s, one_hour_s) + + assert sum_s.dtype == {:datetime, :microsecond, "Etc/UTC"} + twelve_utc = ~U[2023-08-20 12:00:00.0000000Z] + assert Series.to_list(sum_s) == [twelve_utc] + end + + test "duration[μs] + datetime[μs, Etc/UTC]" do + one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) + eleven_s = Series.from_list([~U[2023-08-20 11:00:00.0000000Z]]) + sum_s = Series.add(one_hour_s, eleven_s) + + assert sum_s.dtype == {:datetime, :microsecond, "Etc/UTC"} + twelve_utc = ~U[2023-08-20 12:00:00.0000000Z] + assert Series.to_list(sum_s) == [twelve_utc] + end + + test "DateTime + duration[μs]" do + eleven = ~U[2023-08-20 11:00:00.0000000Z] + one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) + sum_s = 
Series.add(eleven, one_hour_s) + + assert sum_s.dtype == {:datetime, :microsecond, "Etc/UTC"} + assert Series.to_list(sum_s) == [~U[2023-08-20 12:00:00.0000000Z]] + end + + test "duration[μs] + DateTime" do + eleven = ~U[2023-08-20 11:00:00.0000000Z] + one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) + sum_s = Series.add(one_hour_s, eleven) + + assert sum_s.dtype == {:datetime, :microsecond, "Etc/UTC"} + assert Series.to_list(sum_s) == [~U[2023-08-20 12:00:00.0000000Z]] + end + + test "datetime[μs, Etc/UTC] + duration[ns] (different precisions)" do + one_hour_ns = 3600 * 1_000_000_000 + one_hour_s = Series.from_list([one_hour_ns], dtype: {:duration, :nanosecond}) + eleven_s = Series.from_list([~U[2023-08-20 11:00:00.0000000Z]]) + sum_s = Series.add(eleven_s, one_hour_s) + + # Since we added a duration with :nanosecond precision from a datetime with :microsecond + # precision, the resulting sum has :nanosecond precision since that was the highest + # precision present in the operation. 
+ assert sum_s.dtype == {:datetime, :nanosecond, "Etc/UTC"} + assert Series.to_list(sum_s) == [~U[2023-08-20 12:00:00.0000000Z]] + end + + test "datetime[μs, Etc/UTC] + datetime[μs, Etc/UTC] raises ArgumentError" do + eleven_s = Series.from_list([~U[2023-08-20 11:00:00Z]]) + twelve_s = Series.from_list([~U[2023-08-20 12:00:00Z]]) + + assert_raise ArgumentError, + "cannot invoke Explorer.Series.add/2 with mismatched dtypes: {:datetime, :microsecond, \"Etc/UTC\"} and {:datetime, :microsecond, \"Etc/UTC\"}", fn -> Series.add(eleven_s, twelve_s) end end end @@ -392,9 +454,9 @@ defmodule Explorer.Series.DurationTest do assert Series.to_list(diff_s) == [@aug_20] end - # Datetime + # Naive Datetime - test "datetime[μs] - datetime[μs]" do + test "naive_datetime[μs] - naive_datetime[μs]" do eleven_s = Series.from_list([~N[2023-08-20 11:00:00.0000000]]) twelve_s = Series.from_list([~N[2023-08-20 12:00:00.0000000]]) diff_s = Series.subtract(twelve_s, eleven_s) @@ -403,16 +465,16 @@ defmodule Explorer.Series.DurationTest do assert Series.to_list(diff_s) == [@one_hour_duration_us] end - test "datetime[μs] - duration[μs]" do + test "naive_datetime[μs] - duration[μs]" do one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) twelve_s = Series.from_list([~N[2023-08-20 12:00:00.0000000]]) diff_s = Series.subtract(twelve_s, one_hour_s) - assert diff_s.dtype == {:datetime, :microsecond} + assert diff_s.dtype == {:naive_datetime, :microsecond} assert Series.to_list(diff_s) == [~N[2023-08-20 11:00:00.0000000]] end - test "NaiveDateTime - datetime[μs]" do + test "NaiveDateTime - naive_datetime[μs]" do eleven_s = Series.from_list([~N[2023-08-20 11:00:00.0000000]]) twelve = ~N[2023-08-20 12:00:00.0000000] diff_s = Series.subtract(twelve, eleven_s) @@ -421,7 +483,7 @@ defmodule Explorer.Series.DurationTest do assert Series.to_list(diff_s) == [@one_hour_duration_us] end - test "datetime[μs] - NaiveDateTime" do + test "naive_datetime[μs] - NaiveDateTime" do eleven_s 
= Series.from_list([~N[2023-08-20 11:00:00.0000000]]) twelve = ~N[2023-08-20 12:00:00.0000000] diff_s = Series.subtract(eleven_s, twelve) @@ -435,11 +497,11 @@ defmodule Explorer.Series.DurationTest do one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) diff_s = Series.subtract(twelve, one_hour_s) - assert diff_s.dtype == {:datetime, :microsecond} + assert diff_s.dtype == {:naive_datetime, :microsecond} assert Series.to_list(diff_s) == [~N[2023-08-20 11:00:00.0000000]] end - test "datetime[μs] - datetime[ns] (different precisions)" do + test "datetime[μs] - naive_datetime[ns] (different precisions)" do one_hour_ns = 3600 * 1_000_000_000 one_hour_s = Series.from_list([one_hour_ns], dtype: {:duration, :nanosecond}) twelve_s = Series.from_list([~N[2023-08-20 12:00:00.0000000]]) @@ -448,16 +510,16 @@ defmodule Explorer.Series.DurationTest do # Since we subtracted a duration with :nanosecond precision from a datetime with :microsecond # precision, the resulting difference has :nanosecond precision since that was the highest # precision present in the operation. 
- assert diff_s.dtype == {:datetime, :nanosecond} + assert diff_s.dtype == {:naive_datetime, :nanosecond} assert Series.to_list(diff_s) == [~N[2023-08-20 11:00:00.0000000]] end - test "duration[μs] - datetime[μs] raises ArgumentError" do + test "duration[μs] - naive_datetime[μs] raises ArgumentError" do one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) twelve_s = Series.from_list([~N[2023-08-20 12:00:00]]) assert_raise ArgumentError, - "cannot invoke Explorer.Series.subtract/2 with mismatched dtypes: {:duration, :microsecond} and {:datetime, :microsecond}", + "cannot invoke Explorer.Series.subtract/2 with mismatched dtypes: {:duration, :microsecond} and {:naive_datetime, :microsecond}", fn -> Series.subtract(one_hour_s, twelve_s) end end end @@ -663,8 +725,8 @@ defmodule Explorer.Series.DurationTest do assert inspect(df_with_diff) == """ #Explorer.DataFrame< Polars[1 x 3] - eleven datetime[μs] [2023-08-20 11:00:00.000000] - twelve datetime[μs] [2023-08-20 12:00:00.000000] + eleven naive_datetime[μs] [2023-08-20 11:00:00.000000] + twelve naive_datetime[μs] [2023-08-20 12:00:00.000000] diff duration[μs] [1h] >\ """ diff --git a/test/explorer/series/list_test.exs b/test/explorer/series/list_test.exs index 81a3140a6..552494ccd 100644 --- a/test/explorer/series/list_test.exs +++ b/test/explorer/series/list_test.exs @@ -135,10 +135,10 @@ defmodule Explorer.Series.ListTest do assert Series.to_list(series) == [[~D[2023-11-10]]] end - test "list of lists of one datetime" do + test "list of lists of one naive datetime" do series = Series.from_list([[~N[2023-11-10 00:19:30]]]) - assert series.dtype == {:list, {:datetime, :microsecond}} + assert series.dtype == {:list, {:naive_datetime, :microsecond}} assert series[0] == [~N[2023-11-10 00:19:30.000000]] assert Series.to_list(series) == [[~N[2023-11-10 00:19:30.000000]]] end @@ -235,9 +235,9 @@ defmodule Explorer.Series.ListTest do assert Series.to_list(s1) === [[[1.0, 2.0]], [[3.0, 4.0]]] end - 
test "list of integer series to list of datetime" do + test "list of integer series to list of naive datetime" do s = Series.from_list([[1, 2, 3], [1_649_883_642 * 1_000 * 1_000]]) - s1 = Series.cast(s, {:list, {:datetime, :microsecond}}) + s1 = Series.cast(s, {:list, {:naive_datetime, :microsecond}}) assert Series.to_list(s1) == [ [ @@ -248,7 +248,7 @@ defmodule Explorer.Series.ListTest do [~N[2022-04-13 21:00:42.000000]] ] - assert Series.dtype(s1) == {:list, {:datetime, :microsecond}} + assert Series.dtype(s1) == {:list, {:naive_datetime, :microsecond}} end test "deeper list of integers series to list of invalid dtype" do @@ -314,7 +314,7 @@ defmodule Explorer.Series.ListTest do """ #Explorer.Series< Polars[1] - list[datetime[μs]] [[2023-11-10 00:19:30.000000]] + list[naive_datetime[μs]] [[2023-11-10 00:19:30.000000]] >\ """ end diff --git a/test/explorer/series/struct_test.exs b/test/explorer/series/struct_test.exs index 4066802bc..d7dd77be1 100644 --- a/test/explorer/series/struct_test.exs +++ b/test/explorer/series/struct_test.exs @@ -158,7 +158,7 @@ defmodule Explorer.Series.StructTest do assert Series.dtype(s1) == {:struct, [{"a", {:struct, [{"b", {:f, 64}}]}}]} end - test "structs with integers to structs with datetimes" do + test "structs with integers to structs with naive datetimes" do s = Series.from_list([ %{a: 1}, @@ -167,7 +167,7 @@ defmodule Explorer.Series.StructTest do %{a: 1_649_883_642 * 1_000 * 1_000} ]) - s1 = Series.cast(s, {:struct, [{"a", {:datetime, :microsecond}}]}) + s1 = Series.cast(s, {:struct, [{"a", {:naive_datetime, :microsecond}}]}) assert Series.to_list(s1) == [ %{"a" => ~N[1970-01-01 00:00:00.000001]}, @@ -176,7 +176,7 @@ defmodule Explorer.Series.StructTest do %{"a" => ~N[2022-04-13 21:00:42.000000]} ] - assert Series.dtype(s1) == {:struct, [{"a", {:datetime, :microsecond}}]} + assert Series.dtype(s1) == {:struct, [{"a", {:naive_datetime, :microsecond}}]} end test "can cast dtype order" do diff --git 
a/test/explorer/series_test.exs b/test/explorer/series_test.exs index 0ed4229a4..17732014c 100644 --- a/test/explorer/series_test.exs +++ b/test/explorer/series_test.exs @@ -263,7 +263,7 @@ defmodule Explorer.SeriesTest do ~N[2353-03-07 00:39:35.702789] ] - assert Series.from_list(dates, dtype: {:datetime, :microsecond}) |> Series.to_list() == + assert Series.from_list(dates, dtype: {:naive_datetime, :microsecond}) |> Series.to_list() == dates today_in_days = Date.utc_today() |> Date.to_gregorian_days() @@ -282,7 +282,7 @@ defmodule Explorer.SeriesTest do |> NaiveDateTime.add(:rand.uniform(60), :second) end - assert Series.from_list(dates, dtype: {:datetime, :microsecond}) |> Series.to_list() == + assert Series.from_list(dates, dtype: {:naive_datetime, :microsecond}) |> Series.to_list() == dates end @@ -3830,9 +3830,9 @@ defmodule Explorer.SeriesTest do assert Series.dtype(s3) == :time end - test "integer series to datetime" do + test "integer series to naive datetime" do s = Series.from_list([1, 2, 3]) - s1 = Series.cast(s, {:datetime, :microsecond}) + s1 = Series.cast(s, {:naive_datetime, :microsecond}) assert Series.to_list(s1) == [ ~N[1970-01-01 00:00:00.000001], @@ -3840,13 +3840,13 @@ defmodule Explorer.SeriesTest do ~N[1970-01-01 00:00:00.000003] ] - assert Series.dtype(s1) == {:datetime, :microsecond} + assert Series.dtype(s1) == {:naive_datetime, :microsecond} s2 = Series.from_list([1_649_883_642 * 1_000 * 1_000]) - s3 = Series.cast(s2, {:datetime, :microsecond}) + s3 = Series.cast(s2, {:naive_datetime, :microsecond}) assert Series.to_list(s3) == [~N[2022-04-13 21:00:42.000000]] - assert Series.dtype(s3) == {:datetime, :microsecond} + assert Series.dtype(s3) == {:naive_datetime, :microsecond} end test "string series to category" do @@ -3857,17 +3857,17 @@ defmodule Explorer.SeriesTest do assert Series.dtype(s1) == :category end - test "string series to datetime" do - s = Series.from_list(["2023-08-29 17:39:43", "2023-08-29 17:20:09"]) - ms = Series.cast(s, 
{:datetime, :millisecond}) - us = Series.cast(s, {:datetime, :microsecond}) - ns = Series.cast(s, {:datetime, :nanosecond}) + test "string series to naive datetime" do + s = Series.from_list(["2023-08-29T17:39:43", "2023-08-29T17:20:09"]) + ms = Series.cast(s, {:naive_datetime, :millisecond}) + us = Series.cast(s, {:naive_datetime, :microsecond}) + ns = Series.cast(s, {:naive_datetime, :nanosecond}) - assert Series.dtype(ms) == {:datetime, :millisecond} - assert Series.dtype(us) == {:datetime, :microsecond} - assert Series.dtype(ns) == {:datetime, :nanosecond} + assert Series.dtype(ms) == {:naive_datetime, :millisecond} + assert Series.dtype(us) == {:naive_datetime, :microsecond} + assert Series.dtype(ns) == {:naive_datetime, :nanosecond} - expected = [~N[2023-08-29 17:39:43.000000], ~N[2023-08-29 17:20:09.000000]] + expected = [~N[2023-08-29T17:39:43.000000], ~N[2023-08-29T17:20:09.000000]] assert Series.to_list(ms) == expected assert Series.to_list(us) == expected assert Series.to_list(ns) == expected @@ -6002,12 +6002,12 @@ defmodule Explorer.SeriesTest do end @tag :skip - test "datetime" do + test "naive datetime" do series = Series.from_binary( <<-62_135_596_800_000_000::signed-64-native, 0::signed-64-native, 529_550_625_987_654::signed-64-native>>, - {:datetime, :microsecond} + {:naive_datetime, :microsecond} ) # There is a precision problem here. Investigate. diff --git a/test/test_helper.exs b/test/test_helper.exs index 3800bfa64..c892e047c 100644 --- a/test/test_helper.exs +++ b/test/test_helper.exs @@ -43,4 +43,6 @@ defmodule Explorer.IOHelpers do end end +Calendar.put_time_zone_database(Tz.TimeZoneDatabase) + ExUnit.start(exclude: [:cloud_integration, :property])