Skip to content

Commit

Permalink
Initial timezone support (#903)
Browse files (browse the repository at this point in the history)
* starting to work

* sort features

* pass in atom

* better timeunit handling

* timezones feature

* phase out `precision_to_timeunit`

* datetime key additions/shuffling

* formatting/linting

* start to move to a naive_datetime dtype

* get datetimes back out (incorrectly)

* start to get timezones working

* offsets can be negative

* add tz dep

* working non-UTC timezone test

* get duration tests passing

* majority of tests passing

* fix unexpected datetime parsing issue

* make enforce_highest_precision work with different sized tuples

* add a datetime section to some duration tests

* test mismatched timezones

* make tz a test-only dep

* save progress on From traits (not working!)

* rename to s_from_list_naive_datetime

* revert to `&'a str` type and add TODOs

* didn't mean to remove this
  • Loading branch information
billylanchantin authored May 5, 2024
1 parent c64477c commit a9ac048
Show file tree
Hide file tree
Showing 28 changed files with 795 additions and 298 deletions.
2 changes: 1 addition & 1 deletion lib/explorer/backend/lazy_series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -857,7 +857,7 @@ defmodule Explorer.Backend.LazySeries do

@impl true
def strptime(%Series{} = series, format_string) do
dtype = {:datetime, :microsecond}
dtype = {:naive_datetime, :microsecond}
data = new(:strptime, [lazy_series!(series), format_string], dtype)

Backend.Series.new(data, dtype)
Expand Down
15 changes: 9 additions & 6 deletions lib/explorer/data_frame.ex
Original file line number Diff line number Diff line change
Expand Up @@ -2857,7 +2857,10 @@ defmodule Explorer.DataFrame do
date = %Date{} ->
LazySeries.new(:lazy, [date], :date)

datetime = %NaiveDateTime{} ->
naive_datetime = %NaiveDateTime{} ->
LazySeries.new(:lazy, [naive_datetime], {:naive_datetime, :nanosecond})

datetime = %DateTime{} ->
LazySeries.new(:lazy, [datetime], {:datetime, :nanosecond})

duration = %Explorer.Duration{precision: precision} ->
Expand Down Expand Up @@ -2949,10 +2952,10 @@ defmodule Explorer.DataFrame do
in microseconds from the Unix epoch:
iex> df = Explorer.DataFrame.new([])
iex> Explorer.DataFrame.put(df, :a, Nx.tensor([1, 2, 3]), dtype: {:datetime, :microsecond})
iex> Explorer.DataFrame.put(df, :a, Nx.tensor([1, 2, 3]), dtype: {:naive_datetime, :microsecond})
#Explorer.DataFrame<
Polars[3 x 1]
a datetime[μs] [1970-01-01 00:00:00.000001, 1970-01-01 00:00:00.000002, 1970-01-01 00:00:00.000003]
a naive_datetime[μs] [1970-01-01 00:00:00.000001, 1970-01-01 00:00:00.000002, 1970-01-01 00:00:00.000003]
>
If there is already a column where we want to place the tensor,
Expand All @@ -2964,7 +2967,7 @@ defmodule Explorer.DataFrame do
iex> Explorer.DataFrame.put(df, :a, Nx.tensor(529550625987654))
#Explorer.DataFrame<
Polars[1 x 1]
a datetime[μs] [1986-10-13 01:23:45.987654]
a naive_datetime[μs] [1986-10-13 01:23:45.987654]
>
This is particularly useful for categorical columns:
Expand Down Expand Up @@ -5749,15 +5752,15 @@ defmodule Explorer.DataFrame do
stat_cols = df.names
percentiles = process_percentiles(opts[:percentiles])
numeric_types = Shared.numeric_types()
datetime_types = Shared.datetime_types()
naive_datetime_types = Shared.naive_datetime_types()
duration_types = Shared.duration_types()

metrics_df =
summarise_with(df, fn x ->
Enum.flat_map(stat_cols, fn c ->
dt = x[c].dtype
numeric? = dt in numeric_types
min_max? = numeric? or dt in datetime_types or dt in duration_types
min_max? = numeric? or dt in naive_datetime_types or dt in duration_types

[
{"count:#{c}", Series.count(x[c])},
Expand Down
3 changes: 2 additions & 1 deletion lib/explorer/polars_backend/native.ex
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,8 @@ defmodule Explorer.PolarsBackend.Native do
def s_from_list_bool(_name, _val), do: err()
def s_from_list_date(_name, _val), do: err()
def s_from_list_time(_name, _val), do: err()
def s_from_list_datetime(_name, _val, _precision), do: err()
def s_from_list_naive_datetime(_name, _val, _precision), do: err()
def s_from_list_datetime(_name, _val, _precision, _tz), do: err()
def s_from_list_duration(_name, _val, _precision), do: err()
def s_from_list_f32(_name, _val), do: err()
def s_from_list_f64(_name, _val), do: err()
Expand Down
11 changes: 6 additions & 5 deletions lib/explorer/polars_backend/shared.ex
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,9 @@ defmodule Explorer.PolarsBackend.Shared do
:category -> Native.s_from_list_categories(name, list)
:date -> Native.s_from_list_date(name, list)
:time -> Native.s_from_list_time(name, list)
{:datetime, precision} -> Native.s_from_list_datetime(name, list, Atom.to_string(precision))
{:duration, precision} -> Native.s_from_list_duration(name, list, Atom.to_string(precision))
{:naive_datetime, precision} -> Native.s_from_list_naive_datetime(name, list, precision)
{:datetime, precision, tz} -> Native.s_from_list_datetime(name, list, precision, tz)
{:duration, precision} -> Native.s_from_list_duration(name, list, precision)
:binary -> Native.s_from_list_binary(name, list)
:null -> Native.s_from_list_null(name, length(list))
end
Expand All @@ -205,13 +206,13 @@ defmodule Explorer.PolarsBackend.Shared do
:time ->
Native.s_from_binary_s64(name, binary) |> Native.s_cast(dtype) |> ok()

{:datetime, :millisecond} ->
{:naive_datetime, :millisecond} ->
Native.s_from_binary_s64(name, binary) |> Native.s_cast(dtype) |> ok()

{:datetime, :microsecond} ->
{:naive_datetime, :microsecond} ->
Native.s_from_binary_s64(name, binary) |> Native.s_cast(dtype) |> ok()

{:datetime, :nanosecond} ->
{:naive_datetime, :nanosecond} ->
Native.s_from_binary_s64(name, binary) |> Native.s_cast(dtype) |> ok()

{:duration, :millisecond} ->
Expand Down
Loading

0 comments on commit a9ac048

Please sign in to comment.