Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
iurimateus committed Feb 18, 2024
1 parent 0f666b6 commit 9fb0d96
Show file tree
Hide file tree
Showing 8 changed files with 136 additions and 0 deletions.
43 changes: 43 additions & 0 deletions lib/explorer/backend/lazy_series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ defmodule Explorer.Backend.LazySeries do
member: 3,
# Struct functions
field: 2
# ?
len: 0
]

# Operation names that compare two values.
@comparison_operations ~w(equal not_equal greater greater_equal less less_equal)a
Expand Down Expand Up @@ -1111,6 +1113,47 @@ defmodule Explorer.Backend.LazySeries do
Backend.Series.new(data, :string)
end

# Lazy series for the argument-less `:len` operation, typed as `{:u, 32}`.
def len do
  dtype = {:u, 32}

  :len
  |> new([], dtype)
  |> Backend.Series.new(dtype)
end

# Lazy series for the argument-less `:row_index` operation, typed as `{:u, 32}`.
def row_index do
  dtype = {:u, 32}

  :row_index
  |> new([], dtype)
  |> Backend.Series.new(dtype)
end

# Builds a lazy `:int_range` series of dtype `{:s, 64}` running from `start` to
# `end_` in increments of `step` (defaults to 1).
#
# `start` and `end_` may each be an integer or a `%Series{}`; a series is
# unwrapped with `lazy_series!/1`. Any other bound raises `ArgumentError`, and so
# does a non-integer `step`.
#
# TODO check boundaries? A range whose length differs from the dataframe height
# is rejected by Polars:
#
#   ** (RuntimeError) Polars Error: lengths don't match: unable to add a column of length 2 to
#   a DataFrame of height 3
#
# TODO Should we also accept a Series? or only LazySeries?
def int_range(start, end_, step \\ 1) do
  # `step` is handed to the native layer as a plain integer, so reject anything
  # else here with a readable error.
  if not is_integer(step) do
    raise ArgumentError, "step must be an integer, got: #{inspect(step)}"
  end

  # Each bound is validated on its own so that a bad value cannot slip through
  # just because the other bound happens to be a series.
  [range_start, range_end] =
    Enum.map([start, end_], fn
      %Series{} = series -> lazy_series!(series)
      int when is_integer(int) -> int
      _other -> raise ArgumentError, "invalid arguments"
    end)

  dtype = {:s, 64}
  data = new(:int_range, [range_start, range_end, step], dtype)
  Backend.Series.new(data, dtype)
end

@remaining_non_lazy_operations [
at: 2,
at_every: 2,
Expand Down
19 changes: 19 additions & 0 deletions lib/explorer/polars_backend/expression.ex
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,9 @@ defmodule Explorer.PolarsBackend.Expression do
shift: 3,
slice: 2,
slice: 3,
row_index: 0,
len: 0,
int_range: 4,
concat: 1,
column: 1,
correlation: 4,
Expand Down Expand Up @@ -296,6 +299,22 @@ defmodule Explorer.PolarsBackend.Expression do
end
end

# `:len` carries no arguments and maps straight onto the native length expression.
def to_expr(%LazySeries{op: :len, args: []}), do: Native.expr_len()

# `:row_index` is expressed as the integer range from 0 up to the native length
# expression, with step 1 and dtype `{:u, 32}`.
def to_expr(%LazySeries{op: :row_index, args: []}) do
  first = to_expr(0)
  total = Native.expr_len()

  Native.expr_int_range(first, total, 1, {:u, 32})
end

# `:int_range` converts both bounds to expressions; `step` and the series dtype
# are forwarded to the native call unchanged.
def to_expr(%LazySeries{op: :int_range, args: [start, end_, step], dtype: dtype}) do
  start_expr = to_expr(start)
  end_expr = to_expr(end_)

  Native.expr_int_range(start_expr, end_expr, step, dtype)
end

for {op, arity} <- @all_expressions do
args = Macro.generate_arguments(arity, __MODULE__)

Expand Down
2 changes: 2 additions & 0 deletions lib/explorer/polars_backend/native.ex
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,8 @@ defmodule Explorer.PolarsBackend.Native do
# NOTE(review): these one-line definitions look like NIF placeholders whose body,
# `err()`, only runs when no native implementation replaced them — confirm
# against the definition of `err/0`.
def expr_string(_string), do: err()
def expr_struct(_map), do: err()

# Placeholder for the `expr_int_range` NIF declared in
# native/explorer/src/expressions.rs: takes the start expression, the end
# expression, an integer step and the output dtype.
def expr_int_range(_start, _end, _step, _dtype), do: err()

# LazyFrame
# NOTE(review): same placeholder pattern as above, for lazy-frame operations.
def lf_collect(_df), do: err()
def lf_describe_plan(_df, _optimized), do: err()
Expand Down
12 changes: 12 additions & 0 deletions lib/explorer/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -6024,6 +6024,18 @@ defmodule Explorer.Series do
def member?(%Series{dtype: dtype}, _value),
do: dtype_error("member?/2", dtype, [{:list, :_}])

@doc """
Returns a lazy series holding the number of rows of the dataframe being queried.

Inside a `mutate` the value is repeated on every row, so a three-row dataframe
yields `[3, 3, 3]`.
"""
def len, do: Explorer.Backend.LazySeries.len()

@doc """
Returns a lazy series numbering the rows of the dataframe being queried from zero.

A three-row dataframe yields `[0, 1, 2]`.
"""
def row_index, do: Explorer.Backend.LazySeries.row_index()

@doc """
Returns a lazy series of integers from `start` up to, but not including, `end_`,
advancing by `step` (defaults to 1).

`start` and `end_` may be integers or series, so `int_range(0, len())` numbers
every row while `int_range(0, 15, 5)` yields `[0, 5, 10]`. When used in a
`mutate`, the resulting length has to match the dataframe height.
"""
def int_range(start, end_, step \\ 1),
  do: Explorer.Backend.LazySeries.int_range(start, end_, step)

# Escape hatch

@doc """
Expand Down
1 change: 1 addition & 0 deletions native/explorer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ features = [
"product",
"peaks",
"moment",
"range",
"rank",
"propagate_nans",
"extract_jsonpath"
Expand Down
16 changes: 16 additions & 0 deletions native/explorer/src/expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1054,6 +1054,22 @@ pub fn expr_join(expr: ExExpr, sep: String) -> ExExpr {
ExExpr::new(expr.list().join(sep.lit()))
}

/// Builds an integer-range expression from `start` to `end`, advancing by `step`,
/// with the output data type converted from `dtype`.
///
/// Panics if `dtype` cannot be converted into a `DataType`.
#[rustler::nif]
pub fn expr_int_range(start: ExExpr, end: ExExpr, step: i64, dtype: ExSeriesDtype) -> ExExpr {
    let (lower, upper) = (start.clone_inner(), end.clone_inner());
    let target = DataType::try_from(&dtype).unwrap();

    ExExpr::new(dsl::int_range(lower, upper, step, target))
}

/// Wraps the `dsl::count()` expression so it can be handed back to Elixir.
#[rustler::nif]
pub fn expr_len() -> ExExpr {
    ExExpr::new(dsl::count())
}

#[rustler::nif]
pub fn expr_lengths(expr: ExExpr) -> ExExpr {
let expr = expr.clone_inner();
Expand Down
2 changes: 2 additions & 0 deletions native/explorer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,8 @@ rustler::init!(
expr_float,
expr_head,
expr_integer,
expr_int_range,
expr_len,
expr_peaks,
expr_rank,
expr_unary_not,
Expand Down
41 changes: 41 additions & 0 deletions test/explorer/data_frame_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -4452,4 +4452,45 @@ defmodule Explorer.DataFrameTest do
}
end
end

# Each query helper gets its own describe block and focused tests, so a failure
# points at the helper that broke instead of a single catch-all test.
describe "len/0" do
  test "repeats the row count on every row" do
    df = DF.new(a: [1, 3, 5], b: [2, 4, 6])

    df1 = DF.mutate(df, x: len())

    assert DF.to_columns(df1, atom_keys: true) == %{
             a: [1, 3, 5],
             b: [2, 4, 6],
             x: [3, 3, 3]
           }
  end
end

describe "row_index/0" do
  test "numbers the rows from zero inside mutate" do
    df = DF.new(a: [1, 3, 5], b: [2, 4, 6])

    df1 = df |> DF.mutate(id: row_index()) |> DF.collect()

    assert DF.to_columns(df1, atom_keys: true) == %{
             a: [1, 3, 5],
             b: [2, 4, 6],
             id: [0, 1, 2]
           }
  end

  test "is available as Series.row_index/0 inside mutate_with" do
    df = DF.new(a: [1, 3, 5], b: [2, 4, 6])

    df1 = DF.mutate_with(df, fn _ -> [id: Series.row_index()] end)

    assert DF.to_columns(df1, atom_keys: true) == %{
             a: [1, 3, 5],
             b: [2, 4, 6],
             id: [0, 1, 2]
           }
  end
end

describe "int_range/3" do
  test "accepts a lazy end bound and matches row_index/0" do
    df = DF.new(a: [1, 3, 5], b: [2, 4, 6])

    df1 = DF.mutate(df, id: int_range(0, len()))

    assert DF.to_columns(df1, atom_keys: true) == %{
             a: [1, 3, 5],
             b: [2, 4, 6],
             id: [0, 1, 2]
           }
  end

  test "honours an explicit step" do
    df = DF.new(a: [1, 3, 5], b: [2, 4, 6])

    df1 = DF.mutate(df, x: int_range(0, 15, 5))

    assert DF.to_columns(df1, atom_keys: true) == %{
             a: [1, 3, 5],
             b: [2, 4, 6],
             x: [0, 5, 10]
           }
  end
end
end

0 comments on commit 9fb0d96

Please sign in to comment.