Skip to content

Commit

Permalink
from_json changes
Browse files Browse the repository at this point in the history
  • Loading branch information
lkarthee committed Jan 31, 2024
1 parent f48624d commit 24c4f82
Show file tree
Hide file tree
Showing 11 changed files with 84 additions and 89 deletions.
6 changes: 3 additions & 3 deletions lib/explorer/backend/lazy_series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ defmodule Explorer.Backend.LazySeries do
member: 3,
# Struct functions
field: 2,
json_decode: 3
from_json: 3
]

@comparison_operations [:equal, :not_equal, :greater, :greater_equal, :less, :less_equal]
Expand Down Expand Up @@ -1096,8 +1096,8 @@ defmodule Explorer.Backend.LazySeries do
end

@impl true
def json_decode(series, dtype, infer_schema_length) do
data = new(:json_decode, [lazy_series!(series), dtype, infer_schema_length], dtype)
def from_json(series, dtype, infer_schema_length) do
data = new(:from_json, [lazy_series!(series), dtype, infer_schema_length], dtype)

Backend.Series.new(data, dtype)
end
Expand Down
2 changes: 1 addition & 1 deletion lib/explorer/backend/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ defmodule Explorer.Backend.Series do

# Struct
@callback field(s, String.t()) :: s
@callback json_decode(s, option(dtype()), option(non_neg_integer())) :: s
@callback from_json(s, option(dtype()), option(non_neg_integer())) :: s

# Functions

Expand Down
11 changes: 8 additions & 3 deletions lib/explorer/polars_backend/expression.ex
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,7 @@ defmodule Explorer.PolarsBackend.Expression do
member: 3,

# Structs
field: 2,
json_decode: 3
field: 2
]

@custom_expressions [
Expand All @@ -162,7 +161,8 @@ defmodule Explorer.PolarsBackend.Expression do
concat: 1,
column: 1,
correlation: 4,
covariance: 3
covariance: 3,
from_json: 3
]

missing =
Expand All @@ -173,6 +173,11 @@ defmodule Explorer.PolarsBackend.Expression do
raise ArgumentError, "missing #{inspect(__MODULE__)} nodes: #{inspect(missing)}"
end

def to_expr(%LazySeries{op: :from_json, args: _}) do
raise ArgumentError,
"from_json/2 is not supported as a dataframe operation. Use Series.from_json/2 instead."
end

def to_expr(%LazySeries{op: :cast, args: [lazy_series, dtype]}) do
lazy_series_expr = to_expr(lazy_series)
Native.expr_cast(lazy_series_expr, dtype)
Expand Down
2 changes: 1 addition & 1 deletion lib/explorer/polars_backend/native.ex
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,7 @@ defmodule Explorer.PolarsBackend.Native do
def s_member(_s, _value, _inner_dtype), do: err()

def s_field(_s, _name), do: err()
def s_json_decode(_s, _dtype, _infer_schema_length), do: err()
def s_from_json(_s, _dtype, _infer_schema_length), do: err()

defp err, do: :erlang.nif_error(:nif_not_loaded)
end
4 changes: 2 additions & 2 deletions lib/explorer/polars_backend/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -739,8 +739,8 @@ defmodule Explorer.PolarsBackend.Series do
do: Shared.apply_series(series, :s_field, [name])

@impl true
def json_decode(series, dtype, infer_schema_length),
do: Shared.apply_series(series, :s_json_decode, [dtype, infer_schema_length])
def from_json(series, dtype, infer_schema_length),
do: Shared.apply_series(series, :s_from_json, [dtype, infer_schema_length])

# Polars specific functions

Expand Down
71 changes: 26 additions & 45 deletions lib/explorer/polars_backend/shared.ex
Original file line number Diff line number Diff line change
Expand Up @@ -42,56 +42,37 @@ defmodule Explorer.PolarsBackend.Shared do
def apply_dataframe(%DataFrame{} = df, %DataFrame{} = out_df, fun, args) do
case apply(Native, fun, [df.data | args]) do
{:ok, %module{} = new_df} when module in @polars_df ->
{struct?, dtypes} =
if @check_frames do
# We need to collect here, because the lazy frame may not have
# the full picture of the result yet.
check_df =
if match?(%PolarsLazyFrame{}, new_df) do
{:ok, new_df} = Native.lf_collect(new_df)
create_dataframe(new_df)
else
create_dataframe(new_df)
end

# When dealing with structs in mutate, we may not know dtype of struct series.
# We have to accept the dtype returned by polars, else we will have mismatch error.
{struct?, out_dtypes} =
if fun == :df_mutate_with_exprs do
Enum.reduce(check_df.dtypes, {false, out_df.dtypes}, fn
{key, {:struct, _} = dtype}, {_, dtypes} -> {true, Map.put(dtypes, key, dtype)}
_, acc -> acc
end)
else
{false, out_df.dtypes}
end

if Enum.sort(out_df.names) != Enum.sort(check_df.names) or
out_dtypes != check_df.dtypes do
raise """
DataFrame mismatch.
expected:
names: #{inspect(out_df.names)}
dtypes: #{inspect(out_df.dtypes)}
got:
names: #{inspect(check_df.names)}
dtypes: #{inspect(check_df.dtypes)}
"""
if @check_frames do
# We need to collect here, because the lazy frame may not have
# the full picture of the result yet.
check_df =
if match?(%PolarsLazyFrame{}, new_df) do
{:ok, new_df} = Native.lf_collect(new_df)
create_dataframe(new_df)
else
create_dataframe(new_df)
end

{struct?, out_dtypes}
end
if Enum.sort(out_df.names) != Enum.sort(check_df.names) or
out_df.dtypes != check_df.dtypes do
raise """
DataFrame mismatch.
expected:
names: #{inspect(out_df.names)}
dtypes: #{inspect(out_df.dtypes)}
if struct? do
%{out_df | data: new_df, dtypes: dtypes}
else
%{out_df | data: new_df}
got:
names: #{inspect(check_df.names)}
dtypes: #{inspect(check_df.dtypes)}
"""
end
end

%{out_df | data: new_df}

{:error, error} ->
raise runtime_error(error)
end
Expand Down
22 changes: 17 additions & 5 deletions lib/explorer/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -6044,26 +6044,38 @@ defmodule Explorer.Series do
end

@doc """
Decode json from string.
Converts a string series containing valid json to a struct series.
## Examples
iex> s = Series.from_list(["{\\"a\\":1}"])
iex> Series.json_decode(s)
iex> Series.from_json(s)
#Explorer.Series<
Polars[1]
struct[1] [%{"a" => 1}]
>
Will raise `RuntimeError` for invalid json.
`Series.from_json/2` is a series only operation. It is not supported as a data frame operation.
`DataFrame.put/3` can be used to add converted series to `DataFrame`.
iex> df = Explorer.DataFrame.new([%{a: "{\\"n\\": 1}"}])
iex> s = Series.from_json(df["a"])
iex> Explorer.DataFrame.put(df, "aj", s)
#Explorer.DataFrame<
Polars[1 x 2]
a string ["{\\"n\\": 1}"]
aj struct[1] [%{"n" => 1}]
>
"""
@doc type: :struct_wise
@spec json_decode(Series.t(), Keyword.t()) :: Series.t()
def json_decode(%Series{dtype: :string} = series, opts \\ []) do
@spec from_json(Series.t(), Keyword.t()) :: Series.t()
def from_json(%Series{dtype: :string} = series, opts \\ []) do
opts = Keyword.validate!(opts, [:dtype, :infer_schema_length])
dtype = if opts[:dtype], do: Shared.normalise_dtype!(opts[:dtype])

apply_series(series, :json_decode, [dtype, opts[:infer_schema_length]])
apply_series(series, :from_json, [dtype, opts[:infer_schema_length]])
end

# Helpers
Expand Down
4 changes: 2 additions & 2 deletions native/explorer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ rustler::init!(
expr_member,
// struct expressions
expr_field,
expr_json_decode,
// expr_json_decode,
// lazyframe
lf_collect,
lf_describe_plan,
Expand Down Expand Up @@ -481,7 +481,7 @@ rustler::init!(
s_lengths,
s_member,
s_field,
s_json_decode,
s_from_json,
],
load = on_load
);
2 changes: 1 addition & 1 deletion native/explorer/src/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1826,7 +1826,7 @@ pub fn s_field(s: ExSeries, name: &str) -> Result<ExSeries, ExplorerError> {
}

#[rustler::nif]
pub fn s_json_decode(
pub fn s_from_json(
s: ExSeries,
ex_dtype: Option<ExSeriesDtype>,
infer_schema_length: Option<usize>,
Expand Down
26 changes: 0 additions & 26 deletions test/explorer/data_frame_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -1870,32 +1870,6 @@ defmodule Explorer.DataFrameTest do
member?: [true, false]
}
end

test "extracts struct from json - json_decode" do
df = DF.new([%{a: "{\"n\": 1}"}])
dfj = DF.mutate(df, aj: json_decode(a, dtype: {:struct, %{"n" => {:s, 64}}}))
assert dfj.dtypes == %{"a" => :string, "aj" => {:struct, %{"n" => {:s, 64}}}}
assert DF.to_rows(dfj) == [%{"a" => "{\"n\": 1}", "aj" => %{"n" => 1}}]
end

test "extracts struct from json - json_decode with dtype" do
df = DF.new([%{a: "{\"n\": 1}"}])
dfj = DF.mutate(df, aj: json_decode(a, dtype: {:struct, %{"n" => {:f, 64}}}))
assert dfj.dtypes == %{"a" => :string, "aj" => {:struct, %{"n" => {:f, 64}}}}
assert DF.to_rows(dfj) == [%{"a" => "{\"n\": 1}", "aj" => %{"n" => 1.0}}]
end

test "extracts struct from json - json_decode with infer_schema_length" do
df = DF.new([%{a: "{\"n\": 1}"}])

dfj =
DF.mutate(df,
aj: json_decode(a, infer_schema_length: 100)
)

assert dfj.dtypes == %{"a" => :string, "aj" => {:struct, %{"n" => {:s, 64}}}}
assert DF.to_rows(dfj) == [%{"a" => "{\"n\": 1}", "aj" => %{"n" => 1}}]
end
end

describe "sort_by/3" do
Expand Down
23 changes: 23 additions & 0 deletions test/explorer/series_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -5866,4 +5866,27 @@ defmodule Explorer.SeriesTest do
end
end
end

describe "from_json/2" do
test "extracts struct from json" do
s = Series.from_list(["{\"n\": 1}"])
sj = Series.from_json(s)
assert sj.dtype == {:struct, %{"n" => {:s, 64}}}
assert Series.to_list(sj) == [%{"n" => 1}]
end

test "extracts struct from json with dtype" do
s = Series.from_list(["{\"n\": 1}"])
sj = Series.from_json(s, dtype: {:struct, %{"n" => {:f, 64}}})
assert sj.dtype == {:struct, %{"n" => {:f, 64}}}
assert Series.to_list(sj) == [%{"n" => 1.0}]
end

test "extracts struct from json with infer_schema_length" do
s = Series.from_list(["{\"n\": 1}"])
sj = Series.from_json(s, infer_schema_length: 100)
assert sj.dtype == {:struct, %{"n" => {:s, 64}}}
assert Series.to_list(sj) == [%{"n" => 1}]
end
end
end

0 comments on commit 24c4f82

Please sign in to comment.