diff --git a/lib/explorer/backend/data_frame.ex b/lib/explorer/backend/data_frame.ex index 960e7c2a2..d65c24a7b 100644 --- a/lib/explorer/backend/data_frame.ex +++ b/lib/explorer/backend/data_frame.ex @@ -162,7 +162,7 @@ defmodule Explorer.Backend.DataFrame do @callback mask(df, mask :: series) :: df @callback filter_with(df, out_df :: df(), lazy_series()) :: df @callback mutate_with(df, out_df :: df(), mutations :: [{column_name(), lazy_series()}]) :: df - @callback arrange_with( + @callback sort_with( df, out_df :: df(), directions :: [{:asc | :desc, lazy_series()}], diff --git a/lib/explorer/data_frame.ex b/lib/explorer/data_frame.ex index 5a38cae93..cea765b32 100644 --- a/lib/explorer/data_frame.ex +++ b/lib/explorer/data_frame.ex @@ -61,7 +61,7 @@ defmodule Explorer.DataFrame do - `select/2` for picking columns and `discard/2` to discard them - `filter/2` for picking rows based on predicates - `mutate/2` for adding or replacing columns that are functions of existing columns - - `arrange/2` for changing the ordering of rows + - `sort_by/2` for changing the ordering of rows - `distinct/2` for picking unique rows - `summarise/2` for reducing multiple rows down to a single summary - `pivot_longer/3` and `pivot_wider/4` for massaging dataframes into longer or @@ -3041,13 +3041,13 @@ defmodule Explorer.DataFrame do defp append_unless_present([], name), do: [name] @doc """ - Arranges/sorts rows by columns using `Explorer.Query`. + Sorts rows by columns using `Explorer.Query`. > #### Notice {: .notice} > > This is a macro. You must `require Explorer.DataFrame` before using it. - See `arrange_with/2` for a callback version of this function without + See `sort_with/2` for a callback version of this function without `Explorer.Query`. 
## Options @@ -3072,7 +3072,7 @@ defmodule Explorer.DataFrame do A single column name will sort ascending by that column: iex> df = Explorer.DataFrame.new(a: ["b", "c", "a"], b: [1, 2, 3]) - iex> Explorer.DataFrame.arrange(df, a) + iex> Explorer.DataFrame.sort_by(df, a) #Explorer.DataFrame< Polars[3 x 2] a string ["a", "b", "c"] @@ -3082,7 +3082,7 @@ defmodule Explorer.DataFrame do You can also sort descending: iex> df = Explorer.DataFrame.new(a: ["b", "c", "a"], b: [1, 2, 3]) - iex> Explorer.DataFrame.arrange(df, desc: a) + iex> Explorer.DataFrame.sort_by(df, desc: a) #Explorer.DataFrame< Polars[3 x 2] a string ["c", "b", "a"] @@ -3092,7 +3092,7 @@ defmodule Explorer.DataFrame do You can specify how `nil`s are sorted: iex> df = Explorer.DataFrame.new(a: ["b", "c", nil, "a"]) - iex> Explorer.DataFrame.arrange(df, [desc: a], nils: :first) + iex> Explorer.DataFrame.sort_by(df, [desc: a], nils: :first) #Explorer.DataFrame< Polars[4 x 1] a string [nil, "c", "b", "a"] @@ -3101,7 +3101,7 @@ defmodule Explorer.DataFrame do Sorting by more than one column sorts them in the order they are entered: iex> df = Explorer.Datasets.fossil_fuels() - iex> Explorer.DataFrame.arrange(df, asc: total, desc: country) + iex> Explorer.DataFrame.sort_by(df, asc: total, desc: country) #Explorer.DataFrame< Polars[1094 x 10] year integer [2010, 2010, 2011, 2011, 2012, ...] @@ -3118,10 +3118,10 @@ defmodule Explorer.DataFrame do ## Grouped examples - When used in a grouped dataframe, arrange is going to sort each group individually and - then return the entire dataframe with the existing groups. If one of the arrange columns + When used in a grouped dataframe, `sort_by/3` is going to sort each group individually and + then return the entire dataframe with the existing groups. If one of the sort columns is also a group, the sorting for that column is not going to work. It is necessary to - first summarise the desired column and then arrange it. 
+ first summarise the desired column and then sort by it. Here is an example using the Iris dataset. We group by species and then we try to sort the dataframe by species and petal length, but only "petal length" is taken into account @@ -3129,7 +3129,7 @@ defmodule Explorer.DataFrame do iex> df = Explorer.Datasets.iris() iex> grouped = Explorer.DataFrame.group_by(df, "species") - iex> Explorer.DataFrame.arrange(grouped, desc: species, asc: sepal_width) + iex> Explorer.DataFrame.sort_by(grouped, desc: species, asc: sepal_width) #Explorer.DataFrame< Polars[150 x 5] Groups: ["species"] @@ -3141,11 +3141,11 @@ defmodule Explorer.DataFrame do > """ @doc type: :single - defmacro arrange(df, query, opts \\ []) do + defmacro sort_by(df, query, opts \\ []) do quote do require Explorer.Query - Explorer.DataFrame.arrange_with( + Explorer.DataFrame.sort_with( unquote(df), Explorer.Query.query(unquote(query)), unquote(opts) @@ -3153,13 +3153,21 @@ defmodule Explorer.DataFrame do end end + @deprecated "Use sort_by/3 instead" + @doc type: :single + defmacro arrange(df, query, opts \\ []) do + quote do + Explorer.DataFrame.sort_by(unquote(df), unquote(query), unquote(opts)) + end + end + @doc """ - Arranges/sorts rows by columns using a callback function. + Sorts rows by columns using a callback function. The callback receives a lazy dataframe which stores operations instead of values for efficient sorting. - This is a callback version of `arrange/2`. + This is a callback version of `sort_by/2`. 
## Options @@ -3183,7 +3191,7 @@ defmodule Explorer.DataFrame do A single column name will sort ascending by that column: iex> df = Explorer.DataFrame.new(a: ["b", "c", "a"], b: [1, 2, 3]) - iex> Explorer.DataFrame.arrange_with(df, &(&1["a"])) + iex> Explorer.DataFrame.sort_with(df, &(&1["a"])) #Explorer.DataFrame< Polars[3 x 2] a string ["a", "b", "c"] @@ -3193,7 +3201,7 @@ defmodule Explorer.DataFrame do You can also sort descending: iex> df = Explorer.DataFrame.new(a: ["b", "c", "a"], b: [1, 2, 3]) - iex> Explorer.DataFrame.arrange_with(df, &[desc: &1["a"]]) + iex> Explorer.DataFrame.sort_with(df, &[desc: &1["a"]]) #Explorer.DataFrame< Polars[3 x 2] a string ["c", "b", "a"] @@ -3203,7 +3211,7 @@ defmodule Explorer.DataFrame do You can specify how `nil`s are sorted: iex> df = Explorer.DataFrame.new(a: ["b", "c", nil, "a"]) - iex> Explorer.DataFrame.arrange_with(df, &[desc: &1["a"]], nils: :first) + iex> Explorer.DataFrame.sort_with(df, &[desc: &1["a"]], nils: :first) #Explorer.DataFrame< Polars[4 x 1] a string [nil, "c", "b", "a"] @@ -3212,7 +3220,7 @@ defmodule Explorer.DataFrame do Sorting by more than one column sorts them in the order they are entered: iex> df = Explorer.DataFrame.new(a: [3, 1, 3], b: [2, 1, 3]) - iex> Explorer.DataFrame.arrange_with(df, &[desc: &1["a"], asc: &1["b"]]) + iex> Explorer.DataFrame.sort_with(df, &[desc: &1["a"], asc: &1["b"]]) #Explorer.DataFrame< Polars[3 x 2] a integer [3, 3, 1] @@ -3223,7 +3231,7 @@ defmodule Explorer.DataFrame do iex> df = Explorer.Datasets.iris() iex> grouped = Explorer.DataFrame.group_by(df, "species") - iex> Explorer.DataFrame.arrange_with(grouped, &[desc: &1["species"], asc: &1["sepal_width"]]) + iex> Explorer.DataFrame.sort_with(grouped, &[desc: &1["species"], asc: &1["sepal_width"]]) #Explorer.DataFrame< Polars[150 x 5] Groups: ["species"] @@ -3235,13 +3243,13 @@ defmodule Explorer.DataFrame do > """ @doc type: :single - @spec arrange_with( + @spec sort_with( df :: DataFrame.t(), 
(Explorer.Backend.LazyFrame.t() -> Series.lazy_t() | [Series.lazy_t()] | [{:asc | :desc, Series.lazy_t()}]), opts :: [nils: :first | :last, stable: boolean()] ) :: DataFrame.t() - def arrange_with(%DataFrame{} = df, fun, opts \\ []) when is_function(fun, 1) do + def sort_with(%DataFrame{} = df, fun, opts \\ []) when is_function(fun, 1) do [_descending? | opts] = Shared.validate_sort_options!(opts) ldf = Explorer.Backend.LazyFrame.new(df) @@ -3265,12 +3273,16 @@ defmodule Explorer.DataFrame do {:asc, lazy_series} other -> - raise "not a valid lazy series or arrange instruction: #{inspect(other)}" + raise "not a valid lazy series or sort_by instruction: #{inspect(other)}" end) - Shared.apply_impl(df, :arrange_with, [df, dir_and_lazy_series_pairs] ++ opts) + Shared.apply_impl(df, :sort_with, [df, dir_and_lazy_series_pairs] ++ opts) end + @deprecated "Use sort_with/3 instead" + @doc type: :single + def arrange_with(df, fun, opts \\ []), do: sort_with(df, fun, opts) + @doc """ Takes distinct rows by a selection of columns. 
@@ -5645,7 +5657,7 @@ defmodule Explorer.DataFrame do df |> group_by(columns) |> summarise_with(&[counts: Series.count(&1[col])]) - |> arrange_with(&[desc: &1[:counts]]) + |> sort_with(&[desc: &1[:counts]]) end def frequencies(_df, []), do: raise(ArgumentError, "columns cannot be empty") diff --git a/lib/explorer/polars_backend/data_frame.ex b/lib/explorer/polars_backend/data_frame.ex index 06f464261..261f1b602 100644 --- a/lib/explorer/polars_backend/data_frame.ex +++ b/lib/explorer/polars_backend/data_frame.ex @@ -643,7 +643,7 @@ defmodule Explorer.PolarsBackend.DataFrame do end @impl true - def arrange_with( + def sort_with( %DataFrame{} = df, out_df, column_pairs, @@ -659,7 +659,7 @@ defmodule Explorer.PolarsBackend.DataFrame do |> Enum.map(fn {dir, %{args: [col]}} -> {dir == :desc, col} end) |> Enum.unzip() - Shared.apply_dataframe(df, out_df, :df_arrange, [ + Shared.apply_dataframe(df, out_df, :df_sort_by, [ column_names, directions, maintain_order?, @@ -673,7 +673,7 @@ defmodule Explorer.PolarsBackend.DataFrame do |> Enum.map(fn {dir, lazy_series} -> {dir == :desc, to_expr(lazy_series)} end) |> Enum.unzip() - Shared.apply_dataframe(df, out_df, :df_arrange_with, [ + Shared.apply_dataframe(df, out_df, :df_sort_with, [ expressions, directions, maintain_order?, diff --git a/lib/explorer/polars_backend/lazy_frame.ex b/lib/explorer/polars_backend/lazy_frame.ex index 13105be01..8216a013d 100644 --- a/lib/explorer/polars_backend/lazy_frame.ex +++ b/lib/explorer/polars_backend/lazy_frame.ex @@ -354,7 +354,7 @@ defmodule Explorer.PolarsBackend.LazyFrame do end @impl true - def arrange_with( + def sort_with( %DF{groups: []} = df, out_df, column_pairs, @@ -369,7 +369,7 @@ defmodule Explorer.PolarsBackend.LazyFrame do |> Enum.map(fn {direction, lazy_series} -> {direction == :desc, to_expr(lazy_series)} end) |> Enum.unzip() - Shared.apply_dataframe(df, out_df, :lf_arrange_with, [ + Shared.apply_dataframe(df, out_df, :lf_sort_with, [ expressions, directions, 
maintain_order?, @@ -378,8 +378,8 @@ defmodule Explorer.PolarsBackend.LazyFrame do end @impl true - def arrange_with(_df, _out_df, _directions, _maintain_order?, _multithreaded?, _nulls_last?) do - raise "arrange_with/2 with groups is not supported yet for lazy frames" + def sort_with(_df, _out_df, _directions, _maintain_order?, _multithreaded?, _nulls_last?) do + raise "sort_with/2 with groups is not supported yet for lazy frames" end @impl true diff --git a/lib/explorer/polars_backend/native.ex b/lib/explorer/polars_backend/native.ex index 1fa67243c..e1d8517c8 100644 --- a/lib/explorer/polars_backend/native.ex +++ b/lib/explorer/polars_backend/native.ex @@ -59,10 +59,10 @@ defmodule Explorer.PolarsBackend.Native do def df_from_arrow_stream_pointer(_stream_ptr), do: err() - def df_arrange(_df, _by, _reverse, _maintain_order?, _multithreaded?, _nulls_last?, _groups), + def df_sort_by(_df, _by, _reverse, _maintain_order?, _multithreaded?, _nulls_last?, _groups), do: err() - def df_arrange_with( + def df_sort_with( _df, _expressions, _directions, @@ -246,7 +246,7 @@ defmodule Explorer.PolarsBackend.Native do def lf_filter_with(_df, _expression), do: err() - def lf_arrange_with( + def lf_sort_with( _df, _expressions, _directions, diff --git a/lib/explorer/query.ex b/lib/explorer/query.ex index 2b2cff701..537753644 100644 --- a/lib/explorer/query.ex +++ b/lib/explorer/query.ex @@ -43,7 +43,7 @@ defmodule Explorer.Query do Queries are supported in the following operations: - * `Explorer.DataFrame.arrange/2` + * `Explorer.DataFrame.sort_by/2` * `Explorer.DataFrame.filter/2` * `Explorer.DataFrame.mutate/2` * `Explorer.DataFrame.summarise/2` @@ -218,7 +218,7 @@ defmodule Explorer.Query do petal_width_mean f64 [0.2439999999999999, 1.3259999999999998, 2.026] > - `arrange` expects a list of columns to sort by, while for-comprehensions + `sort_by` expects a list of columns to sort by, while for-comprehensions in `filter` generate a list of conditions, which are joined using 
`and`. For example, to filter all entries have both sepal and petal length above average, using a filter on the column name, one could write: diff --git a/lib/explorer/series.ex b/lib/explorer/series.ex index 63a33cecf..059f1bce1 100644 --- a/lib/explorer/series.ex +++ b/lib/explorer/series.ex @@ -1694,7 +1694,7 @@ defmodule Explorer.Series do require Explorer.DataFrame Explorer.DataFrame.new(_: unquote(series)) - |> Explorer.DataFrame.arrange([{unquote(direction), unquote(query)}], unquote(opts)) + |> Explorer.DataFrame.sort_by([{unquote(direction), unquote(query)}], unquote(opts)) |> Explorer.DataFrame.pull(:_) end end @@ -1752,7 +1752,7 @@ defmodule Explorer.Series do {direction, opts} = Keyword.pop(opts, :direction, :asc) Explorer.DataFrame.new(series: series) - |> Explorer.DataFrame.arrange_with(&[{direction, fun.(&1[:series])}], opts) + |> Explorer.DataFrame.sort_with(&[{direction, fun.(&1[:series])}], opts) |> Explorer.DataFrame.pull(:series) end diff --git a/native/explorer/src/dataframe.rs b/native/explorer/src/dataframe.rs index d3ef7b95a..0e74f9959 100644 --- a/native/explorer/src/dataframe.rs +++ b/native/explorer/src/dataframe.rs @@ -323,7 +323,7 @@ fn arrow_to_explorer_error(error: impl std::fmt::Debug) -> ExplorerError { } #[rustler::nif(schedule = "DirtyCpu")] -pub fn df_arrange( +pub fn df_sort_by( df: ExDataFrame, by_columns: Vec, reverse: Vec, @@ -354,7 +354,7 @@ pub fn df_arrange( } #[rustler::nif(schedule = "DirtyCpu")] -pub fn df_arrange_with( +pub fn df_sort_with( data: ExDataFrame, expressions: Vec, directions: Vec, diff --git a/native/explorer/src/lazyframe.rs b/native/explorer/src/lazyframe.rs index 6a4beae39..0d6d193db 100644 --- a/native/explorer/src/lazyframe.rs +++ b/native/explorer/src/lazyframe.rs @@ -105,7 +105,7 @@ pub fn lf_filter_with(data: ExLazyFrame, ex_expr: ExExpr) -> Result, directions: Vec, diff --git a/native/explorer/src/lib.rs b/native/explorer/src/lib.rs index 8b0da8a5b..927abb94a 100644 --- 
a/native/explorer/src/lib.rs +++ b/native/explorer/src/lib.rs @@ -75,8 +75,8 @@ rustler::init!( "Elixir.Explorer.PolarsBackend.Native", [ df_from_arrow_stream_pointer, - df_arrange, - df_arrange_with, + df_sort_by, + df_sort_with, df_concat_columns, df_concat_rows, df_describe, @@ -294,7 +294,7 @@ rustler::init!( lf_from_parquet_cloud, lf_from_ndjson, lf_filter_with, - lf_arrange_with, + lf_sort_with, lf_distinct, lf_mutate_with, lf_summarise_with, diff --git a/notebooks/exploring_explorer.livemd b/notebooks/exploring_explorer.livemd index 8ba9fb669..b4f7a74ed 100644 --- a/notebooks/exploring_explorer.livemd +++ b/notebooks/exploring_explorer.livemd @@ -797,7 +797,7 @@ In `Explorer`, like in `dplyr`, we have five main verbs to work with dataframes: * select * filter * mutate -* arrange +* sort * summarise We are going to explore then in this notebook, but first we need to "require" @@ -1050,12 +1050,12 @@ DF.mutate(df, %{"gas_fuel" => gas_fuel - 10}) `DF.transmute/2`, which is `DF.mutate/2` that only retains the specified columns, is forthcoming. -### Arrange +### Sort Sorting the dataframe is pretty straightforward. ```elixir -DF.arrange(df, year) +DF.sort_by(df, year) ``` @@ -1079,7 +1079,7 @@ DF.arrange(df, year) But it comes with some tricks up its sleeve. ```elixir -DF.arrange(df, asc: total, desc: year) +DF.sort_by(df, asc: total, desc: year) ``` @@ -1100,10 +1100,10 @@ DF.arrange(df, asc: total, desc: year) > ``` -As the examples show, `arrange/2` is a macro, and therefore you can use some functions to arrange your dataframe: +As the examples show, `sort_by/2` is a macro, and therefore you can use some Series functions to sort your dataframe: ```elixir -DF.arrange(df, asc: Series.window_sum(total, 2)) +DF.sort_by(df, asc: window_sum(total, 2)) ``` @@ -2293,7 +2293,7 @@ But what we care about the most is aggregating! 
Let's see which country has the ```elixir grouped |> DF.summarise(max_per_capita: max(per_capita)) -|> DF.arrange(desc: max_per_capita) +|> DF.sort_by(desc: max_per_capita) ``` @@ -2337,7 +2337,7 @@ DF.summarise(grouped, min_per_capita: min(per_capita), min_total: min(total)) > ``` -Speaking of `mutate`, it's 'group-aware'. As are `arrange`, `distinct`, and `n_rows`. +Speaking of `mutate`, it's 'group-aware'. As are `sort_by`, `distinct`, and `n_rows`. ```elixir DF.mutate(grouped, total_window_sum: window_sum(total, 3), rows_in_group: count(country)) @@ -2369,7 +2369,7 @@ It's also possible to use aggregations inside other functions: ```elixir grouped |> DF.summarise(greater_than_9: greater(max(per_capita), 9.0), per_capita_max: max(per_capita)) -|> DataFrame.arrange(desc: per_capita_max) +|> DataFrame.sort_by(desc: per_capita_max) ``` ### That's it! diff --git a/test/explorer/data_frame/grouped_test.exs b/test/explorer/data_frame/grouped_test.exs index 7c2b1c0a6..9f3f73602 100644 --- a/test/explorer/data_frame/grouped_test.exs +++ b/test/explorer/data_frame/grouped_test.exs @@ -163,7 +163,7 @@ defmodule Explorer.DataFrame.GroupedTest do total_min: min(total), cement_median: median(cement) ) - |> DF.arrange(country) + |> DF.sort_by(country) assert DF.to_columns(df1, atom_keys: true) == %{ country: [ @@ -437,10 +437,10 @@ defmodule Explorer.DataFrame.GroupedTest do end end - describe "arrange/2" do + describe "sort_by/2" do test "sorts by group", %{df: df} do - df = DF.arrange(df, total) - grouped_df = df |> DF.group_by("country") |> DF.arrange(total) + df = DF.sort_by(df, total) + grouped_df = df |> DF.group_by("country") |> DF.sort_by(total) assert df["total"][0] == Series.min(df["total"]) @@ -452,12 +452,12 @@ defmodule Explorer.DataFrame.GroupedTest do end end - describe "arrange_with/2" do + describe "sort_with/2" do test "sorts by group", %{df: df} do grouped_df = df |> DF.group_by("country") - |> DF.arrange_with(fn ldf -> [asc: ldf["total"]] end) + |> 
DF.sort_with(fn ldf -> [asc: ldf["total"]] end) assert grouped_df |> DF.ungroup() diff --git a/test/explorer/data_frame/lazy_test.exs b/test/explorer/data_frame/lazy_test.exs index ec9993e98..eb8c60b21 100644 --- a/test/explorer/data_frame/lazy_test.exs +++ b/test/explorer/data_frame/lazy_test.exs @@ -517,8 +517,7 @@ defmodule Explorer.DataFrame.LazyTest do test "from_ipc/2", %{config: config} do path = "s3://test-bucket/test-lazy-writes/wine.ipc" - assert {:error, error} = - DF.from_ipc(path, config: config, lazy: true) + assert {:error, error} = DF.from_ipc(path, config: config, lazy: true) assert error == ArgumentError.exception( @@ -620,10 +619,10 @@ defmodule Explorer.DataFrame.LazyTest do end end - describe "arrange_with/2" do + describe "sort_with/2" do test "with a simple df and asc order" do ldf = DF.new([a: [1, 2, 4, 3, 6, 5], b: ["a", "b", "d", "c", "f", "e"]], lazy: true) - ldf1 = DF.arrange_with(ldf, fn ldf -> [asc: ldf["a"]] end) + ldf1 = DF.sort_with(ldf, fn ldf -> [asc: ldf["a"]] end) df1 = DF.collect(ldf1) @@ -635,7 +634,7 @@ defmodule Explorer.DataFrame.LazyTest do test "with a simple df one column and without order" do ldf = DF.new([a: [1, 2, 4, 3, 6, 5], b: ["a", "b", "d", "c", "f", "e"]], lazy: true) - ldf1 = DF.arrange_with(ldf, fn ldf -> ldf["a"] end) + ldf1 = DF.sort_with(ldf, fn ldf -> ldf["a"] end) df1 = DF.collect(ldf1) @@ -647,7 +646,7 @@ defmodule Explorer.DataFrame.LazyTest do test "with a simple df and desc order" do ldf = DF.new([a: [1, 2, 4, 3, 6, 5], b: ["a", "b", "d", "c", "f", "e"]], lazy: true) - ldf1 = DF.arrange_with(ldf, fn ldf -> [desc: ldf["a"]] end) + ldf1 = DF.sort_with(ldf, fn ldf -> [desc: ldf["a"]] end) df1 = DF.collect(ldf1) @@ -659,7 +658,7 @@ defmodule Explorer.DataFrame.LazyTest do test "with a simple df and just the lazy series" do ldf = DF.new([a: [1, 2, 4, 3, 6, 5], b: ["a", "b", "d", "c", "f", "e"]], lazy: true) - ldf1 = DF.arrange_with(ldf, fn ldf -> [ldf["a"]] end) + ldf1 = DF.sort_with(ldf, fn ldf -> 
[ldf["a"]] end) df1 = DF.collect(ldf1) @@ -669,9 +668,9 @@ defmodule Explorer.DataFrame.LazyTest do } end - test "with a simple df and arrange by two columns" do + test "with a simple df and sort_by by two columns" do ldf = DF.new([a: [1, 2, 2, 3, 6, 5], b: [1.1, 2.5, 2.2, 3.3, 4.0, 5.1]], lazy: true) - ldf1 = DF.arrange_with(ldf, fn ldf -> [asc: ldf["a"], asc: ldf["b"]] end) + ldf1 = DF.sort_with(ldf, fn ldf -> [asc: ldf["a"], asc: ldf["b"]] end) df1 = DF.collect(ldf1) @@ -683,7 +682,7 @@ defmodule Explorer.DataFrame.LazyTest do test "with a simple df and window function" do ldf = DF.new([a: [1, 2, 4, 3, 6, 5], b: ["a", "b", "d", "c", "f", "e"]], lazy: true) - ldf1 = DF.arrange_with(ldf, fn ldf -> [desc: Series.window_mean(ldf["a"], 2)] end) + ldf1 = DF.sort_with(ldf, fn ldf -> [desc: Series.window_mean(ldf["a"], 2)] end) df1 = DF.collect(ldf1) @@ -698,9 +697,9 @@ defmodule Explorer.DataFrame.LazyTest do ldf = DF.group_by(ldf, "b") assert_raise RuntimeError, - "arrange_with/2 with groups is not supported yet for lazy frames", + "sort_with/2 with groups is not supported yet for lazy frames", fn -> - DF.arrange_with(ldf, fn ldf -> [asc: ldf["a"], asc: ldf["b"]] end) + DF.sort_with(ldf, fn ldf -> [asc: ldf["a"], asc: ldf["b"]] end) end end end diff --git a/test/explorer/data_frame_test.exs b/test/explorer/data_frame_test.exs index 76259dde4..47451e45f 100644 --- a/test/explorer/data_frame_test.exs +++ b/test/explorer/data_frame_test.exs @@ -1085,8 +1085,7 @@ defmodule Explorer.DataFrameTest do c = Series.from_list([6, 2, 1]) df = DF.new(a: a, b: b, c: c) - df1 = - DF.mutate(df, select1: select(a, b, c)) + df1 = DF.mutate(df, select1: select(a, b, c)) assert DF.to_columns(df1, atom_keys: true) == %{ a: [true, false, true], @@ -1119,8 +1118,7 @@ defmodule Explorer.DataFrameTest do c = Series.from_list([6, 2, 1]) df = DF.new(a: a, b: b, c: c) - df1 = - DF.mutate(df, select1: select(a, "passed", "failed"), select2: select(b > c, 50, 0)) + df1 = DF.mutate(df, select1: 
select(a, "passed", "failed"), select2: select(b > c, 50, 0)) assert DF.to_columns(df1, atom_keys: true) == %{ a: [true, false, true], @@ -1597,8 +1595,7 @@ defmodule Explorer.DataFrameTest do end test "replace characters in a string" do - df = - DF.new(a: ["2,000", "2,000,000", ","]) + df = DF.new(a: ["2,000", "2,000,000", ","]) df1 = DF.mutate(df, @@ -1882,18 +1879,18 @@ defmodule Explorer.DataFrameTest do end end - describe "arrange/3" do + describe "sort_by/3" do test "raises with invalid column names", %{df: df} do assert_raise ArgumentError, ~r"could not find column name \"test\"", - fn -> DF.arrange(df, test) end + fn -> DF.sort_by(df, test) end end end - describe "arrange_with/2" do + describe "sort_with/2" do test "with a simple df and asc order" do df = DF.new(a: [1, 2, 4, 3, 6, 5], b: ["a", "b", "d", "c", "f", "e"]) - df1 = DF.arrange_with(df, fn ldf -> [asc: ldf["a"]] end) + df1 = DF.sort_with(df, fn ldf -> [asc: ldf["a"]] end) assert DF.to_columns(df1, atom_keys: true) == %{ a: [1, 2, 3, 4, 5, 6], @@ -1903,7 +1900,7 @@ defmodule Explorer.DataFrameTest do test "with a simple df one column and without order" do df = DF.new(a: [1, 2, 4, 3, 6, 5], b: ["a", "b", "d", "c", "f", "e"]) - df1 = DF.arrange_with(df, fn ldf -> ldf["a"] end) + df1 = DF.sort_with(df, fn ldf -> ldf["a"] end) assert DF.to_columns(df1, atom_keys: true) == %{ a: [1, 2, 3, 4, 5, 6], @@ -1913,7 +1910,7 @@ defmodule Explorer.DataFrameTest do test "with a simple df and desc order" do df = DF.new(a: [1, 2, 4, 3, 6, 5], b: ["a", "b", "d", "c", "f", "e"]) - df1 = DF.arrange_with(df, fn ldf -> [desc: ldf["a"]] end) + df1 = DF.sort_with(df, fn ldf -> [desc: ldf["a"]] end) assert DF.to_columns(df1, atom_keys: true) == %{ a: [6, 5, 4, 3, 2, 1], @@ -1923,7 +1920,7 @@ defmodule Explorer.DataFrameTest do test "with a simple df and just the lazy series" do df = DF.new(a: [1, 2, 4, 3, 6, 5], b: ["a", "b", "d", "c", "f", "e"]) - df1 = DF.arrange_with(df, fn ldf -> [ldf["a"]] end) + df1 = 
DF.sort_with(df, fn ldf -> [ldf["a"]] end) assert DF.to_columns(df1, atom_keys: true) == %{ a: [1, 2, 3, 4, 5, 6], @@ -1931,9 +1928,9 @@ defmodule Explorer.DataFrameTest do } end - test "with a simple df and arrange by two columns" do + test "with a simple df and sort_by by two columns" do df = DF.new(a: [1, 2, 2, 3, 6, 5], b: [1.1, 2.5, 2.2, 3.3, 4.0, 5.1]) - df1 = DF.arrange_with(df, fn ldf -> [asc: ldf["a"], asc: ldf["b"]] end) + df1 = DF.sort_with(df, fn ldf -> [asc: ldf["a"], asc: ldf["b"]] end) assert DF.to_columns(df1, atom_keys: true) == %{ a: [1, 2, 2, 3, 5, 6], @@ -1943,7 +1940,7 @@ defmodule Explorer.DataFrameTest do test "with a simple df and window function" do df = DF.new(a: [1, 2, 4, 3, 6, 5], b: ["a", "b", "d", "c", "f", "e"]) - df1 = DF.arrange_with(df, fn ldf -> [desc: Series.window_mean(ldf["a"], 2)] end) + df1 = DF.sort_with(df, fn ldf -> [desc: Series.window_mean(ldf["a"], 2)] end) assert DF.to_columns(df1, atom_keys: true) == %{ a: [5, 6, 3, 4, 2, 1], @@ -1953,13 +1950,13 @@ defmodule Explorer.DataFrameTest do test "with a simple df and nils" do df = DF.new(a: [1, 2, nil, 3]) - df1 = DF.arrange_with(df, fn ldf -> [asc: ldf["a"]] end, nils: :first) + df1 = DF.sort_with(df, fn ldf -> [asc: ldf["a"]] end, nils: :first) assert DF.to_columns(df1, atom_keys: true) == %{ a: [nil, 1, 2, 3] } - df2 = DF.arrange_with(df, fn ldf -> [asc: ldf["a"]] end, nils: :last) + df2 = DF.sort_with(df, fn ldf -> [asc: ldf["a"]] end, nils: :last) assert DF.to_columns(df2, atom_keys: true) == %{ a: [1, 2, 3, nil] @@ -1970,7 +1967,7 @@ defmodule Explorer.DataFrameTest do df = DF.new(a: [1, 2]) assert_raise RuntimeError, "expecting a lazy series, got: :foo", fn -> - DF.arrange_with(df, fn _ldf -> [desc: :foo] end) + DF.sort_with(df, fn _ldf -> [desc: :foo] end) end end @@ -1980,7 +1977,7 @@ defmodule Explorer.DataFrameTest do message = "expecting a valid direction, which is :asc or :desc, got: :descending" assert_raise RuntimeError, message, fn -> - DF.arrange_with(df, 
fn ldf -> [descending: ldf["a"]] end) + DF.sort_with(df, fn ldf -> [descending: ldf["a"]] end) end end end