diff --git a/CHANGELOG.md b/CHANGELOG.md index 67f181f4c..8e9bf07e3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,106 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Add `explode/2` to `Explorer.DataFrame`. This function is useful to expand + the contents of a `{:list, inner_dtype}` series into an "`inner_dtype`" series. + +- Add the new series functions `all?/1` and `any?/1`, to work with boolean series. + +- Add support for the "struct" dtype. This new dtype represents the struct + dtype from Polars/Arrow. + +- Add `map/2` and `map_with/2` to the `Explorer.Series` module. + This change enables the usage of the `Explorer.Query` features in a series. + +- Add `sort_by/2` and `sort_with/2` to the `Explorer.Series` module. + This change enables the usage of the lazy computations and the `Explorer.Query` + module. + +- Add `unnest/2` to `Explorer.DataFrame`. It works by taking the fields of a "struct" - + the new dtype - and transforming them into columns. + +- Add pairwise correlation - `Explorer.DataFrame.correlation/2` - to calculate the + correlation between numeric columns inside a data frame. + +- Add pairwise covariance - `Explorer.DataFrame.covariance/2` - to calculate the + covariance between numeric columns inside a data frame. + +- Add support for more integer dtypes. This change introduces new signed and + unsigned integer dtypes: + - `{:s, 8}`, `{:s, 16}`, `{:s, 32}` + - `{:u, 8}`, `{:u, 16}`, `{:u, 32}`, `{:u, 64}`. + + The existing `:integer` dtype is now represented as `{:s, 64}`, and it's still + the default dtype for integers. But series and data frames can now work with the + new dtypes. Short names for these new dtypes can be used in functions like + `Explorer.Series.from_list/2`. For example, `{:u, 32}` can be represented with + the atom `:u32`. + + This may bring more interoperability with Nx, and with Arrow-related things, like + ADBC and Parquet. 
+ +- Add `ewm_standard_deviation/2` and `ewm_variance/2` to `Explorer.Series`. + They calculate the "exponentially weighted moving" variance and standard deviation. + +- Add support for the `:skip_rows_after_header` option for the CSV reader functions. + +- Support `{:list, numeric_dtype}` for `Explorer.Series.frequencies/1`. + +- Support pins in `cond`, inside the context of `Explorer.Query`. + +- Introduce the `:null` dtype. This is a special dtype from Polars and Apache Arrow + to represent "all null" series. + +- Add `Explorer.DataFrame.transpose/2` to transpose a data frame. + +### Changed + +- Rename the functions related to sorting/arranging of the `Explorer.DataFrame`. + Now `arrange_with` is named `sort_with`, and `arrange` is `sort_by`. + + The `sort_by/3` is a macro and it is going to work using the `Explorer.Query` + module. On the other hand, the `sort_with/2` uses a callback function. + +- Remove unnecessary casts to `{:s, 64}` now that we support more integer dtypes. + It affects some functions, like the following in the `Explorer.Series` module: + + - `argsort` + - `count` + - `rank` + - `day_of_week`, `day_of_year`, `week_of_year`, `month`, `year`, `hour`, `minute`, `second` + - `abs` + - `clip` + - `lengths` + - `slice` + - `n_distinct` + - `frequencies` + + And also some functions from the `Explorer.DataFrame` module: + + - `mutate` - mostly because of series changes + - `summarise` - mostly because of series changes + - `slice` + +### Fixed + +- Fix inspection of series and data frames between nodes. + +- Fix cast of `:string` series to `{:datetime, any()}`. + +- Fix mismatched types in `Explorer.Series.pow/2`, making it more consistent. + +- Normalize sorting options. + +- Fix functions with dtype mismatching the result from Polars. 
+ This fix is affecting the following functions: + + - `quantile/2` in the context of a lazy series + - `mode/1` inside a summarisation + - `strftime/2` in the context of a lazy series + - `mutate_with/2` when creating a column from a `NaiveDateTime` or `Explorer.Duration`. + ## [v0.7.2] - 2023-11-30 ### Added diff --git a/mix.lock b/mix.lock index 4fe008d7c..accd491e2 100644 --- a/mix.lock +++ b/mix.lock @@ -14,7 +14,7 @@ "dll_loader_helper_beam": {:hex, :dll_loader_helper_beam, "1.2.0", "557c43befb8e3b119b718da302adccde3bd855acdb999498a14a2a8d2814b8b9", [:rebar3], [], "hexpm", "a2115d4bf1cca488a7b33f3c648847f64019b32c0382d10286d84dd5c3cbc0e5"}, "earmark_parser": {:hex, :earmark_parser, "1.4.39", "424642f8335b05bb9eb611aa1564c148a8ee35c9c8a8bba6e129d51a3e3c6769", [:mix], [], "hexpm", "06553a88d1f1846da9ef066b87b57c6f605552cfbe40d20bd8d59cc6bde41944"}, "elixir_make": {:hex, :elixir_make, "0.7.7", "7128c60c2476019ed978210c245badf08b03dbec4f24d05790ef791da11aa17c", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}], "hexpm", "5bc19fff950fad52bbe5f211b12db9ec82c6b34a9647da0c2224b8b8464c7e6c"}, - "ex_doc": {:hex, :ex_doc, "0.31.0", "06eb1dfd787445d9cab9a45088405593dd3bb7fe99e097eaa71f37ba80c7a676", [:mix], [{:earmark_parser, "~> 1.4.39", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.1", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1", [hex: :makeup_erlang, repo: "hexpm", optional: false]}], "hexpm", "5350cafa6b7f77bdd107aa2199fe277acf29d739aba5aee7e865fc680c62a110"}, + "ex_doc": {:hex, :ex_doc, "0.31.1", "8a2355ac42b1cc7b2379da9e40243f2670143721dd50748bf6c3b1184dae2089", [:mix], [{:earmark_parser, "~> 1.4.39", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.1", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, 
repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1", [hex: :makeup_erlang, repo: "hexpm", optional: false]}], "hexpm", "3178c3a407c557d8343479e1ff117a96fd31bafe52a039079593fb0524ef61b0"}, "fss": {:hex, :fss, "0.1.1", "9db2344dbbb5d555ce442ac7c2f82dd975b605b50d169314a20f08ed21e08642", [:mix], [], "hexpm", "78ad5955c7919c3764065b21144913df7515d52e228c09427a004afe9c1a16b0"}, "jason": {:hex, :jason, "1.4.1", "af1504e35f629ddcdd6addb3513c3853991f694921b1b9368b0bd32beb9f1b63", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "fbb01ecdfd565b56261302f7e1fcc27c4fb8f32d56eab74db621fc154604a7a1"}, "makeup": {:hex, :makeup, "1.1.1", "fa0bc768698053b2b3869fa8a62616501ff9d11a562f3ce39580d60860c3a55e", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "5dc62fbdd0de44de194898b6710692490be74baa02d9d108bc29f007783b0b48"},