From 6bae86cf4699edf8b33f5e7532e2e925202ee7e4 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 28 Sep 2023 10:41:44 +0100 Subject: [PATCH 1/2] update/insert columns -> with_columns --- .../dataframe_api/dataframe_object.py | 46 ++++--------------- spec/purpose_and_scope.md | 2 +- 2 files changed, 11 insertions(+), 37 deletions(-) diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 6460eb07..751b0e60 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -180,53 +180,27 @@ def filter(self, mask: Column[Bool]) -> DataFrame: """ ... - def insert_column(self, column: Column[Any]) -> DataFrame: + def with_columns(self, columns: Column[Any] | Sequence[Column[Any]], /) -> DataFrame: """ - Insert column into DataFrame at rightmost location. + Insert new column(s), or update values in existing ones. - The column's name will be used as the label in the resulting dataframe. - To insert the column with a different name, combine with `Column.rename`, - e.g.: + If inserting new columns, the columns' names will be used as the labels, + and the columns will be inserted at the rightmost location. - .. code-block:: python - - new_column = df.get_column_by_name('a') + 1 - df = df.insert_column(new_column.rename('a_plus_1')) - - If you need to insert the column at a different location, combine with - :meth:`select`, e.g.: - - .. code-block:: python - - new_column = df.get_column_by_name('a') + 1 - new_columns_names = ['a_plus_1'] + df.column_names - df = df.insert_column(new_column.rename('a_plus_1')) - df = df.select(new_column_names) - - Parameters - ---------- - column : Column - """ - ... - - def update_columns(self, columns: Column[Any] | Sequence[Column[Any]], /) -> DataFrame: - """ - Update values in existing column(s) from Dataframe. 
- - The column's name will be used to tell which column to update. - To update a column with a different name, combine with :meth:`Column.rename`, - e.g.: + If updating existing columns, their names will be used to tell which columns + to update. To update a column with a different name, combine with + :meth:`Column.rename`, e.g.: .. code-block:: python new_column = df.get_column_by_name('a') + 1 - df = df.update_column(new_column.rename('b')) + df = df.with_columns(new_column.rename('b')) Parameters ---------- columns : Column | Sequence[Column] - Column(s) to update. If updating multiple columns, they must all have - different names. + Column(s) to update/insert. If updating/inserting multiple columns, + they must all have different names. Returns ------- diff --git a/spec/purpose_and_scope.md b/spec/purpose_and_scope.md index 7f0ce606..ae0d8e57 100644 --- a/spec/purpose_and_scope.md +++ b/spec/purpose_and_scope.md @@ -296,7 +296,7 @@ def my_dataframe_agnostic_function(df): continue new_column = df.get_column_by_name(column_name) new_column = (new_column - new_column.mean()) / new_column.std() - df = df.insert_column(new_column.rename(f'{column_name}_scaled')) + df = df.with_columns(new_column.rename(f'{column_name}_scaled')) return df.dataframe From 36f9c589c00a8d3c2415b7b4521329d5adbb2a26 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 28 Sep 2023 17:17:49 +0100 Subject: [PATCH 2/2] with_columns -> assign --- spec/API_specification/dataframe_api/dataframe_object.py | 4 ++-- spec/purpose_and_scope.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py index 751b0e60..79ac59aa 100644 --- a/spec/API_specification/dataframe_api/dataframe_object.py +++ b/spec/API_specification/dataframe_api/dataframe_object.py @@ -180,7 +180,7 @@ def filter(self, mask: Column[Bool]) -> DataFrame: 
""" ... - def with_columns(self, columns: Column[Any] | Sequence[Column[Any]], /) -> DataFrame: + def assign(self, columns: Column[Any] | Sequence[Column[Any]], /) -> DataFrame: """ Insert new column(s), or update values in existing ones. @@ -194,7 +194,7 @@ def with_columns(self, columns: Column[Any] | Sequence[Column[Any]], /) -> DataF .. code-block:: python new_column = df.get_column_by_name('a') + 1 - df = df.with_columns(new_column.rename('b')) + df = df.assign(new_column.rename('b')) Parameters ---------- diff --git a/spec/purpose_and_scope.md b/spec/purpose_and_scope.md index ae0d8e57..7e04bce8 100644 --- a/spec/purpose_and_scope.md +++ b/spec/purpose_and_scope.md @@ -296,7 +296,7 @@ def my_dataframe_agnostic_function(df): continue new_column = df.get_column_by_name(column_name) new_column = (new_column - new_column.mean()) / new_column.std() - df = df.with_columns(new_column.rename(f'{column_name}_scaled')) + df = df.assign(new_column.rename(f'{column_name}_scaled')) return df.dataframe