From 0642028e4923adddfb204fa9331e2e89de454252 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com>
Date: Thu, 31 Aug 2023 11:22:51 +0200
Subject: [PATCH] replace column with expression

---
 .../dataframe_api/__init__.py                 | 164 ++---
 .../dataframe_api/dataframe_object.py         | 186 ++------
 ...{column_object.py => expression_object.py} | 441 ++++++++----------
 ...olumn_object.rst => expression_object.rst} |   8 +-
 spec/API_specification/index.rst              |  12 +-
 spec/purpose_and_scope.md                     |   7 +-
 6 files changed, 355 insertions(+), 463 deletions(-)
 rename spec/API_specification/dataframe_api/{column_object.py => expression_object.py} (56%)
 rename spec/API_specification/{column_object.rst => expression_object.rst} (50%)

diff --git a/spec/API_specification/dataframe_api/__init__.py b/spec/API_specification/dataframe_api/__init__.py
index 1e7d57b4..e39aa4ff 100644
--- a/spec/API_specification/dataframe_api/__init__.py
+++ b/spec/API_specification/dataframe_api/__init__.py
@@ -3,9 +3,9 @@
 """
 from __future__ import annotations
 
-from typing import Mapping, Sequence, Any
+from typing import Mapping, Sequence, Any, Literal
 
-from .column_object import *
+from .expression_object import *
 from .dataframe_object import DataFrame
 from .groupby_object import *
 from ._types import DType
@@ -13,11 +13,10 @@
 __all__ = [
     "__dataframe_api_version__",
     "DataFrame",
-    "Column",
-    "column_from_sequence",
-    "column_from_1d_array",
+    "col",
     "concat",
-    "dataframe_from_dict",
+    "sorted_indices",
+    "unique_indices",
     "dataframe_from_2d_array",
     "is_null",
     "null",
@@ -43,6 +42,21 @@
 implementation of the dataframe API standard.
 """
 
+def col(name: str) -> Expression:
+    """
+    Instantiate an Expression which selects the given column by name.
+
+    For example, to select column 'species' and then use it to filter
+    a DataFrame, you could do:
+
+    .. code-block:: python
+
+        df: DataFrame
+        namespace = df.__dataframe_namespace__()
+        df = df.get_rows_by_mask(namespace.col('species') == 'setosa')
+    """
+    ...
+
 def concat(dataframes: Sequence[DataFrame]) -> DataFrame:
     """
     Concatenate DataFrames vertically.
@@ -63,104 +77,116 @@ def concat(dataframes: Sequence[DataFrame]) -> DataFrame:
     """
     ...
 
-def column_from_sequence(sequence: Sequence[Any], *, dtype: Any, name: str = '', api_version: str | None = None) -> Column[Any]:
+def any_rowwise(keys: list[str] | None = None, *, skip_nulls: bool = True) -> Expression:
     """
-    Construct Column from sequence of elements.
+    Reduction returns an Expression.
+
+    Differs from ``DataFrame.any`` in that the reduction happens
+    for each row, rather than for each column.
 
     Parameters
     ----------
-    sequence : Sequence[object]
-        Sequence of elements. Each element must be of the specified
-        ``dtype``, the corresponding Python builtin scalar type, or
-        coercible to that Python scalar type.
-    name : str, optional
-        Name of column.
-    dtype : DType
-        Dtype of result. Must be specified.
-    api_version: str | None
-        A string representing the version of the dataframe API specification
-        in ``'YYYY.MM'`` form, for example, ``'2023.04'``.
-        If it is ``None``, it should return an object corresponding to
-        latest version of the dataframe API specification. If the given
-        version is invalid or not implemented for the given module, an
-        error should be raised. Default: ``None``.
+    keys : list[str]
+        Column names to consider. If `None`, all columns are considered.
 
-    Returns
-    -------
-    Column
+    Raises
+    ------
+    ValueError
+        If any of the DataFrame's columns is not boolean.
     """
     ...
 
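As an illustration of how ``col`` and ``any_rowwise`` compose with ``DataFrame.get_rows_by_mask``, here is a minimal sketch against the Standard API only (``df`` is a placeholder for a Standard-compliant dataframe, as in the docstrings, and ``'flag_a'`` / ``'flag_b'`` are hypothetical boolean columns):

.. code-block:: python

    df: DataFrame
    namespace = df.__dataframe_namespace__()

    # Keep rows where 'species' equals 'setosa' (a single-column mask) ...
    df = df.get_rows_by_mask(namespace.col('species') == 'setosa')

    # ... or keep rows where at least one of two hypothetical boolean
    # columns is True (a row-wise reduction over the selected columns).
    df = df.get_rows_by_mask(namespace.any_rowwise(['flag_a', 'flag_b']))
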
-def dataframe_from_dict(data: Mapping[str, Column[Any]], *, api_version: str | None = None) -> DataFrame:
+def all_rowwise(keys: list[str] | None = None, *, skip_nulls: bool = True) -> Expression:
     """
-    Construct DataFrame from map of column names to Columns.
+    Reduction returns an Expression.
+
+    Differs from ``DataFrame.all`` in that the reduction happens
+    for each row, rather than for each column.
 
     Parameters
     ----------
-    data : Mapping[str, Column]
-        Column must be of the corresponding type of the DataFrame.
-        For example, it is only supported to build a ``LibraryXDataFrame`` using
-        ``LibraryXColumn`` instances.
-    api_version: str | None
-        A string representing the version of the dataframe API specification
-        in ``'YYYY.MM'`` form, for example, ``'2023.04'``.
-        If it is ``None``, it should return an object corresponding to
-        latest version of the dataframe API specification. If the given
-        version is invalid or not implemented for the given module, an
-        error should be raised. Default: ``None``.
+    keys : list[str]
+        Column names to consider. If `None`, all columns are considered.
 
-    Returns
-    -------
-    DataFrame
-
     Raises
     ------
     ValueError
-        If any of the columns already has a name, and the corresponding key
-        in `data` doesn't match.
-
+        If any of the DataFrame's columns is not boolean.
     """
     ...
 
+def sorted_indices(
+    keys: str | list[str] | None = None,
+    *,
+    ascending: Sequence[bool] | bool = True,
+    nulls_position: Literal['first', 'last'] = 'last',
+) -> Expression:
+    """
+    Return row numbers which would sort according to given columns.
+
+    If you need to sort the DataFrame, use :meth:`DataFrame.sort`.
 
-def column_from_1d_array(array: Any, *, dtype: Any, name: str = '', api_version: str | None = None) -> Column[Any]:
+    Parameters
+    ----------
+    keys : str | list[str], optional
+        Names of columns to sort by.
+        If `None`, sort by all columns.
+    ascending : Sequence[bool] or bool
+        If `True`, sort by all keys in ascending order.
+        If `False`, sort by all keys in descending order.
+        If a sequence, it must be the same length as `keys`,
+        and determines the direction with which to use each
+        key to sort by.
+    nulls_position : ``{'first', 'last'}``
+        Whether null values should be placed at the beginning
+        or at the end of the result.
+        Note that the position of NaNs is unspecified and may
+        vary based on the implementation.
+
+    Returns
+    -------
+    Expression
+
+    Raises
+    ------
+    ValueError
+        If `keys` and `ascending` are sequences of different lengths.
     """
-    Construct Column from 1D array.
+    ...
 
-    See `dataframe_from_2d_array` for related 2D function.
-
-    Only Array-API-compliant 1D arrays are supported.
-    Cross-kind casting is undefined and may vary across implementations.
-    Downcasting is disallowed.
+def unique_indices(keys: str | list[str] | None = None, *, skip_nulls: bool = True) -> Expression:
+    """
+    Return indices corresponding to unique values across selected columns.
 
     Parameters
     ----------
-    array : array
-        array-API compliant 1D array
-    name : str, optional
-        Name to give columns.
-    dtype : DType
-        Dtype of column.
-    api_version: str | None
-        A string representing the version of the dataframe API specification
-        in ``'YYYY.MM'`` form, for example, ``'2023.04'``.
-        If it is ``None``, it should return an object corresponding to
-        latest version of the dataframe API specification. If the given
-        version is invalid or not implemented for the given module, an
-        error should be raised. Default: ``None``.
+    keys : str | list[str], optional
+        Column names to consider when finding unique values.
+        If `None`, all columns are considered.
 
     Returns
     -------
-    Column
+    Expression
+        Indices corresponding to unique values.
+
+    Notes
+    -----
+    There are no ordering guarantees. In particular, if there are multiple
+    indices corresponding to the same unique value(s), there is no guarantee
+    about which one will appear in the result.
+    If the original column(s) contain multiple `'NaN'` values, then
+    only a single index corresponding to those values will be returned.
+    Likewise for null values (if ``skip_nulls=False``).
+    To get the unique values, you can do ``df.get_rows(unique_indices(keys))``.
     """
     ...
 
-def dataframe_from_2d_array(array: Any, *, names: Sequence[str], dtypes: Mapping[str, Any], api_version: str | None = None) -> DataFrame:
+
+def dataframe_from_2d_array(array: Any, *, names: Sequence[str], dtypes: Mapping[str, Any]) -> DataFrame:
     """
     Construct DataFrame from 2D array.
 
-    See `column_from_1d_array` for related 1D function.
-
     Only Array-API-compliant 2D arrays are supported.
     Cross-kind casting is undefined and may vary across implementations.
     Downcasting is disallowed.
diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py
index 7383e1d2..8bf1fb86 100644
--- a/spec/API_specification/dataframe_api/dataframe_object.py
+++ b/spec/API_specification/dataframe_api/dataframe_object.py
@@ -4,7 +4,7 @@
 
 if TYPE_CHECKING:
-    from .column_object import Column
+    from .expression_object import Expression
     from .groupby_object import GroupBy
     from . import Bool
     from ._types import NullType, Scalar
@@ -90,36 +90,30 @@ def groupby(self, keys: str | list[str], /) -> GroupBy:
         """
         ...
 
-    def get_column_by_name(self, name: str, /) -> Column[Any]:
+    def select(self, names: Sequence[str | Expression], /) -> DataFrame:
         """
-        Select a column by name.
+        Select multiple columns, either by name or by expressions.
 
         Parameters
         ----------
-        name : str
+        names : Sequence[str | Expression]
 
         Returns
        -------
-        Column
+        DataFrame
 
-        Raises
-        ------
-        KeyError
-            If the key is not present.
-        """
-        ...
+        Examples
+        --------
+        Select columns 'a' and 'b':
 
-    def get_columns_by_name(self, names: Sequence[str], /) -> DataFrame:
-        """
-        Select multiple columns by name.
+        >>> df: DataFrame
+        >>> df.select(['a', 'b'])
 
-        Parameters
-        ----------
-        names : Sequence[str]
+        You can also pass expressions:
 
-        Returns
-        -------
-        DataFrame
+        >>> df: DataFrame
+        >>> namespace = df.__dataframe_namespace__()
+        >>> df.select(['a', namespace.col('b')+1])
 
         Raises
         ------
@@ -128,13 +122,13 @@ def get_columns_by_name(self, names: Sequence[str], /) -> DataFrame:
         """
         ...
 
-    def get_rows(self, indices: Column[Any]) -> DataFrame:
+    def get_rows(self, indices: Expression) -> DataFrame:
         """
         Select a subset of rows, similar to `ndarray.take`.
 
         Parameters
         ----------
-        indices : Column[int]
+        indices : Expression
             Positions of rows to select.
 
         Returns
         -------
@@ -161,70 +155,85 @@ def slice_rows(
         """
         ...
 
-    def get_rows_by_mask(self, mask: Column[Bool]) -> DataFrame:
+    def get_rows_by_mask(self, mask: Expression) -> DataFrame:
         """
         Select a subset of rows corresponding to a mask.
 
         Parameters
         ----------
-        mask : Column[bool]
+        mask : Expression
 
         Returns
         -------
         DataFrame
 
-        Notes
-        -----
-        Some participants preferred a weaker type Arraylike[bool] for mask,
-        where 'Arraylike' denotes an object adhering to the Array API standard.
+ Examples + -------- + + Here is how you could keep rows in a dataframe where the values in + column 'a' are greater than 3: + + >>> df: DataFrame + >>> namespace = df.__dataframe_namespace__() + >>> mask = namespace.col('a') > 3 + >>> df = df.get_rows_by_mask(mask) """ ... - def insert_column(self, column: Column[Any]) -> DataFrame: + def insert_column(self, column: Expression) -> DataFrame: """ Insert column into DataFrame at rightmost location. The column's name will be used as the label in the resulting dataframe. - To insert the column with a different name, combine with `Column.rename`, + To insert the column with a different name, combine with `Expression.rename`, e.g.: .. code-block:: python - new_column = df.get_column_by_name('a') + 1 + df: DataFrame + namespace = df.__dataframe_namespace__() + col = namespace.col + new_column = namespace.col('a') + 1 df = df.insert_column(new_column.rename('a_plus_1')) If you need to insert the column at a different location, combine with - :meth:`get_columns_by_name`, e.g.: + :meth:`select`, e.g.: .. code-block:: python - new_column = df.get_column_by_name('a') + 1 + df: DataFrame + namespace = df.__dataframe_namespace__() + col = namespace.col + new_column = namespace.col('a') + 1 new_columns_names = ['a_plus_1'] + df.get_column_names() df = df.insert_column(new_column.rename('a_plus_1')) - df = df.get_columns_by_name(new_column_names) + df = df.select(new_column_names) Parameters ---------- - column : Column + expression : Expression """ ... - def update_columns(self, columns: Column[Any] | Sequence[Column[Any]], /) -> DataFrame: + def update_columns(self, columns: Expression | Sequence[Expression], /) -> DataFrame: """ Update values in existing column(s) from Dataframe. The column's name will be used to tell which column to update. - To update a column with a different name, combine with :meth:`Column.rename`, + To update a column with a different name, combine with :meth:`Expression.rename`, e.g.: .. code-block:: python - new_column = df.get_column_by_name('a') + 1 - df = df.update_column(new_column.rename('b')) + df: DataFrame + namespace = df.__dataframe_namespace__() + col = namespace.col + new_column = namespace.col('a') + 1 + df = df.update_columns(new_column.rename('b')) Parameters ---------- - columns : Column | Sequence[Column] + columns : Expression | Sequence[Expression] Column(s) to update. If updating multiple columns, they must all have different names. @@ -289,7 +298,7 @@ def sort( Sort dataframe according to given columns. If you only need the indices which would sort the dataframe, use - :meth:`sorted_indices`. + :func:`dataframe_api.sorted_indices`. Parameters ---------- @@ -319,46 +328,6 @@ def sort( """ ... - def sorted_indices( - self, - keys: str | list[str] | None = None, - *, - ascending: Sequence[bool] | bool = True, - nulls_position: Literal['first', 'last'] = 'last', - ) -> Column[Any]: - """ - Return row numbers which would sort according to given columns. - - If you need to sort the DataFrame, use :meth:`sort`. - - Parameters - ---------- - keys : str | list[str], optional - Names of columns to sort by. - If `None`, sort by all columns. - ascending : Sequence[bool] or bool - If `True`, sort by all keys in ascending order. - If `False`, sort by all keys in descending order. - If a sequence, it must be the same length as `keys`, - and determines the direction with which to use each - key to sort by. 
- nulls_position : ``{'first', 'last'}`` - Whether null values should be placed at the beginning - or at the end of the result. - Note that the position of NaNs is unspecified and may - vary based on the implementation. - - Returns - ------- - Column[int] - - Raises - ------ - ValueError - If `keys` and `ascending` are sequences of different lengths. - """ - ... - def __eq__(self, other: DataFrame | Scalar) -> DataFrame: # type: ignore[override] """ Compare for equality. @@ -692,34 +661,6 @@ def all(self, *, skip_nulls: bool = True) -> DataFrame: """ ... - def any_rowwise(self, *, skip_nulls: bool = True) -> Column[Bool]: - """ - Reduction returns a Column. - - Differs from ``DataFrame.any`` and that the reduction happens - for each row, rather than for each column. - - Raises - ------ - ValueError - If any of the DataFrame's columns is not boolean. - """ - ... - - def all_rowwise(self, *, skip_nulls: bool = True) -> Column[Bool]: - """ - Reduction returns a Column. - - Differs from ``DataFrame.all`` and that the reduction happens - for each row, rather than for each column. - - Raises - ------ - ValueError - If any of the DataFrame's columns is not boolean. - """ - ... - def min(self, *, skip_nulls: bool = True) -> DataFrame: """ Reduction returns a 1-row DataFrame. @@ -826,33 +767,6 @@ def is_nan(self) -> DataFrame: """ ... - def unique_indices(self, keys: str | list[str] | None = None, *, skip_nulls: bool = True) -> Column[int]: - """ - Return indices corresponding to unique values across selected columns. - - Parameters - ---------- - keys : str | list[str], optional - Column names to consider when finding unique values. - If `None`, all columns are considered. - - Returns - ------- - Column[int] - Indices corresponding to unique values. - - Notes - ----- - There are no ordering guarantees. In particular, if there are multiple - indices corresponding to the same unique value(s), there is no guarantee - about which one will appear in the result. - If the original column(s) contain multiple `'NaN'` values, then - only a single index corresponding to those values will be returned. - Likewise for null values (if ``skip_nulls=False``). - To get the unique values, you can do ``df.get_rows(df.unique_indices(keys))``. - """ - ... - def fill_nan(self, value: float | NullType, /) -> DataFrame: """ Fill ``nan`` values with the given fill value. diff --git a/spec/API_specification/dataframe_api/column_object.py b/spec/API_specification/dataframe_api/expression_object.py similarity index 56% rename from spec/API_specification/dataframe_api/column_object.py rename to spec/API_specification/dataframe_api/expression_object.py index c8eb666f..6fdf4f68 100644 --- a/spec/API_specification/dataframe_api/column_object.py +++ b/spec/API_specification/dataframe_api/expression_object.py @@ -9,85 +9,76 @@ from ._types import NullType, Scalar -__all__ = ['Column'] +__all__ = ['Expression'] -class Column(Generic[DType]): +class Expression: """ - Column object + Expression object, which maps a DataFrame to a column. - Note that this column object is not meant to be instantiated directly by - users of the library implementing the dataframe API standard. Rather, use - constructor functions or an already-created dataframe object retrieved via + Not meant to be used directly - instead, use :func:`dataframe_api.col`. 
- """ + An expression is a function which maps a DataFrame to a column, and can be + used within the context of: - def __column_namespace__(self) -> Any: - """ - Returns an object that has all the Dataframe Standard API functions on it. + - :meth:`DataFrame.select` + - :meth:`DataFrame.insert_column` + - :meth:`DataFrame.update_columns` + - :meth:`DataFrame.get_rows_by_mask` - Returns - ------- - namespace: Any - An object representing the dataframe API namespace. It should have - every top-level function defined in the specification as an - attribute. It may contain other public names as well, but it is - recommended to only include those names that are part of the - specification. + Example: - """ - - @property - def column(self) -> Any: - """ - Return underlying (not-necessarily-Standard-compliant) column. + .. code-block::python - If a library only implements the Standard, then this can return `self`. - """ - ... + df: DataFrame + namespace = df.__dataframe_namespace__() + col = namespace.col + df = df.select(col(['a', 'b'])) - @property - def name(self) -> str: - """Return name of column.""" + resolves to (pandas syntax): - def __len__(self) -> int: - """ - Return the number of rows. - """ - - def __iter__(self) -> NoReturn: - """ - Iterate over elements. + .. code-block::python - This is intentionally "poisoned" to discourage inefficient code patterns. + df: pd.DataFrame + df = df.loc[:, ['a', 'b']] + + Multiple column calls can be chained together. For example: + + .. code-block::python + + df: DataFrame + namespace = df.__dataframe_namespace__() + col = namespace.col + new_column = ( + (col('petal_width') - col('petal_width').mean()) + .rename('petal_width_centered') + ) + df = df.insert_column(new_column) + + resolves to (pandas syntax) - Raises - ------ - NotImplementedError - """ - raise NotImplementedError("'__iter__' is intentionally not implemented.") + .. code-block::python - @property - def dtype(self) -> Any: - """ - Return data type of column. - """ + df: pd.DataFrame + new_column = ( + (df['petal_width'] - df['petal_width'].mean()) + .rename('petal_width_centered') + ) + df[new_column.name] = new_column + """ - def get_rows(self: Column[DType], indices: Column[Any]) -> Column[DType]: + def __len__(self) -> Expression: """ - Select a subset of rows, similar to `ndarray.take`. - - Parameters - ---------- - indices : Column[int] - Positions of rows to select. + Return the number of rows. """ - ... + @property + def name(self) -> str: + """Return output name of expression.""" def slice_rows( - self: Column[DType], start: int | None, stop: int | None, step: int | None - ) -> Column[DType]: + self: Expression, start: int | None, stop: int | None, step: int | None + ) -> Expression: """ Select a subset of rows corresponding to a slice. @@ -99,32 +90,25 @@ def slice_rows( Returns ------- - Column + Expression """ ... - - def get_rows_by_mask(self: Column[DType], mask: Column[Bool]) -> Column[DType]: + def get_rows_by_mask(self, mask: Expression) -> Expression: """ Select a subset of rows corresponding to a mask. Parameters ---------- - mask : Column[bool] + mask : Expression Returns ------- - Column - - Notes - ----- - Some participants preferred a weaker type Arraylike[bool] for mask, - where 'Arraylike' denotes an object adhering to the Array API standard. + Expression """ ... - - def get_value(self, row_number: int) -> Scalar: + def get_value(self, row_number: int) -> Expression: """ Select the value at a row number, similar to `ndarray.__getitem__()`. 
@@ -135,9 +119,7 @@ def get_value(self, row_number: int) -> Scalar: Returns ------- - Scalar - Depends on the dtype of the Column, and may vary - across implementations. + Expression """ ... @@ -146,12 +128,12 @@ def sort( *, ascending: bool = True, nulls_position: Literal['first', 'last'] = 'last', - ) -> Column[DType]: + ) -> Expression: """ - Sort column. + Sort expression. - If you need the indices which would sort the column, - use :meth:`sorted_indices`. + If you need the indices which would sort the expression, + use :func:`sorted_indices`. Parameters ---------- @@ -166,7 +148,7 @@ def sort( Returns ------- - Column + Expression """ ... @@ -175,11 +157,11 @@ def sorted_indices( *, ascending: bool = True, nulls_position: Literal['first', 'last'] = 'last', - ) -> Column[Any]: + ) -> Expression: """ - Return row numbers which would sort column. + Return row numbers which would sort expression. - If you need to sort the Column, use :meth:`sort`. + If you need to sort the expression, use :meth:`sort`. Parameters ---------- @@ -194,11 +176,11 @@ def sorted_indices( Returns ------- - Column[int] + Expression """ ... - def __eq__(self, other: Column[Any] | Scalar) -> Column[Bool]: # type: ignore[override] + def __eq__(self, other: Expression | Scalar) -> Expression: # type: ignore[override] """ Compare for equality. @@ -206,17 +188,17 @@ def __eq__(self, other: Column[Any] | Scalar) -> Column[Bool]: # type: ignore[o Parameters ---------- - other : Column or Scalar - If Column, must have same length. + other : Expression or Scalar + If expression, must have same length. "Scalar" here is defined implicitly by what scalar types are allowed for the operation by the underling dtypes. Returns ------- - Column + Expression """ - def __ne__(self: Column[DType], other: Column[DType] | Scalar) -> Column[Bool]: # type: ignore[override] + def __ne__(self: Expression, other: Expression | Scalar) -> Expression: # type: ignore[override] """ Compare for non-equality. @@ -224,94 +206,94 @@ def __ne__(self: Column[DType], other: Column[DType] | Scalar) -> Column[Bool]: Parameters ---------- - other : Column or Scalar - If Column, must have same length. + other : Expression or Scalar + If expression, must have same length. "Scalar" here is defined implicitly by what scalar types are allowed for the operation by the underling dtypes. Returns ------- - Column + Expression """ - def __ge__(self: Column[DType], other: Column[DType] | Scalar) -> Column[Bool]: + def __ge__(self: Expression, other: Expression | Scalar) -> Expression: """ Compare for "greater than or equal to" `other`. Parameters ---------- - other : Column or Scalar - If Column, must have same length. + other : Expression or Scalar + If expression, must have same length. "Scalar" here is defined implicitly by what scalar types are allowed for the operation by the underling dtypes. Returns ------- - Column + Expression """ - def __gt__(self: Column[DType], other: Column[DType] | Scalar) -> Column[Bool]: + def __gt__(self: Expression, other: Expression | Scalar) -> Expression: """ Compare for "greater than" `other`. Parameters ---------- - other : Column or Scalar - If Column, must have same length. + other : Expression or Scalar + If expression, must have same length. "Scalar" here is defined implicitly by what scalar types are allowed for the operation by the underling dtypes. 
Returns ------- - Column + Expression """ - def __le__(self: Column[DType], other: Column[DType] | Scalar) -> Column[Bool]: + def __le__(self: Expression, other: Expression | Scalar) -> Expression: """ Compare for "less than or equal to" `other`. Parameters ---------- - other : Column or Scalar - If Column, must have same length. + other : Expression or Scalar + If expression, must have same length. "Scalar" here is defined implicitly by what scalar types are allowed for the operation by the underling dtypes. Returns ------- - Column + Expression """ - def __lt__(self: Column[DType], other: Column[DType] | Scalar) -> Column[Bool]: + def __lt__(self: Expression, other: Expression | Scalar) -> Expression: """ Compare for "less than" `other`. Parameters ---------- - other : Column or Scalar - If Column, must have same length. + other : Expression or Scalar + If expression, must have same length. "Scalar" here is defined implicitly by what scalar types are allowed for the operation by the underling dtypes. Returns ------- - Column + Expression """ - def __and__(self: Column[Bool], other: Column[Bool] | bool) -> Column[Bool]: + def __and__(self: Expression, other: Expression | bool) -> Expression: """ - Apply logical 'and' to `other` Column (or scalar) and this Column. + Apply logical 'and' to `other` expression (or scalar) and this expression. Nulls should follow Kleene Logic. Parameters ---------- - other : Column[bool] or bool - If Column, must have same length. + other : Expression[bool] or bool + If expression, must have same length. Returns ------- - Column + Expression Raises ------ @@ -319,20 +301,20 @@ def __and__(self: Column[Bool], other: Column[Bool] | bool) -> Column[Bool]: If `self` or `other` is not boolean. """ - def __or__(self: Column[Bool], other: Column[Bool] | bool) -> Column[Bool]: + def __or__(self: Expression, other: Expression | bool) -> Expression: """ - Apply logical 'or' to `other` Column (or scalar) and this column. + Apply logical 'or' to `other` expression (or scalar) and this expression. Nulls should follow Kleene Logic. Parameters ---------- - other : Column[bool] or Scalar - If Column, must have same length. + other : Expression[bool] or Scalar + If expression, must have same length. Returns ------- - Column[bool] + Expression[bool] Raises ------ @@ -340,89 +322,89 @@ def __or__(self: Column[Bool], other: Column[Bool] | bool) -> Column[Bool]: If `self` or `other` is not boolean. """ - def __add__(self: Column[Any], other: Column[Any] | Scalar) -> Column[Any]: + def __add__(self: Expression, other: Expression | Scalar) -> Expression: """ - Add `other` column or scalar to this column. + Add `other` expression or scalar to this expression. Parameters ---------- - other : Column or Scalar - If Column, must have same length. + other : Expression or Scalar + If expression, must have same length. "Scalar" here is defined implicitly by what scalar types are allowed for the operation by the underling dtypes. Returns ------- - Column + Expression """ - def __sub__(self: Column[Any], other: Column[Any] | Scalar) -> Column[Any]: + def __sub__(self: Expression, other: Expression | Scalar) -> Expression: """ - Subtract `other` column or scalar from this column. + Subtract `other` expression or scalar from this expression. Parameters ---------- - other : Column or Scalar - If Column, must have same length. + other : Expression or Scalar + If expression, must have same length. 
"Scalar" here is defined implicitly by what scalar types are allowed for the operation by the underling dtypes. Returns ------- - Column + Expression """ - def __mul__(self, other: Column[Any] | Scalar) -> Column[Any]: + def __mul__(self, other: Expression | Scalar) -> Expression: """ - Multiply `other` column or scalar with this column. + Multiply `other` expression or scalar with this expression. Parameters ---------- - other : Column or Scalar - If Column, must have same length. + other : Expression or Scalar + If expression, must have same length. "Scalar" here is defined implicitly by what scalar types are allowed for the operation by the underling dtypes. Returns ------- - Column + Expression """ - def __truediv__(self, other: Column[Any] | Scalar) -> Column[Any]: + def __truediv__(self, other: Expression | Scalar) -> Expression: """ - Divide this column by `other` column or scalar. True division, returns floats. + Divide this expression by `other` expression or scalar. True division, returns floats. Parameters ---------- - other : Column or Scalar - If Column, must have same length. + other : Expression or Scalar + If expression, must have same length. "Scalar" here is defined implicitly by what scalar types are allowed for the operation by the underling dtypes. Returns ------- - Column + Expression """ - def __floordiv__(self, other: Column[Any] | Scalar) -> Column[Any]: + def __floordiv__(self, other: Expression | Scalar) -> Expression: """ - Floor-divide `other` column or scalar to this column. + Floor-divide `other` expression or scalar to this expression. Parameters ---------- - other : Column or Scalar - If Column, must have same length. + other : Expression or Scalar + If expression, must have same length. "Scalar" here is defined implicitly by what scalar types are allowed for the operation by the underling dtypes. Returns ------- - Column + Expression """ - def __pow__(self, other: Column[Any] | Scalar) -> Column[Any]: + def __pow__(self, other: Expression | Scalar) -> Expression: """ - Raise this column to the power of `other`. + Raise this expression to the power of `other`. Integer dtype to the power of non-negative integer dtype is integer dtype. Integer dtype to the power of float dtype is float dtype. @@ -430,104 +412,104 @@ def __pow__(self, other: Column[Any] | Scalar) -> Column[Any]: Parameters ---------- - other : Column or Scalar - If Column, must have same length. + other : Expression or Scalar + If expression, must have same length. "Scalar" here is defined implicitly by what scalar types are allowed for the operation by the underling dtypes. Returns ------- - Column + Expression """ - def __mod__(self, other: Column[Any] | Scalar) -> Column[Any]: + def __mod__(self, other: Expression | Scalar) -> Expression: """ - Returns modulus of this column by `other` (`%` operator). + Returns modulus of this expression by `other` (`%` operator). Parameters ---------- - other : Column or Scalar - If Column, must have same length. + other : Expression or Scalar + If expression, must have same length. "Scalar" here is defined implicitly by what scalar types are allowed for the operation by the underling dtypes. Returns ------- - Column + Expression """ - def __divmod__(self, other: Column[Any] | Scalar) -> tuple[Column[Any], Column[Any]]: + def __divmod__(self, other: Expression | Scalar) -> tuple[Expression, Expression]: """ Return quotient and remainder of integer division. See `divmod` builtin function. 
Parameters ---------- - other : Column or Scalar - If Column, must have same length. + other : Expression or Scalar + If expression, must have same length. "Scalar" here is defined implicitly by what scalar types are allowed for the operation by the underling dtypes. Returns ------- - Column + tuple[Expression, Expression] """ - def __invert__(self: Column[Bool]) -> Column[Bool]: + def __invert__(self: Expression) -> Expression: """ Invert truthiness of (boolean) elements. Raises ------ ValueError - If any of the Column's columns is not boolean. + If any of the expression's expressions is not boolean. """ - def any(self: Column[Bool], *, skip_nulls: bool = True) -> bool | NullType: + def any(self: Expression, *, skip_nulls: bool = True) -> Expression: """ Reduction returns a bool. Raises ------ ValueError - If column is not boolean. + If expression is not boolean. """ - def all(self: Column[Bool], *, skip_nulls: bool = True) -> bool | NullType: + def all(self: Expression, *, skip_nulls: bool = True) -> Expression: """ Reduction returns a bool. Raises ------ ValueError - If column is not boolean. + If expression is not boolean. """ - def min(self, *, skip_nulls: bool = True) -> Scalar | NullType: + def min(self, *, skip_nulls: bool = True) -> Expression: """ Reduction returns a scalar. Any data type that supports comparisons - must be supported. The returned value has the same dtype as the column. + must be supported. The returned value has the same dtype as the expression. """ - def max(self, *, skip_nulls: bool = True) -> Scalar | NullType: + def max(self, *, skip_nulls: bool = True) -> Expression: """ Reduction returns a scalar. Any data type that supports comparisons - must be supported. The returned value has the same dtype as the column. + must be supported. The returned value has the same dtype as the expression. """ - def sum(self, *, skip_nulls: bool = True) -> Scalar | NullType: + def sum(self, *, skip_nulls: bool = True) -> Expression: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. The returned value has the same dtype as the - column. + expression. """ - def prod(self, *, skip_nulls: bool = True) -> Scalar | NullType: + def prod(self, *, skip_nulls: bool = True) -> Expression: """ Reduction returns a scalar. Must be supported for numerical data types. - The returned value has the same dtype as the column. + The returned value has the same dtype as the expression. """ - def median(self, *, skip_nulls: bool = True) -> Scalar | NullType: + def median(self, *, skip_nulls: bool = True) -> Expression: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. Returns a float for numerical data types, and @@ -535,7 +517,7 @@ def median(self, *, skip_nulls: bool = True) -> Scalar | NullType: dtypes. """ - def mean(self, *, skip_nulls: bool = True) -> Scalar | NullType: + def mean(self, *, skip_nulls: bool = True) -> Expression: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. Returns a float for numerical data types, and @@ -543,7 +525,7 @@ def mean(self, *, skip_nulls: bool = True) -> Scalar | NullType: dtypes. """ - def std(self, *, correction: int | float = 1, skip_nulls: bool = True) -> Scalar | NullType: + def std(self, *, correction: int | float = 1, skip_nulls: bool = True) -> Expression: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. 
Returns a float for numerical data types, and @@ -559,17 +541,17 @@ def std(self, *, correction: int | float = 1, skip_nulls: bool = True) -> Scalar where ``N`` corresponds to the total number of elements over which the standard deviation is computed. When computing the standard deviation of a population, setting this parameter to ``0`` is the - standard choice (i.e., the provided column contains data + standard choice (i.e., the provided expression contains data constituting an entire population). When computing the corrected sample standard deviation, setting this parameter to ``1`` is the - standard choice (i.e., the provided column contains data sampled + standard choice (i.e., the provided expression contains data sampled from a larger population; this is commonly referred to as Bessel's correction). Fractional (float) values are allowed. Default: ``1``. skip_nulls Whether to skip null values. """ - def var(self, *, correction: int | float = 1, skip_nulls: bool = True) -> Scalar | NullType: + def var(self, *, correction: int | float = 1, skip_nulls: bool = True) -> Expression: """ Reduction returns a scalar. Must be supported for numerical and datetime data types. Returns a float for numerical data types, and @@ -581,44 +563,44 @@ def var(self, *, correction: int | float = 1, skip_nulls: bool = True) -> Scalar correction Correction to apply to the result. For example, ``0`` for sample standard deviation and ``1`` for population standard deviation. - See `Column.std` for a more detailed description. + See `expression.std` for a more detailed description. skip_nulls Whether to skip null values. """ - def cumulative_max(self: Column[DType]) -> Column[DType]: + def cumulative_max(self: Expression) -> Expression: """ - Reduction returns a Column. Any data type that supports comparisons - must be supported. The returned value has the same dtype as the column. + Reduction returns a expression. Any data type that supports comparisons + must be supported. The returned value has the same dtype as the expression. """ - def cumulative_min(self: Column[DType]) -> Column[DType]: + def cumulative_min(self: Expression) -> Expression: """ - Reduction returns a Column. Any data type that supports comparisons - must be supported. The returned value has the same dtype as the column. + Reduction returns a expression. Any data type that supports comparisons + must be supported. The returned value has the same dtype as the expression. """ - def cumulative_sum(self: Column[DType]) -> Column[DType]: + def cumulative_sum(self: Expression) -> Expression: """ - Reduction returns a Column. Must be supported for numerical and + Reduction returns a expression. Must be supported for numerical and datetime data types. The returned value has the same dtype as the - column. + expression. """ - def cumulative_prod(self: Column[DType]) -> Column[DType]: + def cumulative_prod(self: Expression) -> Expression: """ - Reduction returns a Column. Must be supported for numerical and + Reduction returns a expression. Must be supported for numerical and datetime data types. The returned value has the same dtype as the - column. + expression. """ - def is_null(self) -> Column[Bool]: + def is_null(self) -> Expression: """ Check for 'missing' or 'null' entries. Returns ------- - Column + Expression See also -------- @@ -631,13 +613,13 @@ def is_null(self) -> Column[Bool]: but note that the Standard makes no guarantees about them. """ - def is_nan(self) -> Column[Bool]: + def is_nan(self) -> Expression: """ Check for nan entries. 
Returns ------- - Column + Expression See also -------- @@ -650,31 +632,31 @@ def is_nan(self) -> Column[Bool]: In particular, does not check for `np.timedelta64('NaT')`. """ - def is_in(self: Column[DType], values: Column[DType]) -> Column[Bool]: + def is_in(self: Expression, values: Expression) -> Expression: """ Indicate whether the value at each row matches any value in `values`. Parameters ---------- - values : Column + values : Expression Contains values to compare against. May include ``float('nan')`` and ``null``, in which case ``'nan'`` and ``null`` will respectively return ``True`` even though ``float('nan') == float('nan')`` isn't ``True``. - The dtype of ``values`` must match the current column's dtype. + The dtype of ``values`` must match the current expression's dtype. Returns ------- - Column[bool] + Expression[bool] """ - def unique_indices(self, *, skip_nulls: bool = True) -> Column[Any]: + def unique_indices(self, *, skip_nulls: bool = True) -> Expression: """ - Return indices corresponding to unique values in Column. + Return indices corresponding to unique values in expression. Returns ------- - Column[int] + Expression[int] Indices corresponding to unique values. Notes @@ -682,87 +664,52 @@ def unique_indices(self, *, skip_nulls: bool = True) -> Column[Any]: There are no ordering guarantees. In particular, if there are multiple indices corresponding to the same unique value, there is no guarantee about which one will appear in the result. - If the original Column contains multiple `'NaN'` values, then + If the original expression contains multiple `'NaN'` values, then only a single index corresponding to those values will be returned. Likewise for null values (if ``skip_nulls=False``). To get the unique values, you can do ``col.get_rows(col.unique_indices())``. """ ... - def fill_nan(self: Column[DType], value: float | NullType, /) -> Column[DType]: + def fill_nan(self: Expression, value: float | NullType, /) -> Expression: """ Fill floating point ``nan`` values with the given fill value. Parameters ---------- value : float or `null` - Value used to replace any ``nan`` in the column with. Must be - of the Python scalar type matching the dtype of the column (or + Value used to replace any ``nan`` in the expression with. Must be + of the Python scalar type matching the dtype of the expression (or be `null`). """ ... - def fill_null(self: Column[DType], value: Scalar, /) -> Column[DType]: + def fill_null(self: Expression, value: Scalar, /) -> Expression: """ Fill null values with the given fill value. Parameters ---------- value : Scalar - Value used to replace any ``null`` values in the column with. - Must be of the Python scalar type matching the dtype of the column. + Value used to replace any ``null`` values in the expression with. + Must be of the Python scalar type matching the dtype of the expression. """ ... - def to_array_object(self, dtype: Any) -> Any: - """ - Convert to array-API-compliant object. - - Parameters - ---------- - dtype : DType - The dtype of the array-API-compliant object to return. - Must be one of: - - - Bool() - - Int8() - - Int16() - - Int32() - - Int64() - - UInt8() - - UInt16() - - UInt32() - - UInt64() - - Float32() - - Float64() - - Returns - ------- - Any - An array-API-compliant object. 
-
-        Notes
-        -----
-        While numpy arrays are not yet array-API-compliant, implementations
-        may choose to return a numpy array (for numpy prior to 2.0), with the
-        understanding that consuming libraries would then use the
-        ``array-api-compat`` package to convert it to a Standard-compliant array.
-        """
-
-    def rename(self, name: str) -> Column[DType]:
+    def rename(self, name: str) -> Expression:
         """
-        Rename column.
+        Rename expression.
 
         Parameters
         ----------
         name : str
-            New name for column.
+            New name for expression.
 
         Returns
         -------
-        Column
-            New column - this does not operate in-place.
+        Expression
+            New expression - this does not operate in-place.
         """
         ...
diff --git a/spec/API_specification/column_object.rst b/spec/API_specification/expression_object.rst
similarity index 50%
rename from spec/API_specification/column_object.rst
rename to spec/API_specification/expression_object.rst
index 3201b500..83e53a2e 100644
--- a/spec/API_specification/column_object.rst
+++ b/spec/API_specification/expression_object.rst
@@ -1,12 +1,12 @@
 .. _column-object:
 
-Column object
-=============
+Expression object
+=================
 
 A conforming implementation of the dataframe API standard must provide and
-support a column object having the following methods, attributes, and
+support an expression object having the following methods, attributes, and
 behavior.
 
 .. currentmodule:: dataframe_api
 
-.. autoclass:: Column
+.. autoclass:: Expression
diff --git a/spec/API_specification/index.rst b/spec/API_specification/index.rst
index 1809c87a..779ed854 100644
--- a/spec/API_specification/index.rst
+++ b/spec/API_specification/index.rst
@@ -16,6 +16,11 @@ of objects and functions in the top-level namespace. The latter are:
   __dataframe_api_version__
   is_null
   null
+  col
+  sorted_indices
+  unique_indices
+  any_rowwise
+  all_rowwise
   Int64
   Int32
   Int16
@@ -28,17 +33,14 @@ of objects and functions in the top-level namespace. The latter are:
   Float32
   Bool
   is_dtype
-  column_from_sequence
-  column_from_1d_array
-  dataframe_from_dict
   dataframe_from_2d_array
 
-The ``DataFrame``, ``Column`` and ``GroupBy`` objects have the following
+The ``DataFrame``, ``Expression`` and ``GroupBy`` objects have the following
 methods and attributes:
 
 .. toctree::
    :maxdepth: 3
 
    dataframe_object
-   column_object
+   expression_object
    groupby_object
diff --git a/spec/purpose_and_scope.md b/spec/purpose_and_scope.md
index 199d1a74..45e6e759 100644
--- a/spec/purpose_and_scope.md
+++ b/spec/purpose_and_scope.md
@@ -285,16 +285,19 @@ df_polars = pl.scan_parquet('iris.parquet')
 
 def my_dataframe_agnostic_function(df):
     df = df.__dataframe_consortium_standard__(api_version='2023.08-beta')
+    namespace = df.__dataframe_namespace__()
 
-    mask = df.get_column_by_name('species') != 'setosa'
+    mask = namespace.col('species') != 'setosa'
    df = df.get_rows_by_mask(mask)
 
+    new_columns = []
     for column_name in df.get_column_names():
         if column_name == 'species':
             continue
-        new_column = df.get_column_by_name(column_name)
+        new_column = namespace.col(column_name)
         new_column = (new_column - new_column.mean()) / new_column.std()
-        df = df.insert(loc=len(df.get_column_names()), label=f'{column_name}_scaled', value=new_column)
+        new_columns.append(new_column)
+    df = df.update_columns(new_columns)
     return df.dataframe
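For comparison, the dataframe-agnostic function above resolves to roughly the following in plain pandas, in the spirit of the "resolves to (pandas syntax)" examples in the Expression docstring. This is an illustrative sketch only; the function name ``scale_features`` and the in-place column assignment are assumptions, not part of the Standard:

.. code-block:: python

    import pandas as pd

    def scale_features(df: pd.DataFrame) -> pd.DataFrame:
        # Keep rows where 'species' differs from 'setosa'.
        df = df.loc[df['species'] != 'setosa'].copy()

        # Standardise every remaining column except 'species', keeping the
        # original column names (this mirrors the update_columns call above).
        for column_name in df.columns:
            if column_name == 'species':
                continue
            column = df[column_name]
            df[column_name] = (column - column.mean()) / column.std()
        return df

Under these assumptions, ``scale_features(pd.read_parquet('iris.parquet'))`` should give the same result as passing the pandas dataframe through ``my_dataframe_agnostic_function``.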