data-apis · MarcoGorelli · Oct 26, 2023 · Oct 23, 2023 · Oct 23, 2023 · Oct 25, 2023
diff --git a/spec/API_specification/dataframe_api/dataframe_object.py b/spec/API_specification/dataframe_api/dataframe_object.py
@@ -92,7 +92,7 @@ def group_by(self, *keys: str) -> GroupBy:
         """
         ...
 
-    def get_column_by_name(self, name: str, /) -> Column:
+    def col(self, name: str, /) -> Column:
         """
         Select a column by name.
 
@@ -195,7 +195,7 @@ def assign(self, *columns: Column) -> Self:
 
         .. code-block:: python
 
-            new_column = df.get_column_by_name('a') + 1
+            new_column = df.col('a') + 1
             df = df.assign(new_column.rename('b'))
 
         Parameters

diff --git a/spec/API_specification/dataframe_api/py.typed b/spec/API_specification/dataframe_api/py.typed
diff --git a/spec/API_specification/examples/01_standardise_columns.py b/spec/API_specification/examples/01_standardise_columns.py
@@ -11,7 +11,7 @@ def my_dataframe_agnostic_function(df_non_standard: SupportsDataFrameAPI) -> Any
     for column_name in df.column_names:
         if column_name == 'species':
             continue
-        new_column = df.get_column_by_name(column_name)
+        new_column = df.col(column_name)
         new_column = (new_column - new_column.mean()) / new_column.std()
         df = df.assign(new_column.rename(f'{column_name}_scaled'))
 

diff --git a/spec/API_specification/examples/02_plotting.py b/spec/API_specification/examples/02_plotting.py
@@ -25,7 +25,7 @@ def group_by_and_plot(
     )
 
     agg = df.group_by("color").mean().fill_null(float('nan'))
-    x = agg.get_column_by_name("x").to_array()
-    y = agg.get_column_by_name("y").to_array()
+    x = agg.col("x").to_array()
+    y = agg.col("y").to_array()
 
     my_plotting_function(x, y)
diff --git a/spec/API_specification/examples/tpch/q1.py b/spec/API_specification/examples/tpch/q1.py
@@ -8,16 +8,16 @@ def query(lineitem_raw: SupportsDataFrameAPI) -> Any:
     lineitem = lineitem_raw.__dataframe_consortium_standard__()
     namespace = lineitem.__dataframe_namespace__()
 
-    mask = lineitem.get_column_by_name("l_shipdate") <= namespace.date(1998, 9, 2)
+    mask = lineitem.col("l_shipdate") <= namespace.date(1998, 9, 2)
     lineitem = lineitem.assign(
         (
-            lineitem.get_column_by_name("l_extended_price")
-            * (1 - lineitem.get_column_by_name("l_discount"))
+            lineitem.col("l_extended_price")
+            * (1 - lineitem.col("l_discount"))
         ).rename("l_disc_price"),
         (
-            lineitem.get_column_by_name("l_extended_price")
-            * (1 - lineitem.get_column_by_name("l_discount"))
-            * (1 + lineitem.get_column_by_name("l_tax"))
+            lineitem.col("l_extended_price")
+            * (1 - lineitem.col("l_discount"))
+            * (1 + lineitem.col("l_tax"))
         ).rename("l_charge"),
     )
     result = (

diff --git a/spec/API_specification/examples/tpch/q5.py b/spec/API_specification/examples/tpch/q5.py
@@ -53,19 +53,15 @@ def query(
         )
     )
     mask = (
-        (
-            result.get_column_by_name("c_nationkey")
-            == result.get_column_by_name("s_nationkey")
-        )
-        & (result.get_column_by_name("r_name") == "ASIA")
-        & (result.get_column_by_name("o_orderdate") >= namespace.date(1994, 1, 1))
-        & (result.get_column_by_name("o_orderdate") < namespace.date(1995, 1, 1))
+        (result.col("c_nationkey") == result.col("s_nationkey"))
+        & (result.col("r_name") == "ASIA")
+        & (result.col("o_orderdate") >= namespace.date(1994, 1, 1))
+        & (result.col("o_orderdate") < namespace.date(1995, 1, 1))
     )
     result = result.filter(mask)
 
     new_column = (
-        result.get_column_by_name("l_extendedprice")
-        * (1 - result.get_column_by_name("l_discount"))
+        result.col("l_extendedprice") * (1 - result.col("l_discount"))
     ).rename("revenue")
     result = result.assign(new_column)
     result = result.group_by("n_name").aggregate(namespace.Aggregation.sum("revenue"))

diff --git a/spec/design_topics/python_builtin_types.md b/spec/design_topics/python_builtin_types.md
@@ -14,14 +14,14 @@ the `float` it is documented to return, in combination with the `__gt__` method
 class DataFrame:
     def __gt__(self, other: DataFrame | Scalar) -> DataFrame:
         ...
-    def get_column_by_name(self, name: str, /) -> Column:
+    def col(self, name: str, /) -> Column:
         ...
 
 class Column:
     def mean(self, skip_nulls: bool = True) -> float | NullType:
         ...
 
-larger = df2 > df1.get_column_by_name('foo').mean()
+larger = df2 > df1.col('foo').mean()
 ```
 
 For a GPU dataframe library, it is desirable for all data to reside on the GPU,