From 101c10c3bff191c89a0e4c69a42c4d5c1da0d865 Mon Sep 17 00:00:00 2001 From: Dries Schaumont Date: Sat, 27 Mar 2021 18:40:34 +0100 Subject: [PATCH 1/3] DOC: #39904: Clarify DataFrame column argument in API documentation. --- pandas/core/frame.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 62341045413a7..999e33f890dd7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -472,8 +472,12 @@ class DataFrame(NDFrame, OpsMixin): Index to use for resulting frame. Will default to RangeIndex if no indexing information part of input data and no index provided. columns : Index or array-like - Column labels to use for resulting frame. Will default to - RangeIndex (0, 1, 2, ..., n) if no column labels are provided. + Columns to select from data or column labels to use for resulting frame. + Will use the provided labels for the column index if data does not + include column labels, defaulting to RangeIndex(0, 1, 2, ..., n). + If data does include column labels, will select the columns from data matching + with the provided labels to include in the frame, defaulting to + all columns. dtype : dtype, default None Data type to force. Only a single dtype is allowed. If None, infer. copy : bool, default False @@ -523,6 +527,16 @@ class DataFrame(NDFrame, OpsMixin): 1 4 5 6 2 7 8 9 + >>> d = np.array([(1, 2, 3), (4, 5, 6), (7, 8, 9)], + ... dtype=[("a", "i4"), ("b", "i4"), ("c", "i4")]) + >>> df3 = pd.DataFrame(d, columns=['c', 'a']) + ... + >>> df3 + c a + 0 3 1 + 1 6 4 + 2 9 7 + Constructing DataFrame from dataclass: >>> from dataclasses import make_dataclass From 7beb6f3745411957b0756ff8fcf564c758b011ef Mon Sep 17 00:00:00 2001 From: Dries Schaumont Date: Mon, 29 Mar 2021 15:38:18 +0200 Subject: [PATCH 2/3] #39904: adjustments for review --- pandas/core/frame.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ce7c71154930a..3df933403d13b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -472,12 +472,9 @@ class DataFrame(NDFrame, OpsMixin): Index to use for resulting frame. Will default to RangeIndex if no indexing information part of input data and no index provided. columns : Index or array-like - Columns to select from data or column labels to use for resulting frame. - Will use the provided labels for the column index if data does not - include column labels, defaulting to RangeIndex(0, 1, 2, ..., n). - If data does include column labels, will select the columns from data matching - with the provided labels to include in the frame, defaulting to - all columns. + Column labels to use for resulting frame when data does not have them, + defaulting to RangeIndex(0, 1, 2, ..., n). If data contains column labels, + will perform column selection instead. dtype : dtype, default None Data type to force. Only a single dtype is allowed. If None, infer. copy : bool, default False @@ -527,9 +524,9 @@ class DataFrame(NDFrame, OpsMixin): 1 4 5 6 2 7 8 9 - >>> d = np.array([(1, 2, 3), (4, 5, 6), (7, 8, 9)], - ... dtype=[("a", "i4"), ("b", "i4"), ("c", "i4")]) - >>> df3 = pd.DataFrame(d, columns=['c', 'a']) + >>> data = np.array([(1, 2, 3), (4, 5, 6), (7, 8, 9)], + ... dtype=[("a", "i4"), ("b", "i4"), ("c", "i4")]) + >>> df3 = pd.DataFrame(data, columns=['c', 'a']) ... >>> df3 c a From 1f7c2f59e1acad0a9e2c84cc8aac33c056f807dd Mon Sep 17 00:00:00 2001 From: Dries Schaumont Date: Mon, 29 Mar 2021 15:58:11 +0200 Subject: [PATCH 3/3] #39904: Add description to docstring example. --- pandas/core/frame.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3df933403d13b..60eb0339b1f1d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -524,6 +524,8 @@ class DataFrame(NDFrame, OpsMixin): 1 4 5 6 2 7 8 9 + Constructing DataFrame from a numpy ndarray that has labeled columns: + >>> data = np.array([(1, 2, 3), (4, 5, 6), (7, 8, 9)], ... dtype=[("a", "i4"), ("b", "i4"), ("c", "i4")]) >>> df3 = pd.DataFrame(data, columns=['c', 'a'])