From 9cea28e2610db4bc609e1e9c75c8fff636958e3b Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Tue, 3 Oct 2023 13:35:51 +0200 Subject: [PATCH] [Protocol] Make Column.get_buffers() docstring more explicit Several implementations got ``Column.get_buffers()`` wrong by assuming the buffers' dtypes would be the same as the column's dtype. Clarify to eliminate any ambiguity. See https://github.com/apache/arrow/issues/37598 for example. --- protocol/dataframe_protocol.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/protocol/dataframe_protocol.py b/protocol/dataframe_protocol.py index aee959af..ea2765ad 100644 --- a/protocol/dataframe_protocol.py +++ b/protocol/dataframe_protocol.py @@ -345,6 +345,12 @@ def get_buffers(self) -> ColumnBuffers: """ Return a dictionary containing the underlying buffers. + Each buffer has its own dtype which can be distinct from the + column's dtype. For example, a column with the ``STRING`` dtype + could be represented by two buffers: a "data" buffer with + dtype ``INT`` and bit width 8, and an "offsets" buffer with + dtype ``INT`` and bit width 32. + The returned dictionary has the following contents: - "data": a two-element tuple whose first element is a buffer