Skip to content

Commit

Permalink
apacheGH-33321: Support converting to non-nano datetime64 for pandas …
Browse files Browse the repository at this point in the history
…>= 2.0
  • Loading branch information
danepitkin committed Jun 14, 2023
1 parent 4a53764 commit 9703ec2
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 6 deletions.
7 changes: 5 additions & 2 deletions python/pyarrow/array.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -1674,8 +1674,11 @@ cdef _array_like_to_pandas(obj, options, types_mapper):
original_type = obj.type
name = obj._name

# ARROW-3789(wesm): Convert date/timestamp types to datetime64[ns]
c_options.coerce_temporal_nanoseconds = True
# GH-33321: pandas >= 2.0 supports non-nanosecond datetime64, so date/timestamp units are only coerced to nanoseconds for older pandas
from pyarrow.vendored.version import Version
if pandas_api.loose_version < Version('2.0.0'):
# ARROW-3789(wesm): Convert date/timestamp types to datetime64[ns]
c_options.coerce_temporal_nanoseconds = True

if isinstance(obj, Array):
with nogil:
Expand Down
15 changes: 13 additions & 2 deletions python/pyarrow/src/arrow/python/arrow_to_pandas.cc
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ PandasOptions MakeInnerOptions(PandasOptions options) {
// datetime.datetime does not support nanoseconds).
// We force the object conversion to preserve the value of the timezone.
// Nanoseconds are returned as integers.
// Since GH-33321, the outer conversion no longer coerces to nanoseconds for
// pandas versions >= 2.0, which support non-nanosecond datetime64 units.
options.coerce_temporal_nanoseconds = false;

return options;
Expand Down Expand Up @@ -2060,9 +2062,18 @@ static Status GetPandasWriterType(const ChunkedArray& data, const PandasOptions&
case Type::DATE64:
if (options.date_as_object) {
*output_type = PandasWriter::OBJECT;
} else if (options.coerce_temporal_nanoseconds) {
*output_type = PandasWriter::DATETIME_NANO;
} else {
*output_type = options.coerce_temporal_nanoseconds ? PandasWriter::DATETIME_NANO
: PandasWriter::DATETIME_DAY;
const auto& dt_type = checked_cast<const DateType&>(*data.type());
switch (dt_type.unit()) {
case DateUnit::DAY:
*output_type = PandasWriter::DATETIME_DAY;
break;
case DateUnit::MILLI:
*output_type = PandasWriter::DATETIME_MILLI;
break;
}
}
break;
case Type::TIMESTAMP: {
Expand Down
7 changes: 5 additions & 2 deletions python/pyarrow/table.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -2908,8 +2908,11 @@ def table_to_blocks(options, Table table, categories, extension_columns):
c_options.extension_columns = {tobytes(col)
for col in extension_columns}

# ARROW-3789(wesm); Convert date/timestamp types to datetime64[ns]
c_options.coerce_temporal_nanoseconds = True
# GH-33321: pandas >= 2.0 supports non-nanosecond datetime64, so date/timestamp units are only coerced to nanoseconds for older pandas
from pyarrow.vendored.version import Version
if pandas_api.loose_version < Version('2.0.0'):
# ARROW-3789(wesm): Convert date/timestamp types to datetime64[ns]
c_options.coerce_temporal_nanoseconds = True

if c_options.self_destruct:
# Move the shared_ptr, table is now unsafe to use further
Expand Down

0 comments on commit 9703ec2

Please sign in to comment.