From c131955b65f1577529d1a6a15bb8fc9000f3d735 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Mon, 6 May 2024 11:30:31 +0200 Subject: [PATCH 1/4] Fix pandas internals changes --- .github/workflows/ci.yml | 2 +- partd/pandas.py | 39 ++++++++++++++++++--------------------- pyproject.toml | 9 ++++----- 3 files changed, 23 insertions(+), 27 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8d58bce..db8cdce 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,7 +9,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] + python-version: ["3.9", "3.10", "3.11", "3.12"] steps: - name: Checkout source diff --git a/partd/pandas.py b/partd/pandas.py index 5a25c26..b79e9f7 100644 --- a/partd/pandas.py +++ b/partd/pandas.py @@ -1,37 +1,31 @@ from functools import partial import pickle -import numpy as np import pandas as pd -from pandas.core.internals import create_block_manager_from_blocks, make_block + +try: + from pandas.core.internals.managers import create_block_manager_from_blocks +except ImportError: + from pandas.core.internals import create_block_manager_from_blocks + +from pandas.core.internals import make_block +from packaging.version import Version from . import numpy as pnp from .core import Interface from .encode import Encode from .utils import extend, framesplit, frame +from pandas.api.types import is_extension_array_dtype +from pandas.api.extensions import ExtensionArray -try: - # pandas >= 0.24.0 - from pandas.api.types import is_extension_array_dtype -except ImportError: - def is_extension_array_dtype(dtype): - return False - -try: - # Some `ExtensionArray`s can have a `.dtype` which is not a `ExtensionDtype` - # (e.g. they can be backed by a NumPy dtype). For these cases we check - # whether the instance is a `ExtensionArray`. - # https://github.com/dask/partd/issues/48 - from pandas.api.extensions import ExtensionArray - def is_extension_array(x): - return isinstance(x, ExtensionArray) -except ImportError: - def is_extension_array(x): - return False +def is_extension_array(x): + return isinstance(x, ExtensionArray) dumps = partial(pickle.dumps, protocol=pickle.HIGHEST_PROTOCOL) +PANDAS_GE_210 = Version(pd.__version__).release >= (2, 1, 0) + class PandasColumns(Interface): def __init__(self, partd=None): @@ -195,7 +189,10 @@ def deserialize(bytes): index_from_header_bytes(headers[1], bytes[1])] blocks = [block_from_header_bytes(h, b) for (h, b) in zip(headers[2:], bytes[2:])] - return pd.DataFrame(create_block_manager_from_blocks(blocks, axes)) + if PANDAS_GE_210: + return pd.DataFrame._from_mgr(create_block_manager_from_blocks(blocks, axes), axes=axes) + else: + return pd.DataFrame(create_block_manager_from_blocks(blocks, axes)) def join(dfs): diff --git a/pyproject.toml b/pyproject.toml index 051a159..1eca1aa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,15 +10,14 @@ license = {text = "BSD"} keywords = [] classifiers = [ "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ] readme = "README.rst" urls = {Homepage = "http://github.com/dask/partd/"} -requires-python = ">=3.7" +requires-python = ">=3.9" dynamic = ["version"] dependencies = [ "locket", @@ -27,8 +26,8 @@ dependencies = [ [project.optional-dependencies] complete = [ - "numpy >= 1.9.0", - "pandas >=0.19.0", + "numpy >= 1.20.0", + "pandas >=1.3", "pyzmq", "blosc", ] From dadc882065eef5b838d009c3b420bc487446dc88 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Mon, 6 May 2024 14:10:21 +0200 Subject: [PATCH 2/4] Make futureproof --- partd/pandas.py | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/partd/pandas.py b/partd/pandas.py index b79e9f7..3787c2c 100644 --- a/partd/pandas.py +++ b/partd/pandas.py @@ -2,14 +2,22 @@ import pickle import pandas as pd +from packaging.version import Version -try: - from pandas.core.internals.managers import create_block_manager_from_blocks -except ImportError: - from pandas.core.internals import create_block_manager_from_blocks +PANDAS_GE_210 = Version(pd.__version__).release >= (2, 1, 0) +PANDAS_GE_300 = Version(pd.__version__).major >= 3 -from pandas.core.internals import make_block -from packaging.version import Version +if PANDAS_GE_300: + from pandas.api.internals import create_dataframe_from_blocks + create_block_manager_from_blocks= None + make_block = None +else: + try: + from pandas.core.internals.managers import create_block_manager_from_blocks + except ImportError: + from pandas.core.internals import create_block_manager_from_blocks + + from pandas.core.internals import make_block from . import numpy as pnp from .core import Interface @@ -24,7 +32,6 @@ def is_extension_array(x): dumps = partial(pickle.dumps, protocol=pickle.HIGHEST_PROTOCOL) -PANDAS_GE_210 = Version(pd.__version__).release >= (2, 1, 0) class PandasColumns(Interface): @@ -142,7 +149,7 @@ def block_to_header_bytes(block): return header, bytes -def block_from_header_bytes(header, bytes): +def block_from_header_bytes(header, bytes, create_block: bool): placement, dtype, shape, (extension_type, extension_values) = header if extension_type == "other": @@ -158,7 +165,9 @@ def block_from_header_bytes(header, bytes): tz_info = extension_values[0] values = pd.DatetimeIndex(values).tz_localize('utc').tz_convert( tz_info) - return make_block(values, placement=placement) + if create_block: + return make_block(values, placement=placement) + return values, placement def serialize(df): @@ -187,9 +196,11 @@ def deserialize(bytes): bytes = frames[1:] axes = [index_from_header_bytes(headers[0], bytes[0]), index_from_header_bytes(headers[1], bytes[1])] - blocks = [block_from_header_bytes(h, b) + blocks = [block_from_header_bytes(h, b, create_block=not PANDAS_GE_300) for (h, b) in zip(headers[2:], bytes[2:])] - if PANDAS_GE_210: + if PANDAS_GE_300: + return pd.api.internals.create_dataframe_from_blocks(blocks, axes[1], axes[0]) + elif PANDAS_GE_210: return pd.DataFrame._from_mgr(create_block_manager_from_blocks(blocks, axes), axes=axes) else: return pd.DataFrame(create_block_manager_from_blocks(blocks, axes)) From d81604614543374b1ed24b59a5b5bbc84df1c615 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Mon, 6 May 2024 14:11:12 +0200 Subject: [PATCH 3/4] Make futureproof --- partd/pandas.py | 1 + 1 file changed, 1 insertion(+) diff --git a/partd/pandas.py b/partd/pandas.py index 3787c2c..a09734d 100644 --- a/partd/pandas.py +++ b/partd/pandas.py @@ -12,6 +12,7 @@ create_block_manager_from_blocks= None make_block = None else: + create_dataframe_from_blocks = None try: from pandas.core.internals.managers import create_block_manager_from_blocks except ImportError: From 9b4d2800fd8143e2c6a3a32022f31464d787ca1f Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Mon, 6 May 2024 14:11:42 +0200 Subject: [PATCH 4/4] Make futureproof --- partd/pandas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/partd/pandas.py b/partd/pandas.py index a09734d..880558c 100644 --- a/partd/pandas.py +++ b/partd/pandas.py @@ -9,7 +9,7 @@ if PANDAS_GE_300: from pandas.api.internals import create_dataframe_from_blocks - create_block_manager_from_blocks= None + create_block_manager_from_blocks = None make_block = None else: create_dataframe_from_blocks = None