diff --git a/python/pyarrow/cffi.py b/python/pyarrow/cffi.py index 1da1a916914..3f5e748daf4 100644 --- a/python/pyarrow/cffi.py +++ b/python/pyarrow/cffi.py @@ -17,7 +17,7 @@ from __future__ import absolute_import -import cffi +from cffi import FFI c_source = """ struct ArrowSchema { @@ -77,5 +77,5 @@ """ # TODO use out-of-line mode for faster import and avoid C parsing -ffi = cffi.FFI() +ffi = FFI() ffi.cdef(c_source) diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py index bb54c3b22c3..5a5e7b10f28 100644 --- a/python/pyarrow/pandas_compat.py +++ b/python/pyarrow/pandas_compat.py @@ -25,7 +25,7 @@ from copy import deepcopy import decimal from itertools import zip_longest -import json +from json import dumps as json_dumps import operator import re import warnings @@ -276,7 +276,7 @@ def construct_metadata(columns_to_convert, df, column_names, index_levels, index_descriptors = index_column_metadata = column_indexes = [] return { - b'pandas': json.dumps({ + b'pandas': json_dumps({ 'index_columns': index_descriptors, 'column_indexes': column_indexes, 'columns': column_metadata + index_column_metadata, @@ -511,7 +511,7 @@ def _get_index_level(df, name): def _level_name(name): # preserve type when default serializable, otherwise str it try: - json.dumps(name) + json_dumps(name) return name except TypeError: return str(name) @@ -826,7 +826,7 @@ def table_to_dataframe( axes = [columns, index] mgr = BlockManager(blocks, axes) if _pandas_api.is_ge_v21(): - df = DataFrame._from_mgr(mgr, mgr.axes) + df = DataFrame._from_mgr(mgr, mgr.axes) # type: ignore[unresolved-attribute] else: df = DataFrame(mgr) return df diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py index aaf15c20288..8c1a2ae7822 100644 --- a/python/pyarrow/parquet/core.py +++ b/python/pyarrow/parquet/core.py @@ -21,7 +21,7 @@ from functools import reduce import inspect -import json +from json import loads as json_loads import os import re import operator @@ 
-1192,7 +1192,7 @@ def add_key_value_metadata(self, key_value_metadata): def _get_pandas_index_columns(keyvalues): - return (json.loads(keyvalues[b'pandas'].decode('utf8')) + return (json_loads(keyvalues[b'pandas'].decode('utf8')) ['index_columns']) diff --git a/python/pyarrow/tests/parquet/test_pandas.py b/python/pyarrow/tests/parquet/test_pandas.py index 703232b7cac..7f647883561 100644 --- a/python/pyarrow/tests/parquet/test_pandas.py +++ b/python/pyarrow/tests/parquet/test_pandas.py @@ -16,7 +16,7 @@ # under the License. import io -import json +from json import loads as json_loads try: import numpy as np @@ -65,7 +65,7 @@ def test_pandas_parquet_custom_metadata(tempdir): metadata = pq.read_metadata(filename).metadata assert b'pandas' in metadata - js = json.loads(metadata[b'pandas'].decode('utf8')) + js = json_loads(metadata[b'pandas'].decode('utf8')) assert js['index_columns'] == [{'kind': 'range', 'name': None, 'start': 0, 'stop': 10000, diff --git a/python/pyarrow/tests/parquet/test_parquet_file.py b/python/pyarrow/tests/parquet/test_parquet_file.py index 24ffe612ef7..aef0954eacd 100644 --- a/python/pyarrow/tests/parquet/test_parquet_file.py +++ b/python/pyarrow/tests/parquet/test_parquet_file.py @@ -408,7 +408,7 @@ def test_parquet_file_hugginface_support(): pytest.skip("fsspec is not installed, skipping Hugging Face test") fake_hf_module = types.ModuleType("huggingface_hub") - fake_hf_module.HfFileSystem = MemoryFileSystem + fake_hf_module.HfFileSystem = MemoryFileSystem # type: ignore[unresolved-attribute] with mock.patch.dict("sys.modules", {"huggingface_hub": fake_hf_module}): uri = "hf://datasets/apache/arrow/test.parquet" table = pa.table({"a": range(10)}) diff --git a/python/pyarrow/tests/test_cython.py b/python/pyarrow/tests/test_cython.py index e0116a4bb76..fdacb16be29 100644 --- a/python/pyarrow/tests/test_cython.py +++ b/python/pyarrow/tests/test_cython.py @@ -191,7 +191,7 @@ def test_visit_strings(tmpdir): strings = ['a', 'b', 'c'] visited = [] - 
+ mod._visit_strings(strings, visited.append) # type: ignore[unresolved-attribute] assert visited == strings @@ -200,4 +200,4 @@ def raise_on_b(s): if s == 'b': raise ValueError('wtf') - mod._visit_strings(strings, raise_on_b) + mod._visit_strings(strings, raise_on_b) # type: ignore[unresolved-attribute] diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py index ebac37e862b..ea1c0afd7ff 100644 --- a/python/pyarrow/tests/test_extension_type.py +++ b/python/pyarrow/tests/test_extension_type.py @@ -1353,11 +1353,11 @@ def test_cpp_extension_in_python(tmpdir): sys.path.insert(0, str(tmpdir)) mod = __import__('extensions') - uuid_type = mod._make_uuid_type() + uuid_type = mod._make_uuid_type() # type: ignore[unresolved-attribute] assert uuid_type.extension_name == "example-uuid" assert uuid_type.storage_type == pa.binary(16) - array = mod._make_uuid_array() + array = mod._make_uuid_array() # type: ignore[unresolved-attribute] assert array.type == uuid_type assert array.to_pylist() == [b'abcdefghijklmno0', b'0onmlkjihgfedcba'] assert array[0].as_py() == b'abcdefghijklmno0' diff --git a/python/pyarrow/tests/test_flight.py b/python/pyarrow/tests/test_flight.py index db26f0df4fa..3fcac064876 100644 --- a/python/pyarrow/tests/test_flight.py +++ b/python/pyarrow/tests/test_flight.py @@ -26,7 +26,8 @@ import threading import time import traceback -import json +from json import dumps as json_dumps +from json import loads as json_loads from datetime import datetime try: @@ -64,9 +65,19 @@ def __enter__(self): def __exit__(self, exc_type, exc_val, exc_tb): pass class FlightServerBase(MockContextManager): - pass + def serve(self): + pass class FlightClient(MockContextManager): - pass + def get_flight_info(self, **kwargs): + pass + def do_action(self, **kwargs): + pass + def do_get(self, **kwargs): + pass + def do_put(self, **kwargs): + pass + def close(self): + pass ServerAuthHandler,
ClientAuthHandler = object, object ServerMiddleware, ServerMiddlewareFactory = object, object ClientMiddleware, ClientMiddlewareFactory = object, object @@ -330,7 +341,7 @@ class InvalidStreamFlightServer(FlightServerBase): def do_get(self, context, ticket): data1 = [pa.array([-10, -5, 0, 5, 10], type=pa.int32())] data2 = [pa.array([-10.0, -5.0, 0.0, 5.0, 10.0], type=pa.float64())] - assert data1.type != data2.type + assert data1[0].type != data2[0].type table1 = pa.Table.from_arrays(data1, names=['a']) table2 = pa.Table.from_arrays(data2, names=['a']) assert table1.schema == self.schema @@ -1757,7 +1768,7 @@ def test_flight_do_put_limit(): with pytest.raises(flight.FlightWriteSizeExceededError, match="exceeded soft limit") as excinfo: writer.write_batch(large_batch) - assert excinfo.value.limit == 4096 + assert excinfo.value.limit == 4096 # type: ignore[unresolved-attribute] smaller_batches = [ large_batch.slice(0, 384), large_batch.slice(384), @@ -2371,7 +2382,7 @@ class ActionNoneFlightServer(EchoFlightServer): def do_action(self, context, action): if action.type == "get_value": - return [json.dumps(self.VALUES).encode('utf-8')] + return [json_dumps(self.VALUES).encode('utf-8')] elif action.type == "append": self.VALUES.append(True) return None @@ -2388,7 +2399,7 @@ def test_none_action_side_effect(): FlightClient(('localhost', server.port)) as client: client.do_action(flight.Action("append", b"")) r = client.do_action(flight.Action("get_value", b"")) - assert json.loads(next(r).body.to_pybytes()) == [True] + assert json_loads(next(r).body.to_pybytes()) == [True] @pytest.mark.slow # Takes a while for gRPC to "realize" writes fail diff --git a/python/pyarrow/tests/test_json.py b/python/pyarrow/tests/test_json.py index c3f9fe333bd..68ac40063c9 100644 --- a/python/pyarrow/tests/test_json.py +++ b/python/pyarrow/tests/test_json.py @@ -20,7 +20,7 @@ from decimal import Decimal import io import itertools -import json +from json import dumps as json_dumps import string 
import unittest @@ -49,7 +49,7 @@ def make_random_json(num_cols=2, num_rows=10, linesep='\r\n'): lines = [] for row in arr.T: json_obj = OrderedDict([(k, int(v)) for (k, v) in zip(col_names, row)]) - lines.append(json.dumps(json_obj)) + lines.append(json_dumps(json_obj)) data = linesep.join(lines).encode() columns = [pa.array(col, type=pa.int64()) for col in arr] expected = pa.Table.from_arrays(columns, col_names) diff --git a/python/pyarrow/tests/test_jvm.py b/python/pyarrow/tests/test_jvm.py index d2ba780efc7..d71380b8666 100644 --- a/python/pyarrow/tests/test_jvm.py +++ b/python/pyarrow/tests/test_jvm.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -import json +from json import dumps as json_dumps import os import pyarrow as pa import pyarrow.jvm as pa_jvm @@ -175,23 +175,23 @@ def test_jvm_types(root_allocator, pa_type, jvm_spec, nullable): # TODO: This needs to be set for complex types 'children': [] } - jvm_field = _jvm_field(json.dumps(spec)) + jvm_field = _jvm_field(json_dumps(spec)) result = pa_jvm.field(jvm_field) expected_field = pa.field('field_name', pa_type, nullable=nullable) assert result == expected_field - jvm_schema = _jvm_schema(json.dumps(spec)) + jvm_schema = _jvm_schema(json_dumps(spec)) result = pa_jvm.schema(jvm_schema) assert result == pa.schema([expected_field]) # Schema with custom metadata - jvm_schema = _jvm_schema(json.dumps(spec), {'meta': 'data'}) + jvm_schema = _jvm_schema(json_dumps(spec), {'meta': 'data'}) result = pa_jvm.schema(jvm_schema) assert result == pa.schema([expected_field], {'meta': 'data'}) # Schema with custom field metadata spec['metadata'] = [{'key': 'field meta', 'value': 'field data'}] - jvm_schema = _jvm_schema(json.dumps(spec)) + jvm_schema = _jvm_schema(json_dumps(spec)) result = pa_jvm.schema(jvm_schema) expected_field = expected_field.with_metadata( {'field meta': 'field data'}) @@ -379,7 +379,7 @@ def test_jvm_record_batch(root_allocator, pa_type, 
py_data, jvm_type, # TODO: This needs to be set for complex types 'children': [] } - jvm_field = _jvm_field(json.dumps(spec)) + jvm_field = _jvm_field(json_dumps(spec)) # Create VectorSchemaRoot jvm_fields = jpype.JClass('java.util.ArrayList')() diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index ceea2527da0..f0bc4a31f34 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -17,7 +17,7 @@ import gc import decimal -import json +from json import dumps as json_dumps import multiprocessing as mp import sys import warnings @@ -3264,7 +3264,8 @@ def test_error_sparse(self): df = pd.DataFrame({'a': pd.arrays.SparseArray([1, np.nan, 3])}) except AttributeError: # pandas.arrays module introduced in pandas 0.24 - df = pd.DataFrame({'a': pd.SparseArray([1, np.nan, 3])}) + from pandas import SparseArray + df = pd.DataFrame({'a': SparseArray([1, np.nan, 3])}) with pytest.raises(TypeError, match="Sparse pandas data"): pa.Table.from_pandas(df) @@ -4422,11 +4423,12 @@ def test_convert_to_extension_array(monkeypatch): # monkeypatch pandas Int64Dtype to *not* have the protocol method if Version(pd.__version__) < Version("1.3.0.dev"): + from pandas.core.arrays import integer monkeypatch.delattr( - pd.core.arrays.integer._IntegerDtype, "__from_arrow__") + integer._IntegerDtype, "__from_arrow__") else: monkeypatch.delattr( - pd.core.arrays.integer.NumericDtype, "__from_arrow__") + pd.core.arrays.integer.NumericDtype, "__from_arrow__") # type: ignore[unresolved-attribute] # Int64Dtype has no __from_arrow__ -> use normal conversion result = table.to_pandas() assert len(_get_mgr(result).blocks) == 1 @@ -4467,11 +4469,12 @@ def test_conversion_extensiontype_to_extensionarray(monkeypatch): # monkeypatch pandas Int64Dtype to *not* have the protocol method # (remove the version added above and the actual version for recent pandas) if Version(pd.__version__) < Version("1.3.0.dev"): + from pandas.core.arrays import integer
monkeypatch.delattr( - pd.core.arrays.integer._IntegerDtype, "__from_arrow__") + integer._IntegerDtype, "__from_arrow__") else: monkeypatch.delattr( - pd.core.arrays.integer.NumericDtype, "__from_arrow__") + pd.core.arrays.integer.NumericDtype, "__from_arrow__") # type: ignore[unresolved-attribute] result = arr.to_pandas() assert _get_mgr(result).blocks[0].values.dtype == np.dtype("int64") @@ -4650,7 +4653,7 @@ def test_metadata_compat_range_index_pre_0_12(): t1 = pa.Table.from_arrays([a_arrow, rng_index_arrow], names=['a', 'qux']) t1 = t1.replace_schema_metadata({ - b'pandas': json.dumps( + b'pandas': json_dumps( {'index_columns': ['qux'], 'column_indexes': [{'name': None, 'field_name': None, @@ -4679,7 +4682,7 @@ def test_metadata_compat_range_index_pre_0_12(): t2 = pa.Table.from_arrays([a_arrow, rng_index_arrow], names=['qux', gen_name_0]) t2 = t2.replace_schema_metadata({ - b'pandas': json.dumps( + b'pandas': json_dumps( {'index_columns': [gen_name_0], 'column_indexes': [{'name': None, 'field_name': None, @@ -4708,7 +4711,7 @@ def test_metadata_compat_range_index_pre_0_12(): t3 = pa.Table.from_arrays([a_arrow, rng_index_arrow], names=['a', gen_name_0]) t3 = t3.replace_schema_metadata({ - b'pandas': json.dumps( + b'pandas': json_dumps( {'index_columns': [gen_name_0], 'column_indexes': [{'name': None, 'field_name': None, @@ -4737,7 +4740,7 @@ def test_metadata_compat_range_index_pre_0_12(): t4 = pa.Table.from_arrays([a_arrow, rng_index_arrow, b_arrow], names=['a', 'qux', gen_name_1]) t4 = t4.replace_schema_metadata({ - b'pandas': json.dumps( + b'pandas': json_dumps( {'index_columns': ['qux', gen_name_1], 'column_indexes': [{'name': None, 'field_name': None, @@ -4771,7 +4774,7 @@ def test_metadata_compat_range_index_pre_0_12(): t5 = pa.Table.from_arrays([a_arrow, rng_index_arrow, b_arrow], names=['a', gen_name_0, gen_name_1]) t5 = t5.replace_schema_metadata({ - b'pandas': json.dumps( + b'pandas': json_dumps( {'index_columns': [gen_name_0, gen_name_1], 
'column_indexes': [{'name': None, 'field_name': None, @@ -4818,7 +4821,7 @@ def test_metadata_compat_missing_field_name(): # metadata generated by fastparquet 0.3.2 with missing field_names table = table.replace_schema_metadata({ - b'pandas': json.dumps({ + b'pandas': json_dumps({ 'column_indexes': [ {'field_name': None, 'metadata': None, diff --git a/python/pyarrow/vendored/docscrape.py b/python/pyarrow/vendored/docscrape.py index 6c4d6e01400..096ef245243 100644 --- a/python/pyarrow/vendored/docscrape.py +++ b/python/pyarrow/vendored/docscrape.py @@ -105,6 +105,10 @@ def is_empty(self): class ParseError(Exception): + def __init__(self, *args, docstring=None, **kwargs): + super().__init__(*args, **kwargs) + self.docstring = docstring + def __str__(self): message = self.args[0] if hasattr(self, 'docstring'): @@ -153,7 +157,7 @@ def __init__(self, docstring, config=None): try: self._parse() except ParseError as e: - e.docstring = orig_docstring + e.docstring = orig_docstring # type: ignore[unresolved-attribute] raise def __getitem__(self, key): diff --git a/python/scripts/run_emscripten_tests.py b/python/scripts/run_emscripten_tests.py index 53d3dd52bd8..9b833525939 100644 --- a/python/scripts/run_emscripten_tests.py +++ b/python/scripts/run_emscripten_tests.py @@ -130,7 +130,8 @@ def launch_server(dist_dir): address = q.get(timeout=50) time.sleep(0.1) # wait to make sure server is started yield address p.terminate() + p.join() class NodeDriver: diff --git a/python/setup.py b/python/setup.py index 504c78d61bb..4e87ecfbfcc 100755 --- a/python/setup.py +++ b/python/setup.py @@ -48,7 +48,7 @@ ) -if Cython.__version__ < '3': +if Cython.__version__ < '3': # type: ignore[unresolved-attribute] raise Exception( 'Please update your Cython version.
Supported Cython >= 3') diff --git a/python/stubs/__lib_pxi/pandas_shim.pyi b/python/stubs/__lib_pxi/pandas_shim.pyi index 0e80fae4ebf..29a8485d062 100644 --- a/python/stubs/__lib_pxi/pandas_shim.pyi +++ b/python/stubs/__lib_pxi/pandas_shim.pyi @@ -1,7 +1,7 @@ from types import ModuleType from typing import Any, Iterable, TypeGuard -import pandas as pd +import pandas from numpy import dtype from pandas.core.dtypes.base import ExtensionDtype @@ -9,8 +9,8 @@ from pandas.core.dtypes.base import ExtensionDtype class _PandasAPIShim: has_sparse: bool - def series(self, *args, **kwargs) -> pd.Series: ... - def data_frame(self, *args, **kwargs) -> pd.DataFrame: ... + def series(self, *args, **kwargs) -> pandas.Series: ... + def data_frame(self, *args, **kwargs) -> pandas.DataFrame: ... @property def have_pandas(self) -> bool: ... @property @@ -28,21 +28,21 @@ class _PandasAPIShim: def is_ge_v23(self) -> bool: ... def is_ge_v3(self) -> bool: ... @property - def categorical_type(self) -> type[pd.Categorical]: ... + def categorical_type(self) -> type[pandas.Categorical]: ... @property - def datetimetz_type(self) -> type[pd.DatetimeTZDtype]: ... + def datetimetz_type(self) -> type[pandas.DatetimeTZDtype]: ... @property def extension_dtype(self) -> type[ExtensionDtype]: ... def is_array_like( self, obj: Any - ) -> TypeGuard[pd.Series | pd.Index | pd.Categorical | ExtensionDtype]: ... - def is_categorical(self, obj: Any) -> TypeGuard[pd.Categorical]: ... - def is_datetimetz(self, obj: Any) -> TypeGuard[pd.DatetimeTZDtype]: ... + ) -> TypeGuard[pandas.Series | pandas.Index | pandas.Categorical | ExtensionDtype]: ... + def is_categorical(self, obj: Any) -> TypeGuard[pandas.Categorical]: ... + def is_datetimetz(self, obj: Any) -> TypeGuard[pandas.DatetimeTZDtype]: ... def is_extension_array_dtype(self, obj: Any) -> TypeGuard[ExtensionDtype]: ... def is_sparse(self, obj: Any) -> bool: ... - def is_data_frame(self, obj: Any) -> TypeGuard[pd.DataFrame]: ... 
- def is_series(self, obj: Any) -> TypeGuard[pd.Series]: ... - def is_index(self, obj: Any) -> TypeGuard[pd.Index]: ... + def is_data_frame(self, obj: Any) -> TypeGuard[pandas.DataFrame]: ... + def is_series(self, obj: Any) -> TypeGuard[pandas.Series]: ... + def is_index(self, obj: Any) -> TypeGuard[pandas.Index]: ... def get_values(self, obj: Any) -> bool: ... def get_rangeindex_attribute(self, level, name): ... diff --git a/python/stubs/cffi.pyi b/python/stubs/cffi.pyi index 2ae945c5974..217b4b2ea44 100644 --- a/python/stubs/cffi.pyi +++ b/python/stubs/cffi.pyi @@ -1,4 +1,4 @@ -import cffi +from cffi import FFI c_source: str -ffi: cffi.FFI +ffi: FFI