Skip to content

Commit

Permalink
Remove Python bindings
Browse files Browse the repository at this point in the history
  • Loading branch information
rok committed Mar 4, 2024
1 parent 1669e9b commit eaeb350
Show file tree
Hide file tree
Showing 10 changed files with 12 additions and 664 deletions.
2 changes: 1 addition & 1 deletion cpp/src/arrow/acero/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ if(ARROW_TESTING)
# test_nodes.cc isn't used by all tests but link to it for simple
# CMakeLists.txt.
add_library(arrow_acero_testing OBJECT test_nodes.cc test_util_internal.cc
../extension/tensor_extension_array_test.cc)
../extension/tensor_extension_array_test.cc)
# Even though this is still just an object library we still need to "link" our
# dependencies so that include paths are configured correctly
target_link_libraries(arrow_acero_testing PRIVATE ${ARROW_ACERO_TEST_LINK_LIBS})
Expand Down
1 change: 0 additions & 1 deletion docs/source/python/api/arrays.rst
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@ may expose data type-specific methods or properties.
UnionArray
ExtensionArray
FixedShapeTensorArray
VariableShapeTensorArray

.. _api.scalar:

Expand Down
5 changes: 1 addition & 4 deletions python/pyarrow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,6 @@ def print_entry(label, value):
dictionary,
run_end_encoded,
fixed_shape_tensor,
variable_shape_tensor,
field,
type_for_alias,
DataType, DictionaryType, StructType,
Expand All @@ -183,8 +182,7 @@ def print_entry(label, value):
FixedSizeBinaryType, Decimal128Type, Decimal256Type,
BaseExtensionType, ExtensionType,
RunEndEncodedType, FixedShapeTensorType,
VariableShapeTensorType, PyExtensionType,
UnknownExtensionType,
PyExtensionType, UnknownExtensionType,
register_extension_type, unregister_extension_type,
DictionaryMemo,
KeyValueMetadata,
Expand Down Expand Up @@ -218,7 +216,6 @@ def print_entry(label, value):
MonthDayNanoIntervalArray,
Decimal128Array, Decimal256Array, StructArray, ExtensionArray,
RunEndEncodedArray, FixedShapeTensorArray,
VariableShapeTensorArray,
scalar, NA, _NULL as NULL, Scalar,
NullScalar, BooleanScalar,
Int8Scalar, Int16Scalar, Int32Scalar, Int64Scalar,
Expand Down
130 changes: 1 addition & 129 deletions python/pyarrow/array.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -4222,7 +4222,7 @@ cdef class FixedShapeTensorArray(ExtensionArray):
and the rest of the dimensions will match the permuted shape of the fixed
shape tensor.
The conversion is zero-copy if data is primitive numeric and without nulls.
The conversion is zero-copy.
Returns
-------
Expand Down Expand Up @@ -4320,134 +4320,6 @@ cdef class FixedShapeTensorArray(ExtensionArray):
)


cdef class VariableShapeTensorArray(ExtensionArray):
    """
    Concrete class for variable shape tensor extension arrays.

    Examples
    --------
    Define the extension type for tensor array

    >>> import pyarrow as pa
    >>> tensor_type = pa.variable_shape_tensor(pa.float64(), 2)

    Create an extension array

    >>> shapes = pa.array([[2, 3], [1, 2]], pa.list_(pa.int32(), 2))
    >>> values = pa.array([[1, 2, 3, 4, 5, 6], [7, 8]], pa.list_(pa.float64()))
    >>> arr = pa.StructArray.from_arrays([shapes, values], names=["shape", "data"])
    >>> pa.ExtensionArray.from_storage(tensor_type, arr)
    <pyarrow.lib.VariableShapeTensorArray object at ...>
    -- is_valid: all not null
    -- child 0 type: fixed_size_list<item: int32>[2]
      [
        [
          2,
          3
        ],
        [
          1,
          2
        ]
      ]
    -- child 1 type: list<item: double>
      [
        [
          1,
          2,
          3,
          4,
          5,
          6
        ],
        [
          7,
          8
        ]
      ]
    """

    @staticmethod
    def from_numpy_ndarray(obj):
        """
        Convert a list of numpy ndarrays to a variable shape tensor extension array.

        The length of the list will become the length of the variable shape
        tensor array. All ndarrays must share the same dtype, number of
        dimensions and memory layout (dimension permutation).

        Parameters
        ----------
        obj : list of numpy.ndarray

        Returns
        -------
        VariableShapeTensorArray

        Raises
        ------
        TypeError
            If ``obj`` is not a list or the ndarrays do not share one dtype.
        ValueError
            If ``obj`` is empty, the ndarrays differ in ndim or permutation,
            have fewer than two dimensions, or any ndarray is empty.

        Examples
        --------
        >>> import pyarrow as pa
        >>> import numpy as np
        >>> ndarray_list = [
        ...     np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32),
        ...     np.array([[7, 8]], dtype=np.float32),
        ... ]
        >>> arr = pa.VariableShapeTensorArray.from_numpy_ndarray(ndarray_list)
        >>> arr.type
        VariableShapeTensorType(extension<arrow.variable_shape_tensor[value_type=float, ndim=2, permutation=[0,1]]>)
        >>> arr
        <pyarrow.lib.VariableShapeTensorArray object at ...>
        -- is_valid: all not null
        -- child 0 type: fixed_size_list<item: int32>[2]
          [
            [
              2,
              3
            ],
            [
              1,
              2
            ]
          ]
        -- child 1 type: list<item: float>
          [
            [
              1,
              2,
              3,
              4,
              5,
              6
            ],
            [
              7,
              8
            ]
          ]
        """
        # Explicit raise instead of ``assert``: asserts are stripped when
        # Python runs with -O, which would let invalid input through.
        if not isinstance(obj, list):
            raise TypeError('obj must be a list of numpy arrays')
        # The dtype/ndim/permutation are inferred from the first element, so
        # an empty list carries no type information to build the array from.
        if not obj:
            raise ValueError('obj must be a non-empty list of numpy arrays')

        numpy_type = obj[0].dtype
        ndim = obj[0].ndim

        # Validate dtype and ndim before touching shapes: applying the first
        # array's permutation to an array of different rank would otherwise
        # fail with an unrelated IndexError (or silently truncate the shape).
        if not all(o.dtype == numpy_type for o in obj):
            raise TypeError('All numpy arrays must have matching dtype.')

        if not all(o.ndim == ndim for o in obj):
            raise ValueError('All numpy arrays must have matching ndim.')

        if ndim < 2:
            raise ValueError(
                "Cannot convert 1D array or scalar to variable shape tensor array")

        # Sorting the negated strides (stable) yields the dimension indices in
        # order of decreasing stride, i.e. the order in which the dimensions
        # are laid out in memory.
        permutations = [(-np.array(o.strides)).argsort(kind="stable") for o in obj]
        permutation = permutations[0]

        if not all(np.array_equal(p, permutation) for p in permutations):
            raise ValueError('All numpy arrays must have matching permutation.')

        # Shapes are stored in permuted (memory) order to match the flattened
        # values produced by ``np.ravel(..., order="K")`` below.
        shapes = [np.take(o.shape, permutation) for o in obj]

        for shape in shapes:
            if np.prod(shape) == 0:
                raise ValueError("Expected a non-empty ndarray")

        arrow_type = from_numpy_dtype(numpy_type)
        values = array([np.ravel(o, order="K") for o in obj], list_(arrow_type))
        shapes = array(shapes, list_(int32(), list_size=ndim))
        struct_arr = StructArray.from_arrays([shapes, values], names=["shape", "data"])

        tensor_type = variable_shape_tensor(arrow_type, ndim, permutation=permutation)
        return ExtensionArray.from_storage(tensor_type, struct_arr)

cdef dict _array_classes = {
_Type_NA: NullArray,
_Type_BOOL: BooleanArray,
Expand Down
25 changes: 1 addition & 24 deletions python/pyarrow/includes/libarrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -1057,11 +1057,6 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
void set_chunksize(int64_t chunksize)

cdef cppclass CTensor" arrow::Tensor":
CTensor(const shared_ptr[CDataType]& type,
const shared_ptr[CBuffer]& data,
const vector[int64_t]& shape,
const vector[int64_t]& strides,
const vector[c_string]& dim_names)
shared_ptr[CDataType] type()
shared_ptr[CBuffer] data()

Expand Down Expand Up @@ -2805,28 +2800,10 @@ cdef extern from "arrow/extension_type.h" namespace "arrow":

shared_ptr[CArray] storage()

# Cython declarations for the C++ class arrow::extension::VariableShapeTensorType
# from "arrow/extension/variable_shape_tensor.h". Everything in this block is
# declared ``nogil``.
cdef extern from "arrow/extension/variable_shape_tensor.h" namespace "arrow::extension" nogil:
    cdef cppclass CVariableShapeTensorType \
            " arrow::extension::VariableShapeTensorType"(CExtensionType):

        # Takes one extension scalar and returns a CResult wrapping a
        # shared_ptr to a CTensor.
        CResult[shared_ptr[CTensor]] MakeTensor(const shared_ptr[CExtensionScalar]& scalar) const

        # Static factory: returns a CResult wrapping the data type built from
        # the given value type, ndim, permutation, dim names and uniform shape.
        @staticmethod
        CResult[shared_ptr[CDataType]] Make(const shared_ptr[CDataType]& value_type,
                                            const int32_t ndim,
                                            const vector[int64_t] permutation,
                                            const vector[c_string] dim_names,
                                            const vector[optional[int64_t]] uniform_shape)

        # Accessors named after the parameters of Make above.
        const shared_ptr[CDataType] value_type()
        const int32_t ndim()
        const vector[int64_t] permutation()
        const vector[c_string] dim_names()
        const vector[optional[int64_t]] uniform_shape()

cdef extern from "arrow/extension/fixed_shape_tensor.h" namespace "arrow::extension" nogil:
cdef cppclass CFixedShapeTensorType \
" arrow::extension::FixedShapeTensorType"(CExtensionType) nogil:
" arrow::extension::FixedShapeTensorType"(CExtensionType):

CResult[shared_ptr[CTensor]] MakeTensor(const shared_ptr[CExtensionScalar]& scalar) const

Expand Down
5 changes: 0 additions & 5 deletions python/pyarrow/lib.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -210,11 +210,6 @@ cdef class ExtensionType(BaseExtensionType):
const CPyExtensionType* cpy_ext_type


# C-level declaration of the variable shape tensor extension type wrapper.
cdef class VariableShapeTensorType(BaseExtensionType):
    cdef:
        # Typed pointer to the underlying C++ VariableShapeTensorType.
        # NOTE(review): presumably assigned when the wrapper is initialized;
        # the assignment is not visible here - confirm in the implementation.
        const CVariableShapeTensorType* tensor_ext_type


cdef class FixedShapeTensorType(BaseExtensionType):
cdef:
const CFixedShapeTensorType* tensor_ext_type
Expand Down
2 changes: 0 additions & 2 deletions python/pyarrow/public-api.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,6 @@ cdef api object pyarrow_wrap_data_type(
cpy_ext_type = dynamic_cast[_CPyExtensionTypePtr](ext_type)
if cpy_ext_type != nullptr:
return cpy_ext_type.GetInstance()
elif ext_type.extension_name() == b"arrow.variable_shape_tensor":
out = VariableShapeTensorType.__new__(VariableShapeTensorType)
elif ext_type.extension_name() == b"arrow.fixed_shape_tensor":
out = FixedShapeTensorType.__new__(FixedShapeTensorType)
else:
Expand Down
38 changes: 1 addition & 37 deletions python/pyarrow/scalar.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -1054,7 +1054,7 @@ cdef class FixedShapeTensorScalar(ExtensionScalar):
The resulting ndarray's shape matches the permuted shape of the
fixed shape tensor scalar.
The conversion is zero-copy if data is primitive numeric and without nulls.
The conversion is zero-copy.
Returns
-------
Expand Down Expand Up @@ -1085,42 +1085,6 @@ cdef class FixedShapeTensorScalar(ExtensionScalar):
return pyarrow_wrap_tensor(ctensor)


cdef class VariableShapeTensorScalar(ExtensionScalar):
    """
    Concrete class for variable shape tensor extension scalar.
    """

    def to_numpy_ndarray(self):
        """
        Return this variable shape tensor extension scalar as a numpy array.

        The scalar is first converted with :meth:`to_tensor` and the resulting
        tensor is then converted to numpy.

        The conversion is zero-copy if data is primitive numeric and without nulls.

        Returns
        -------
        numpy.ndarray
        """
        tensor = self.to_tensor()
        return tensor.to_numpy()

    def to_tensor(self):
        """
        Return this variable shape tensor extension scalar as a pyarrow.Tensor.

        Returns
        -------
        tensor : pyarrow.Tensor
        """
        cdef:
            # Raw pointer to the scalar's type, viewed as the C++ variable
            # shape tensor type so that MakeTensor can be called on it.
            CVariableShapeTensorType* c_tensor_type = static_pointer_cast[CVariableShapeTensorType, CDataType](
                self.wrapped.get().type).get()
            shared_ptr[CExtensionScalar] c_ext_scalar = static_pointer_cast[CExtensionScalar, CScalar](self.wrapped)
            shared_ptr[CTensor] c_result

        # The C++ call runs with the GIL released.
        with nogil:
            c_result = GetResultValue(c_tensor_type.MakeTensor(c_ext_scalar))
        return pyarrow_wrap_tensor(c_result)


cdef dict _scalar_classes = {
_Type_BOOL: BooleanScalar,
_Type_UINT8: UInt8Scalar,
Expand Down
Loading

0 comments on commit eaeb350

Please sign in to comment.