diff --git a/pixi.toml b/pixi.toml index 52e57fb3..01360600 100644 --- a/pixi.toml +++ b/pixi.toml @@ -27,8 +27,8 @@ mkdocs-jupyter = "*" [feature.tests.tasks] test = "pytest --pyargs sparse -n auto" -test-mlir = { cmd = "pytest --pyargs sparse/mlir_backend -vvv", env = { SPARSE_BACKEND = "MLIR" } } -test-finch = { cmd = "pytest --pyargs sparse/tests -n auto", env = { SPARSE_BACKEND = "Finch" }, depends-on = ["precompile"] } +test-mlir = { cmd = "pytest --pyargs sparse/mlir_backend -v" } +test-finch = { cmd = "pytest --pyargs sparse/tests -n auto -v", depends-on = ["precompile"] } [feature.tests.dependencies] pytest = ">=3.5" @@ -51,10 +51,19 @@ precompile = "python -c 'import finch'" scipy = ">=0.19" finch-tensor = ">=0.1.31" +[feature.finch.activation.env] +SPARSE_BACKEND = "Finch" + +[feature.finch.target.osx-arm64.activation.env] +PYTHONFAULTHANDLER = "${HOME}/faulthandler.log" + [feature.mlir.dependencies] scipy = ">=0.19" mlir-python-bindings = "19.*" +[feature.mlir.activation.env] +SPARSE_BACKEND = "MLIR" + [environments] tests = ["tests", "extras"] docs = ["docs", "extras"] diff --git a/sparse/mlir_backend/__init__.py b/sparse/mlir_backend/__init__.py index 86b42965..a81d1d04 100644 --- a/sparse/mlir_backend/__init__.py +++ b/sparse/mlir_backend/__init__.py @@ -1,5 +1,7 @@ try: import mlir # noqa: F401 + + del mlir except ModuleNotFoundError as e: raise ImportError( "MLIR Python bindings not installed. Run " @@ -7,24 +9,9 @@ "to enable MLIR backend." ) from e -from ._constructors import ( - PackedArgumentTuple, - asarray, -) -from ._dtypes import ( - asdtype, -) -from ._ops import ( - add, - broadcast_to, - reshape, -) +from . import levels +from ._conversions import asarray, from_constituent_arrays, to_numpy, to_scipy +from ._dtypes import asdtype +from ._ops import add -__all__ = [ - "add", - "broadcast_to", - "asarray", - "asdtype", - "reshape", - "PackedArgumentTuple", -] +__all__ = ["add", "asarray", "asdtype", "to_numpy", "to_scipy", "levels", "from_constituent_arrays"] diff --git a/sparse/mlir_backend/_array.py b/sparse/mlir_backend/_array.py new file mode 100644 index 00000000..681833d8 --- /dev/null +++ b/sparse/mlir_backend/_array.py @@ -0,0 +1,45 @@ +import numpy as np + +from ._dtypes import DType +from .levels import StorageFormat + + +class Array: + def __init__(self, *, storage, shape: tuple[int, ...]) -> None: + storage_rank = storage.get_storage_format().rank + if len(shape) != storage_rank: + raise ValueError(f"Mismatched rank, `{storage_rank=}`, `{shape=}`") + + self._storage = storage + self._shape = shape + + @property + def shape(self) -> tuple[int, ...]: + return self._shape + + @property + def ndim(self) -> int: + return len(self.shape) + + @property + def dtype(self) -> type[DType]: + return self._storage.get_storage_format().dtype + + @property + def format(self) -> StorageFormat: + return self._storage.get_storage_format() + + def _get_mlir_type(self): + return self.format._get_mlir_type(shape=self.shape) + + def _to_module_arg(self): + return self._storage.to_module_arg() + + def copy(self) -> "Array": + from ._conversions import from_constituent_arrays + + arrs = tuple(arr.copy() for arr in self.get_constituent_arrays()) + return from_constituent_arrays(format=self.format, arrays=arrs, shape=self.shape) + + def get_constituent_arrays(self) -> tuple[np.ndarray, ...]: + return self._storage.get_constituent_arrays() diff --git a/sparse/mlir_backend/_common.py b/sparse/mlir_backend/_common.py index f78ac991..eeeef8f8 100644 --- 
a/sparse/mlir_backend/_common.py +++ b/sparse/mlir_backend/_common.py @@ -1,47 +1,50 @@ -import abc import ctypes import functools import weakref -from dataclasses import dataclass -from mlir import ir +import mlir.runtime as rt +import numpy as np -class MlirType(abc.ABC): - @classmethod - @abc.abstractmethod - def get_mlir_type(cls) -> ir.Type: ... +from ._core import libc +from ._dtypes import DType, asdtype -@dataclass -class PackedArgumentTuple: - contents: tuple +def fn_cache(f, maxsize: int | None = None): + return functools.wraps(f)(functools.lru_cache(maxsize=maxsize)(f)) - def __getitem__(self, index): - return self.contents[index] - def __iter__(self): - yield from self.contents +def get_nd_memref_descr(rank: int, dtype: type[DType]) -> ctypes.Structure: + return _get_nd_memref_descr(int(rank), asdtype(dtype)) - def __len__(self): - return len(self.contents) +@fn_cache +def _get_nd_memref_descr(rank: int, dtype: type[DType]) -> ctypes.Structure: + return rt.make_nd_memref_descriptor(rank, dtype.to_ctype()) + + +def numpy_to_ranked_memref(arr: np.ndarray) -> ctypes.Structure: + memref = rt.get_ranked_memref_descriptor(arr) + memref_descr = get_nd_memref_descr(arr.ndim, asdtype(arr.dtype)) + # Required due to ctypes type checks + return memref_descr( + allocated=memref.allocated, + aligned=memref.aligned, + offset=memref.offset, + shape=memref.shape, + strides=memref.strides, + ) -def fn_cache(f, maxsize: int | None = None): - return functools.wraps(f)(functools.lru_cache(maxsize=maxsize)(f)) +def ranked_memref_to_numpy(ref: ctypes.Structure) -> np.ndarray: + return rt.ranked_memref_to_numpy([ref]) -def _hold_self_ref_in_ret(fn): - @functools.wraps(fn) - def wrapped(self, *a, **kw): - ret = fn(self, *a, **kw) - _take_owneship(ret, self) - return ret - return wrapped +def free_memref(obj: ctypes.Structure) -> None: + libc.free(ctypes.cast(obj.allocated, ctypes.c_void_p)) -def _take_owneship(owner, obj): +def _hold_ref(owner, obj): ptr = ctypes.py_object(obj) ctypes.pythonapi.Py_IncRef(ptr) diff --git a/sparse/mlir_backend/_constructors.py b/sparse/mlir_backend/_constructors.py deleted file mode 100644 index 1f301908..00000000 --- a/sparse/mlir_backend/_constructors.py +++ /dev/null @@ -1,432 +0,0 @@ -import ctypes -from collections.abc import Iterable -from typing import Any - -import mlir.runtime as rt -from mlir import ir -from mlir.dialects import sparse_tensor - -import numpy as np -import scipy.sparse as sps - -from ._common import PackedArgumentTuple, _hold_self_ref_in_ret, _take_owneship, fn_cache -from ._core import ctx, libc -from ._dtypes import DType, asdtype - -########### -# Memrefs # -########### - - -@fn_cache -def get_nd_memref_descr(rank: int, dtype: type[DType]) -> type: - return rt.make_nd_memref_descriptor(rank, dtype.to_ctype()) - - -def numpy_to_ranked_memref(arr: np.ndarray) -> ctypes.Structure: - memref = rt.get_ranked_memref_descriptor(arr) - memref_descr = get_nd_memref_descr(arr.ndim, asdtype(arr.dtype)) - # Required due to ctypes type checks - return memref_descr( - allocated=memref.allocated, - aligned=memref.aligned, - offset=memref.offset, - shape=memref.shape, - strides=memref.strides, - ) - - -def ranked_memref_to_numpy(ref: ctypes.Structure) -> np.ndarray: - return rt.ranked_memref_to_numpy([ref]) - - -def free_memref(obj: ctypes.Structure) -> None: - libc.free(ctypes.cast(obj.allocated, ctypes.c_void_p)) - - -########### -# Formats # -########### - - -@fn_cache -def get_sparse_vector_class( - values_dtype: type[DType], - index_dtype: type[DType], 
-) -> type[ctypes.Structure]: - class SparseVector(ctypes.Structure): - _fields_ = [ - ("indptr", get_nd_memref_descr(1, index_dtype)), - ("indices", get_nd_memref_descr(1, index_dtype)), - ("data", get_nd_memref_descr(1, values_dtype)), - ] - dtype = values_dtype - _index_dtype = index_dtype - - @classmethod - def from_sps(cls, arrs: list[np.ndarray]) -> "SparseVector": - sv_instance = cls(*[numpy_to_ranked_memref(arr) for arr in arrs]) - for arr in arrs: - _take_owneship(sv_instance, arr) - return sv_instance - - def to_sps(self, shape: tuple[int, ...]) -> int: - return PackedArgumentTuple(tuple(ranked_memref_to_numpy(field) for field in self.get__fields_())) - - def to_module_arg(self) -> list: - return [ - ctypes.pointer(ctypes.pointer(self.indptr)), - ctypes.pointer(ctypes.pointer(self.indices)), - ctypes.pointer(ctypes.pointer(self.data)), - ] - - def get__fields_(self) -> list: - return [self.indptr, self.indices, self.data] - - @classmethod - @fn_cache - def get_tensor_definition(cls, shape: tuple[int, ...]) -> ir.RankedTensorType: - with ir.Location.unknown(ctx): - values_dtype = cls.dtype.get_mlir_type() - index_dtype = cls._index_dtype.get_mlir_type() - index_width = getattr(index_dtype, "width", 0) - levels = (sparse_tensor.LevelFormat.compressed,) - ordering = ir.AffineMap.get_permutation([0]) - encoding = sparse_tensor.EncodingAttr.get(levels, ordering, ordering, index_width, index_width) - return ir.RankedTensorType.get(list(shape), values_dtype, encoding) - - return SparseVector - - -@fn_cache -def get_csx_class( - values_dtype: type[DType], - index_dtype: type[DType], - order: str, -) -> type[ctypes.Structure]: - class Csx(ctypes.Structure): - _fields_ = [ - ("indptr", get_nd_memref_descr(1, index_dtype)), - ("indices", get_nd_memref_descr(1, index_dtype)), - ("data", get_nd_memref_descr(1, values_dtype)), - ] - dtype = values_dtype - _index_dtype = index_dtype - _order = order - - @classmethod - def from_sps(cls, arr: sps.csr_array | sps.csc_array) -> "Csx": - indptr = numpy_to_ranked_memref(arr.indptr) - indices = numpy_to_ranked_memref(arr.indices) - data = numpy_to_ranked_memref(arr.data) - - csr_instance = cls(indptr=indptr, indices=indices, data=data) - _take_owneship(csr_instance, arr) - - return csr_instance - - def to_sps(self, shape: tuple[int, ...]) -> sps.csr_array | sps.csc_array: - pos = ranked_memref_to_numpy(self.indptr) - crd = ranked_memref_to_numpy(self.indices) - data = ranked_memref_to_numpy(self.data) - return get_csx_scipy_class(self._order)((data, crd, pos), shape=shape) - - def to_module_arg(self) -> list: - return [ - ctypes.pointer(ctypes.pointer(self.indptr)), - ctypes.pointer(ctypes.pointer(self.indices)), - ctypes.pointer(ctypes.pointer(self.data)), - ] - - def get__fields_(self) -> list: - return [self.indptr, self.indices, self.data] - - @classmethod - @fn_cache - def get_tensor_definition(cls, shape: tuple[int, ...]) -> ir.RankedTensorType: - with ir.Location.unknown(ctx): - values_dtype = cls.dtype.get_mlir_type() - index_dtype = cls._index_dtype.get_mlir_type() - index_width = getattr(index_dtype, "width", 0) - levels = (sparse_tensor.LevelFormat.dense, sparse_tensor.LevelFormat.compressed) - ordering = ir.AffineMap.get_permutation(get_order_tuple(cls._order)) - encoding = sparse_tensor.EncodingAttr.get(levels, ordering, ordering, index_width, index_width) - return ir.RankedTensorType.get(list(shape), values_dtype, encoding) - - return Csx - - -@fn_cache -def get_coo_class(values_dtype: type[DType], index_dtype: type[DType]) -> 
type[ctypes.Structure]: - class Coo(ctypes.Structure): - _fields_ = [ - ("pos", get_nd_memref_descr(1, index_dtype)), - ("coords", get_nd_memref_descr(2, index_dtype)), - ("data", get_nd_memref_descr(1, values_dtype)), - ] - dtype = values_dtype - _index_dtype = index_dtype - - @classmethod - def from_sps(cls, arr: sps.coo_array | Iterable[np.ndarray]) -> "Coo": - if isinstance(arr, sps.coo_array): - if not arr.has_canonical_format: - raise Exception("COO must have canonical format") - np_pos = np.array([0, arr.size], dtype=index_dtype.np_dtype) - np_coords = np.stack(arr.coords, axis=1, dtype=index_dtype.np_dtype) - np_data = arr.data - else: - if len(arr) != 3: - raise Exception("COO must be comprised of three arrays") - np_pos, np_coords, np_data = arr - - pos = numpy_to_ranked_memref(np_pos) - coords = numpy_to_ranked_memref(np_coords) - data = numpy_to_ranked_memref(np_data) - coo_instance = cls(pos=pos, coords=coords, data=data) - _take_owneship(coo_instance, np_pos) - _take_owneship(coo_instance, np_coords) - _take_owneship(coo_instance, np_data) - - return coo_instance - - def to_sps(self, shape: tuple[int, ...]) -> sps.coo_array | list[np.ndarray]: - pos = ranked_memref_to_numpy(self.pos) - coords = ranked_memref_to_numpy(self.coords)[pos[0] : pos[1]] - data = ranked_memref_to_numpy(self.data) - return ( - sps.coo_array((data, coords.T), shape=shape) - if len(shape) == 2 - else PackedArgumentTuple((pos, coords, data)) - ) - - def to_module_arg(self) -> list: - return [ - ctypes.pointer(ctypes.pointer(self.pos)), - ctypes.pointer(ctypes.pointer(self.coords)), - ctypes.pointer(ctypes.pointer(self.data)), - ] - - def get__fields_(self) -> list: - return [self.pos, self.coords, self.data] - - @classmethod - @fn_cache - def get_tensor_definition(cls, shape: tuple[int, ...]) -> ir.RankedTensorType: - with ir.Location.unknown(ctx): - values_dtype = cls.dtype.get_mlir_type() - index_dtype = cls._index_dtype.get_mlir_type() - index_width = getattr(index_dtype, "width", 0) - compressed_lvl = sparse_tensor.EncodingAttr.build_level_type( - sparse_tensor.LevelFormat.compressed, [sparse_tensor.LevelProperty.non_unique] - ) - mid_singleton_lvls = [ - sparse_tensor.EncodingAttr.build_level_type( - sparse_tensor.LevelFormat.singleton, [sparse_tensor.LevelProperty.non_unique] - ) - ] * (len(shape) - 2) - levels = (compressed_lvl, *mid_singleton_lvls, sparse_tensor.LevelFormat.singleton) - ordering = ir.AffineMap.get_permutation([*range(len(shape))]) - encoding = sparse_tensor.EncodingAttr.get(levels, ordering, ordering, index_width, index_width) - return ir.RankedTensorType.get(list(shape), values_dtype, encoding) - - return Coo - - -@fn_cache -def get_csf_class( - values_dtype: type[DType], - index_dtype: type[DType], -) -> type[ctypes.Structure]: - class Csf(ctypes.Structure): - _fields_ = [ - ("indptr_1", get_nd_memref_descr(1, index_dtype)), - ("indices_1", get_nd_memref_descr(1, index_dtype)), - ("indptr_2", get_nd_memref_descr(1, index_dtype)), - ("indices_2", get_nd_memref_descr(1, index_dtype)), - ("data", get_nd_memref_descr(1, values_dtype)), - ] - dtype = values_dtype - _index_dtype = index_dtype - - @classmethod - def from_sps(cls, arrs: list[np.ndarray]) -> "Csf": - csf_instance = cls(*[numpy_to_ranked_memref(arr) for arr in arrs]) - for arr in arrs: - _take_owneship(csf_instance, arr) - return csf_instance - - def to_sps(self, shape: tuple[int, ...]) -> list[np.ndarray]: - return PackedArgumentTuple(tuple(ranked_memref_to_numpy(field) for field in self.get__fields_())) - - def 
to_module_arg(self) -> list: - return [ctypes.pointer(ctypes.pointer(field)) for field in self.get__fields_()] - - def get__fields_(self) -> list: - return [self.indptr_1, self.indices_1, self.indptr_2, self.indices_2, self.data] - - @classmethod - @fn_cache - def get_tensor_definition(cls, shape: tuple[int, ...]) -> ir.RankedTensorType: - with ir.Location.unknown(ctx): - values_dtype = cls.dtype.get_mlir_type() - index_dtype = cls._index_dtype.get_mlir_type() - index_width = getattr(index_dtype, "width", 0) - levels = ( - sparse_tensor.LevelFormat.dense, - sparse_tensor.LevelFormat.compressed, - sparse_tensor.LevelFormat.compressed, - ) - ordering = ir.AffineMap.get_permutation([0, 1, 2]) - encoding = sparse_tensor.EncodingAttr.get(levels, ordering, ordering, index_width, index_width) - return ir.RankedTensorType.get(list(shape), values_dtype, encoding) - - return Csf - - -@fn_cache -def get_dense_class(values_dtype: type[DType], index_dtype: type[DType]) -> type[ctypes.Structure]: - class Dense(ctypes.Structure): - _fields_ = [ - ("data", get_nd_memref_descr(1, values_dtype)), - ] - dtype = values_dtype - _index_dtype = index_dtype - - @classmethod - def from_sps(cls, arr: np.ndarray) -> "Dense": - data = numpy_to_ranked_memref(arr.ravel()) - - dense_instance = cls(data=data) - _take_owneship(dense_instance, arr) - - return dense_instance - - def to_sps(self, shape: tuple[int, ...]) -> np.ndarray: - data = ranked_memref_to_numpy(self.data) - return data.reshape(shape) - - def to_module_arg(self) -> list: - return [ctypes.pointer(ctypes.pointer(self.data))] - - def get__fields_(self) -> list: - return [self.data] - - @classmethod - @fn_cache - def get_tensor_definition(cls, shape: tuple[int, ...]) -> ir.RankedTensorType: - with ir.Location.unknown(ctx): - values_dtype = cls.dtype.get_mlir_type() - index_dtype = cls._index_dtype.get_mlir_type() - index_width = getattr(index_dtype, "width", 0) - levels = (sparse_tensor.LevelFormat.dense,) * len(shape) - ordering = ir.AffineMap.get_permutation([*range(len(shape))]) - encoding = sparse_tensor.EncodingAttr.get(levels, ordering, ordering, index_width, index_width) - return ir.RankedTensorType.get(list(shape), values_dtype, encoding) - - return Dense - - -def _is_scipy_sparse_obj(x) -> bool: - return hasattr(x, "__module__") and x.__module__.startswith("scipy.sparse") - - -def _is_numpy_obj(x) -> bool: - return isinstance(x, np.ndarray) - - -def _is_mlir_obj(x) -> bool: - return isinstance(x, ctypes.Structure) - - -def get_order_tuple(order: str) -> tuple[int, int]: - if order in ("r", "c"): - return (0, 1) if order == "r" else (1, 0) - raise Exception(f"Invalid order: {order}") - - -def get_csx_scipy_class(order: str) -> type[sps.sparray]: - if order in ("r", "c"): - return sps.csr_array if order == "r" else sps.csc_array - raise Exception(f"Invalid order: {order}") - - -_constructor_class_dict = { - "csr": get_csx_class, - "csc": get_csx_class, - "csf": get_csf_class, - "coo": get_coo_class, - "sparse_vector": get_sparse_vector_class, - "dense": get_dense_class, -} - - -################ -# Tensor class # -################ - - -class Tensor: - def __init__( - self, - obj: Any, - shape: tuple[int, ...] 
| None = None, - dtype: type[DType] | None = None, - format: str | None = None, - ) -> None: - self.shape = shape if shape is not None else obj.shape - self.ndim = len(self.shape) - self._values_dtype = dtype if dtype is not None else asdtype(obj.dtype) - - if _is_scipy_sparse_obj(obj): - self._owns_memory = False - - if obj.format in ("csr", "csc"): - order = "r" if obj.format == "csr" else "c" - self._index_dtype = asdtype(obj.indptr.dtype) - self._format_class = get_csx_class(self._values_dtype, self._index_dtype, order) - self._obj = self._format_class.from_sps(obj) - elif obj.format == "coo": - self._index_dtype = asdtype(obj.coords[0].dtype) - self._format_class = get_coo_class(self._values_dtype, self._index_dtype) - self._obj = self._format_class.from_sps(obj) - else: - raise Exception(f"{obj.format} SciPy format not supported.") - - elif _is_numpy_obj(obj): - self._owns_memory = False - self._index_dtype = asdtype(np.intp) - self._format_class = get_dense_class(self._values_dtype, self._index_dtype) - self._obj = self._format_class.from_sps(obj) - - elif _is_mlir_obj(obj): - self._owns_memory = True - self._format_class = type(obj) - self._obj = obj - - elif format is not None: - if format in ["csf", "coo", "sparse_vector"]: - fn_format_class = _constructor_class_dict[format] - self._owns_memory = False - self._index_dtype = asdtype(np.intp) - self._format_class = fn_format_class(self._values_dtype, self._index_dtype) - self._obj = self._format_class.from_sps(obj) - - else: - raise Exception(f"Format {format} not supported.") - - else: - raise Exception(f"{type(obj)} not supported.") - - def __del__(self): - if self._owns_memory: - for field in self._obj.get__fields_(): - free_memref(field) - - @_hold_self_ref_in_ret - def to_scipy_sparse(self) -> sps.sparray | np.ndarray: - return self._obj.to_sps(self.shape) - - -def asarray(obj, shape=None, dtype=None, format=None) -> Tensor: - return Tensor(obj, shape, dtype, format) diff --git a/sparse/mlir_backend/_conversions.py b/sparse/mlir_backend/_conversions.py new file mode 100644 index 00000000..ce47aaec --- /dev/null +++ b/sparse/mlir_backend/_conversions.py @@ -0,0 +1,161 @@ +import functools + +import numpy as np + +from ._array import Array +from .levels import Level, LevelFormat, LevelProperties, StorageFormat, get_storage_format + +try: + import scipy.sparse as sps + + ScipySparseArray = sps.sparray | sps.spmatrix +except ImportError: + sps = None + ScipySparseArray = None + + +def _guard_scipy(f): + @functools.wraps(f) + def wrapped(*args, **kwargs): + if sps is None: + raise RuntimeError("Could not import `scipy.sparse`. Please install `scipy`.") + + return f(*args, **kwargs) + + return wrapped + + +def _from_numpy(arr: np.ndarray, copy: bool | None = None) -> Array: + if copy is not None and not copy and not arr.flags["C_CONTIGUOUS"]: + raise NotImplementedError("Can only convert C-contiguous arrays at the moment.") + if copy: + arr = arr.copy(order="C") + arr_flat = np.ascontiguousarray(arr).reshape(-1) + levels = (Level(LevelFormat.Dense),) * arr.ndim + dense_format = get_storage_format( + levels=levels, + order="C", + pos_width=64, + crd_width=64, + dtype=arr.dtype, + ) + return from_constituent_arrays(format=dense_format, arrays=(arr_flat,), shape=arr.shape) + + +def to_numpy(arr: Array) -> np.ndarray: + storage_format: StorageFormat = arr.format + + if not all(LevelFormat.Dense == level.format for level in storage_format.levels): + raise TypeError(f"Cannot convert a non-dense array to NumPy. 
`{storage_format=}`") + + (data,) = arr.get_constituent_arrays() + arg_order = [0] * storage_format.storage_rank + for i, o in enumerate(storage_format.order): + arg_order[o] = i + arg_order = tuple(arg_order) + storage_shape = tuple(int(arr.shape[o]) for o in arg_order) + return data.reshape(storage_shape).transpose(arg_order) + + +@_guard_scipy +def _from_scipy(arr: ScipySparseArray, copy: bool | None = None) -> Array: + if not isinstance(arr, ScipySparseArray): + raise TypeError(f"`arr` is not a `scipy.sparse` array, `{type(arr)=}`.") + match arr.format: + case "csr" | "csc": + pos_width = arr.indptr.dtype.itemsize * 8 + crd_width = arr.indices.dtype.itemsize * 8 + csx_format = get_storage_format( + levels=( + Level(LevelFormat.Dense), + Level( + LevelFormat.Compressed, + LevelProperties(0) + if arr.has_canonical_format + else LevelProperties.NonUnique | LevelProperties.NonOrdered, + ), + ), + order=(0, 1) if arr.format == "csr" else (1, 0), + pos_width=pos_width, + crd_width=crd_width, + dtype=arr.dtype, + ) + + indptr = arr.indptr + indices = arr.indices + data = arr.data + + if copy: + indptr = indptr.copy() + indices = indices.copy() + data = data.copy() + + return from_constituent_arrays(format=csx_format, arrays=(indptr, indices, data), shape=arr.shape) + case "coo": + if copy is not None and not copy: + raise RuntimeError(f"`scipy.sparse.{type(arr).__name__}` cannot be zero-copy converted.") + coords = np.stack([arr.row, arr.col], axis=1) + pos = np.array([0, arr.nnz], dtype=np.int64) + pos_width = pos.dtype.itemsize * 8 + crd_width = coords.dtype.itemsize * 8 + data = arr.data + if copy: + data = arr.data.copy() + + level_props = LevelProperties(0) + if not arr.has_canonical_format: + level_props |= LevelProperties.NonOrdered | LevelProperties.NonUnique + + coo_format = get_storage_format( + levels=( + Level(LevelFormat.Compressed, level_props | LevelProperties.NonUnique), + Level(LevelFormat.Singleton, level_props), + ), + order=(0, 1), + pos_width=pos_width, + crd_width=crd_width, + dtype=arr.dtype, + ) + + return from_constituent_arrays(format=coo_format, arrays=(pos, coords, data), shape=arr.shape) + case _: + raise NotImplementedError(f"No conversion implemented for `scipy.sparse.{type(arr).__name__}`.") + + +@_guard_scipy +def to_scipy(arr: Array) -> ScipySparseArray: + storage_format = arr.format + + match storage_format.levels: + case (Level(LevelFormat.Dense, _), Level(LevelFormat.Compressed, _)): + indptr, indices, data = arr.get_constituent_arrays() + if storage_format.order == (0, 1): + sps_arr = sps.csr_array((data, indices, indptr), shape=arr.shape) + else: + sps_arr = sps.csc_array((data, indices, indptr), shape=arr.shape) + case (Level(LevelFormat.Compressed, _), Level(LevelFormat.Singleton, _)): + _, coords, data = arr.get_constituent_arrays() + sps_arr = sps.coo_array((data, (coords[:, 0], coords[:, 1])), shape=arr.shape) + case _: + raise RuntimeError(f"No conversion implemented for `{storage_format=}`.") + + return sps_arr + + +def asarray(arr, copy: bool | None = None) -> Array: + if sps is not None and isinstance(arr, ScipySparseArray): + return _from_scipy(arr, copy=copy) + if isinstance(arr, np.ndarray): + return _from_numpy(arr, copy=copy) + + if isinstance(arr, Array): + if copy: + arr = arr.copy() + return arr + + return _from_numpy(np.asarray(arr, copy=copy), copy=None) + + +def from_constituent_arrays(*, format: StorageFormat, arrays: tuple[np.ndarray, ...], shape: tuple[int, ...]) -> Array: + storage = 
format._get_ctypes_type().from_constituent_arrays(arrays) + return Array(storage=storage, shape=shape) diff --git a/sparse/mlir_backend/_dtypes.py b/sparse/mlir_backend/_dtypes.py index 2ab41401..30af4475 100644 --- a/sparse/mlir_backend/_dtypes.py +++ b/sparse/mlir_backend/_dtypes.py @@ -1,3 +1,4 @@ +import abc import inspect import math import sys @@ -7,7 +8,11 @@ import numpy as np -from ._common import MlirType + +class MlirType(abc.ABC): + @classmethod + @abc.abstractmethod + def _get_mlir_type(cls) -> ir.Type: ... def _get_pointer_width() -> int: @@ -25,7 +30,7 @@ class SignedBW(SignedIntegerDType): bit_width = bw @classmethod - def get_mlir_type(cls): + def _get_mlir_type(cls): return ir.IntegerType.get_signless(cls.bit_width) SignedBW.__name__ = f"Int{bw}" @@ -36,7 +41,7 @@ class UnsignedBW(UnsignedIntegerDType): bit_width = bw @classmethod - def get_mlir_type(cls): + def _get_mlir_type(cls): return ir.IntegerType.get_signless(cls.bit_width) UnsignedBW.__name__ = f"UInt{bw}" @@ -63,7 +68,7 @@ class Float64(FloatingDType): bit_width = 64 @classmethod - def get_mlir_type(cls): + def _get_mlir_type(cls): return ir.F64Type.get() @@ -72,7 +77,7 @@ class Float32(FloatingDType): bit_width = 32 @classmethod - def get_mlir_type(cls): + def _get_mlir_type(cls): return ir.F32Type.get() @@ -81,7 +86,7 @@ class Float16(FloatingDType): bit_width = 16 @classmethod - def get_mlir_type(cls): + def _get_mlir_type(cls): return ir.F16Type.get() @@ -101,7 +106,7 @@ class Index(DType): np_dtype = np.intp @classmethod - def get_mlir_type(cls): + def _get_mlir_type(cls): return ir.IndexType.get() diff --git a/sparse/mlir_backend/_ops.py b/sparse/mlir_backend/_ops.py index 963bbd1c..2fe9b975 100644 --- a/sparse/mlir_backend/_ops.py +++ b/sparse/mlir_backend/_ops.py @@ -5,12 +5,10 @@ from mlir import ir from mlir.dialects import arith, func, linalg, sparse_tensor, tensor -import numpy as np - +from ._array import Array from ._common import fn_cache -from ._constructors import Tensor, numpy_to_ranked_memref from ._core import CWD, DEBUG, MLIR_C_RUNNER_UTILS, ctx, pm -from ._dtypes import DType, FloatingDType, Index +from ._dtypes import DType, FloatingDType @fn_cache @@ -25,7 +23,7 @@ def get_add_module( module = ir.Module.create() # TODO: add support for complex dialect/dtypes arith_op = arith.AddFOp if issubclass(dtype, FloatingDType) else arith.AddIOp - dtype = dtype.get_mlir_type() + dtype = dtype._get_mlir_type() ordering = ir.AffineMap.get_permutation(range(rank)) with ir.InsertionPoint(module.body): @@ -121,77 +119,23 @@ def broadcast_to(in_tensor): return mlir.execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=[MLIR_C_RUNNER_UTILS]) -def add(x1: Tensor, x2: Tensor) -> Tensor: - ret_obj = x1._format_class() - out_tensor_type = x1._obj.get_tensor_definition(x1.shape) +def add(x1: Array, x2: Array) -> Array: + ret_storage_format = x1.format + ret_storage = ret_storage_format._get_ctypes_type(owns_memory=True)() + out_tensor_type = ret_storage_format._get_mlir_type(shape=x1.shape) # TODO: Decide what will be the output tensor_type add_module = get_add_module( - x1._obj.get_tensor_definition(x1.shape), - x2._obj.get_tensor_definition(x2.shape), + x1._get_mlir_type(), + x2._get_mlir_type(), out_tensor_type=out_tensor_type, - dtype=x1._values_dtype, + dtype=x1.dtype, rank=x1.ndim, ) add_module.invoke( "add", - ctypes.pointer(ctypes.pointer(ret_obj)), - *x1._obj.to_module_arg(), - *x2._obj.to_module_arg(), - ) - return Tensor(ret_obj, shape=out_tensor_type.shape) - - -def 
_infer_format_class(rank: int, values_dtype: type[DType], index_dtype: type[DType]) -> type[ctypes.Structure]: - from ._constructors import get_csf_class, get_csx_class, get_dense_class - - if rank == 1: - return get_dense_class(values_dtype, index_dtype) - if rank == 2: - return get_csx_class(values_dtype, index_dtype, order="r") - if rank == 3: - return get_csf_class(values_dtype, index_dtype) - raise Exception(f"Rank not supported to infer format: {rank}") - - -def reshape(x: Tensor, /, shape: tuple[int, ...]) -> Tensor: - x_tensor_type = x._obj.get_tensor_definition(x.shape) - if len(x.shape) == len(shape): - out_tensor_type = x._obj.get_tensor_definition(shape) - ret_obj = x._format_class() - else: - format_class = _infer_format_class(len(shape), x._values_dtype, x._index_dtype) - out_tensor_type = format_class.get_tensor_definition(shape) - ret_obj = format_class() - - with ir.Location.unknown(ctx): - shape_tensor_type = ir.RankedTensorType.get([len(shape)], Index.get_mlir_type()) - - reshape_module = get_reshape_module(x_tensor_type, shape_tensor_type, out_tensor_type) - - shape = np.array(shape) - reshape_module.invoke( - "reshape", - ctypes.pointer(ctypes.pointer(ret_obj)), - *x._obj.to_module_arg(), - ctypes.pointer(ctypes.pointer(numpy_to_ranked_memref(shape))), + ctypes.pointer(ctypes.pointer(ret_storage)), + *x1._to_module_arg(), + *x2._to_module_arg(), ) - - return Tensor(ret_obj, shape=out_tensor_type.shape) - - -def broadcast_to(x: Tensor, /, shape: tuple[int, ...], dimensions: list[int]) -> Tensor: - x_tensor_type = x._obj.get_tensor_definition(x.shape) - format_class = _infer_format_class(len(shape), x._values_dtype, x._index_dtype) - out_tensor_type = format_class.get_tensor_definition(shape) - ret_obj = format_class() - - broadcast_to_module = get_broadcast_to_module(x_tensor_type, out_tensor_type, tuple(dimensions)) - - broadcast_to_module.invoke( - "broadcast_to", - ctypes.pointer(ctypes.pointer(ret_obj)), - *x._obj.to_module_arg(), - ) - - return Tensor(ret_obj, shape=shape) + return Array(storage=ret_storage, shape=out_tensor_type.shape) diff --git a/sparse/mlir_backend/levels.py b/sparse/mlir_backend/levels.py new file mode 100644 index 00000000..91f0e9c9 --- /dev/null +++ b/sparse/mlir_backend/levels.py @@ -0,0 +1,197 @@ +import ctypes +import dataclasses +import enum +import itertools +import re +import typing + +from mlir import ir +from mlir.dialects import sparse_tensor + +import numpy as np + +from ._common import ( + _hold_ref, + fn_cache, + free_memref, + get_nd_memref_descr, + numpy_to_ranked_memref, + ranked_memref_to_numpy, +) +from ._core import ctx +from ._dtypes import DType, asdtype + +_CAMEL_TO_SNAKE = [re.compile("(.)([A-Z][a-z]+)"), re.compile("([a-z0-9])([A-Z])")] + +__all__ = ["LevelProperties", "LevelFormat", "StorageFormat", "Level", "get_storage_format"] + + +def _camel_to_snake(name: str) -> str: + for exp in _CAMEL_TO_SNAKE: + name = exp.sub(r"\1_\2", name) + + return name.lower() + + +class LevelProperties(enum.Flag): + NonOrdered = enum.auto() + NonUnique = enum.auto() + + def build(self) -> list[sparse_tensor.LevelProperty]: + return [getattr(sparse_tensor.LevelProperty, _camel_to_snake(p.name)) for p in type(self) if p in self] + + +class LevelFormat(enum.Enum): + Dense = "dense" + Compressed = "compressed" + Singleton = "singleton" + + def build(self) -> sparse_tensor.LevelFormat: + return getattr(sparse_tensor.LevelFormat, self.value) + + +@dataclasses.dataclass(eq=True, frozen=True) +class Level: + format: LevelFormat + properties: 
LevelProperties = LevelProperties(0) + + def build(self): + return sparse_tensor.EncodingAttr.build_level_type(self.format.build(), self.properties.build()) + + +@dataclasses.dataclass(eq=True, frozen=True, kw_only=True) +class StorageFormat: + levels: tuple[Level, ...] + order: tuple[int, ...] + pos_width: int + crd_width: int + dtype: type[DType] + + @property + def storage_rank(self) -> int: + return len(self.levels) + + @property + def rank(self) -> int: + return self.storage_rank + + def __post_init__(self): + if sorted(self.order) != list(range(self.rank)): + raise ValueError(f"`sorted(self.order) != list(range(self.rank))`, `{self.order=}`, `{self.rank=}`.") + + @fn_cache + def _get_mlir_type(self, *, shape: tuple[int, ...]) -> ir.RankedTensorType: + if len(shape) != self.rank: + raise ValueError(f"`len(shape) != self.rank`, {shape=}, {self.rank=}") + with ir.Location.unknown(ctx): + mlir_levels = [level.build() for level in self.levels] + mlir_order = list(self.order) + mlir_reverse_order = [0] * self.rank + for i, r in enumerate(mlir_order): + mlir_reverse_order[r] = i + + dtype = self.dtype._get_mlir_type() + encoding = sparse_tensor.EncodingAttr.get( + mlir_levels, + ir.AffineMap.get_permutation(mlir_order), + ir.AffineMap.get_permutation(mlir_reverse_order), + self.pos_width, + self.crd_width, + ) + return ir.RankedTensorType.get(list(shape), dtype, encoding) + + @fn_cache + def _get_ctypes_type(self, *, owns_memory=False): + ptr_dtype = asdtype(getattr(np, f"int{self.pos_width}")) + idx_dtype = asdtype(getattr(np, f"int{self.crd_width}")) + + def get_fields(): + fields = [] + compressed_counter = 0 + for level, next_level in itertools.zip_longest(self.levels, self.levels[1:]): + if LevelFormat.Compressed == level.format: + compressed_counter += 1 + fields.append((f"pointers_to_{compressed_counter}", get_nd_memref_descr(1, ptr_dtype))) + if next_level is not None and LevelFormat.Singleton == next_level.format: + fields.append((f"indices_{compressed_counter}", get_nd_memref_descr(2, idx_dtype))) + else: + fields.append((f"indices_{compressed_counter}", get_nd_memref_descr(1, idx_dtype))) + + fields.append(("values", get_nd_memref_descr(1, self.dtype))) + return fields + + storage_format = self + + class Storage(ctypes.Structure): + _fields_ = get_fields() + + def to_module_arg(self) -> list: + return [ctypes.pointer(ctypes.pointer(f)) for f in self.get__fields_()] + + def get__fields_(self) -> list: + return [getattr(self, field[0]) for field in self._fields_] + + def get_constituent_arrays(self) -> tuple[np.ndarray, ...]: + arrays = tuple(ranked_memref_to_numpy(field) for field in self.get__fields_()) + for arr in arrays: + _hold_ref(arr, self) + return arrays + + def get_storage_format(self) -> StorageFormat: + return storage_format + + @classmethod + def from_constituent_arrays(cls, arrs: list[np.ndarray]) -> "Storage": + storage = cls(*(numpy_to_ranked_memref(arr) for arr in arrs)) + for arr in arrs: + _hold_ref(storage, arr) + return storage + + if owns_memory: + + def __del__(self) -> None: + for field in self.get__fields_(): + free_memref(field) + + return Storage + + +def get_storage_format( + *, + levels: tuple[Level, ...], + order: typing.Literal["C", "F"] | tuple[int, ...], + pos_width: int, + crd_width: int, + dtype: type[DType], +) -> StorageFormat: + levels = tuple(levels) + if isinstance(order, str): + if order == "C": + order = tuple(range(len(levels))) + if order == "F": + order = tuple(reversed(range(len(levels)))) + return _get_storage_format( + levels=levels, 
+ order=order, + pos_width=int(pos_width), + crd_width=int(crd_width), + dtype=asdtype(dtype), + ) + + +@fn_cache +def _get_storage_format( + *, + levels: tuple[Level, ...], + order: tuple[int, ...], + pos_width: int, + crd_width: int, + dtype: type[DType], +) -> StorageFormat: + return StorageFormat( + levels=levels, + order=order, + pos_width=pos_width, + crd_width=crd_width, + dtype=dtype, + ) diff --git a/sparse/mlir_backend/tests/test_simple.py b/sparse/mlir_backend/tests/test_simple.py index 0fb2e4d2..2fd06b3e 100644 --- a/sparse/mlir_backend/tests/test_simple.py +++ b/sparse/mlir_backend/tests/test_simple.py @@ -32,21 +32,16 @@ def assert_csx_equal( expected: sps.csr_array | sps.csc_array, actual: sps.csr_array | sps.csc_array, ) -> None: - np.testing.assert_array_equal(expected.todense(), actual.todense()) - # Broken due to https://github.com/scipy/scipy/issues/21442 - # desired.sort_indices() - # desired.sum_duplicates() - # desired.prune() + assert expected.format == actual.format + expected.eliminate_zeros() + expected.sum_duplicates() - # actual.sort_indices() - # actual.sum_duplicates() - # actual.prune() + actual.eliminate_zeros() + actual.sum_duplicates() - # np.testing.assert_array_equal(desired.todense(), actual.todense()) - - # np.testing.assert_array_equal(desired.indptr, actual.indptr) - # np.testing.assert_array_equal(desired.indices, actual.indices) - # np.testing.assert_array_equal(desired.data, actual.data) + np.testing.assert_array_equal(expected.indptr, actual.indptr) + np.testing.assert_array_equal(expected.indices, actual.indices) + np.testing.assert_array_equal(expected.data, actual.data) def generate_sampler(dtype: np.dtype, rng: np.random.Generator) -> typing.Callable[[tuple[int, ...]], np.ndarray]: @@ -87,9 +82,9 @@ def get_exampe_csf_arrays(dtype: np.dtype) -> tuple: @parametrize_dtypes @pytest.mark.parametrize("shape", [(100,), (10, 200), (5, 10, 20)]) def test_dense_format(dtype, shape): - data = np.arange(math.prod(shape), dtype=dtype) + data = np.arange(math.prod(shape), dtype=dtype).reshape(shape) tensor = sparse.asarray(data) - actual = tensor.to_scipy_sparse() + actual = sparse.to_numpy(tensor) np.testing.assert_equal(actual, data) @@ -110,19 +105,19 @@ def test_2d_constructors(rng, dtype): coo_tensor = sparse.asarray(coo) dense_2_tensor = sparse.asarray(np.arange(100, dtype=dtype).reshape((25, 4)) + 10) - csr_retured = csr_tensor.to_scipy_sparse() + csr_retured = sparse.to_scipy(csr_tensor) assert_csx_equal(csr_retured, csr) - csc_retured = csc_tensor.to_scipy_sparse() + csc_retured = sparse.to_scipy(csc_tensor) assert_csx_equal(csc_retured, csc) - dense_returned = dense_tensor.to_scipy_sparse() + dense_returned = sparse.to_numpy(dense_tensor) np.testing.assert_equal(dense_returned, dense) - coo_returned = coo_tensor.to_scipy_sparse() + coo_returned = sparse.to_scipy(coo_tensor) np.testing.assert_equal(coo_returned.todense(), coo.todense()) - dense_2_returned = dense_2_tensor.to_scipy_sparse() + dense_2_returned = sparse.to_numpy(dense_2_tensor) np.testing.assert_equal(dense_2_returned, np.arange(100, dtype=dtype).reshape((25, 4)) + 10) @@ -145,23 +140,23 @@ def test_add(rng, dtype): dense_tensor = sparse.asarray(dense) coo_tensor = sparse.asarray(coo) - actual = sparse.add(csr_tensor, csr_2_tensor).to_scipy_sparse() + actual = sparse.to_scipy(sparse.add(csr_tensor, csr_2_tensor)) expected = csr + csr_2 assert_csx_equal(expected, actual) - actual = sparse.add(csc_tensor, csc_tensor).to_scipy_sparse() + actual = sparse.to_scipy(sparse.add(csc_tensor, 
csc_tensor)) expected = csc + csc assert_csx_equal(expected, actual) - actual = sparse.add(csc_tensor, csr_tensor).to_scipy_sparse() + actual = sparse.to_scipy(sparse.add(csc_tensor, csr_tensor)) expected = csc + csr assert_csx_equal(expected, actual) - actual = sparse.add(csr_tensor, dense_tensor).to_scipy_sparse() + actual = sparse.to_scipy(sparse.add(csr_tensor, dense_tensor)) expected = sps.csr_matrix(csr + dense) assert_csx_equal(expected, actual) - actual = sparse.add(dense_tensor, csr_tensor).to_scipy_sparse() + actual = sparse.to_numpy(sparse.add(dense_tensor, csr_tensor)) expected = csr + dense assert isinstance(actual, np.ndarray) np.testing.assert_array_equal(actual, expected) @@ -172,9 +167,9 @@ def test_add(rng, dtype): # assert isinstance(actual, np.ndarray) # np.testing.assert_array_equal(actual, expected) - actual = sparse.add(csr_2_tensor, coo_tensor).to_scipy_sparse() + actual = sparse.to_scipy(sparse.add(csr_2_tensor, coo_tensor)) expected = csr_2 + coo - np.testing.assert_array_equal(actual.todense(), expected.todense()) + assert_csx_equal(expected, actual) # NOTE: https://discourse.llvm.org/t/passmanager-fails-on-simple-coo-addition-example/81247 # actual = sparse.add(d_tensor, d_tensor).to_scipy_sparse() @@ -184,189 +179,106 @@ def test_add(rng, dtype): @parametrize_dtypes def test_csf_format(dtype): + format = sparse.levels.get_storage_format( + levels=( + sparse.levels.Level(sparse.levels.LevelFormat.Dense), + sparse.levels.Level(sparse.levels.LevelFormat.Compressed), + sparse.levels.Level(sparse.levels.LevelFormat.Compressed), + ), + order="C", + pos_width=64, + crd_width=64, + dtype=sparse.asdtype(dtype), + ) + SHAPE = (2, 2, 4) pos_1, crd_1, pos_2, crd_2, data = get_exampe_csf_arrays(dtype) - csf = [pos_1, crd_1, pos_2, crd_2, data] + constituent_arrays = (pos_1, crd_1, pos_2, crd_2, data) - csf_tensor = sparse.asarray(csf, shape=SHAPE, dtype=sparse.asdtype(dtype), format="csf") - result = csf_tensor.to_scipy_sparse() - for actual, expected in zip(result, csf, strict=False): + csf_array = sparse.from_constituent_arrays(format=format, arrays=constituent_arrays, shape=SHAPE) + result_arrays = csf_array.get_constituent_arrays() + for actual, expected in zip(result_arrays, constituent_arrays, strict=True): np.testing.assert_array_equal(actual, expected) - res_tensor = sparse.add(csf_tensor, csf_tensor).to_scipy_sparse() - csf_2 = [pos_1, crd_1, pos_2, crd_2, data * 2] - for actual, expected in zip(res_tensor, csf_2, strict=False): + res_arrays = sparse.add(csf_array, csf_array).get_constituent_arrays() + expected_arrays = (pos_1, crd_1, pos_2, crd_2, data * 2) + for actual, expected in zip(res_arrays, expected_arrays, strict=True): np.testing.assert_array_equal(actual, expected) @parametrize_dtypes def test_coo_3d_format(dtype): + format = sparse.levels.get_storage_format( + levels=( + sparse.levels.Level(sparse.levels.LevelFormat.Compressed, sparse.levels.LevelProperties.NonUnique), + sparse.levels.Level(sparse.levels.LevelFormat.Singleton, sparse.levels.LevelProperties.NonUnique), + sparse.levels.Level(sparse.levels.LevelFormat.Singleton, sparse.levels.LevelProperties.NonUnique), + ), + order="C", + pos_width=64, + crd_width=64, + dtype=sparse.asdtype(dtype), + ) + SHAPE = (2, 2, 4) pos = np.array([0, 7]) crd = np.array([[0, 1, 0, 0, 1, 1, 0], [1, 3, 1, 0, 0, 1, 0], [3, 1, 1, 0, 1, 1, 1]]) data = np.array([1, 2, 3, 4, 5, 6, 7], dtype=dtype) - coo = [pos, crd, data] + carrs = (pos, crd, data) - coo_tensor = sparse.asarray(coo, shape=SHAPE, 
dtype=sparse.asdtype(dtype), format="coo") - result = coo_tensor.to_scipy_sparse() - for actual, expected in zip(result, coo, strict=False): + coo_array = sparse.from_constituent_arrays(format=format, arrays=carrs, shape=SHAPE) + result = coo_array.get_constituent_arrays() + for actual, expected in zip(result, carrs, strict=True): np.testing.assert_array_equal(actual, expected) # NOTE: Blocked by https://github.com/llvm/llvm-project/pull/109135 - # res_tensor = sparse.add(coo_tensor, coo_tensor).to_scipy_sparse() - # coo_2 = [pos, crd, data * 2] - # for actual, expected in zip(res_tensor, coo_2, strict=False): + # res_arrays = sparse.add(coo_array, coo_array).get_constituent_arrays() + # res_expected = (pos, crd, data * 2) + # for actual, expected in zip(res_arrays, res_expected, strict=False): # np.testing.assert_array_equal(actual, expected) @parametrize_dtypes def test_sparse_vector_format(dtype): + format = sparse.levels.get_storage_format( + levels=(sparse.levels.Level(sparse.levels.LevelFormat.Compressed),), + order="C", + pos_width=64, + crd_width=64, + dtype=sparse.asdtype(dtype), + ) + SHAPE = (10,) pos = np.array([0, 6]) crd = np.array([0, 1, 2, 6, 8, 9]) data = np.array([1, 2, 3, 4, 5, 6], dtype=dtype) - sparse_vector = [pos, crd, data] + carrs = (pos, crd, data) - sv_tensor = sparse.asarray( - sparse_vector, - shape=SHAPE, - dtype=sparse.asdtype(dtype), - format="sparse_vector", - ) - result = sv_tensor.to_scipy_sparse() - for actual, expected in zip(result, sparse_vector, strict=False): + sv_array = sparse.from_constituent_arrays(format=format, arrays=carrs, shape=SHAPE) + result = sv_array.get_constituent_arrays() + for actual, expected in zip(result, carrs, strict=True): np.testing.assert_array_equal(actual, expected) - res_tensor = sparse.add(sv_tensor, sv_tensor).to_scipy_sparse() - sparse_vector_2 = [pos, crd, data * 2] - for actual, expected in zip(res_tensor, sparse_vector_2, strict=False): + res_arrs = sparse.add(sv_array, sv_array).get_constituent_arrays() + sv2_expected = (pos, crd, data * 2) + for actual, expected in zip(res_arrs, sv2_expected, strict=True): np.testing.assert_array_equal(actual, expected) dense = np.array([1, 2, 3, 0, 0, 0, 4, 0, 5, 6], dtype=dtype) - dense_tensor = sparse.asarray(dense) - res_tensor = sparse.add(dense_tensor, sv_tensor).to_scipy_sparse() - np.testing.assert_array_equal(res_tensor, dense * 2) - - -@parametrize_dtypes -def test_reshape(rng, dtype): - DENSITY = 0.5 - sampler = generate_sampler(dtype, rng) + dense_array = sparse.asarray(dense) + res = sparse.to_numpy(sparse.add(dense_array, sv_array)) + np.testing.assert_array_equal(res, dense * 2) - # CSR, CSC, COO - for shape, new_shape in [ - ((100, 50), (25, 200)), - ((100, 50), (10, 500, 1)), - ((80, 1), (8, 10)), - ((80, 1), (80,)), - ]: - for format in ["csr", "csc", "coo"]: - if format == "coo": - # NOTE: Blocked by https://github.com/llvm/llvm-project/pull/109135 - continue - if format == "csc": - # NOTE: Blocked by https://github.com/llvm/llvm-project/issues/109641 - continue - - arr = sps.random_array( - shape, density=DENSITY, format=format, dtype=dtype, random_state=rng, data_sampler=sampler - ) - arr.sum_duplicates() - tensor = sparse.asarray(arr) - - actual = sparse.reshape(tensor, shape=new_shape).to_scipy_sparse() - if isinstance(actual, sparse.PackedArgumentTuple): - continue # skip checking CSF output - if not isinstance(actual, np.ndarray): - actual = actual.todense() - expected = arr.todense().reshape(new_shape) - - np.testing.assert_array_equal(actual, expected) - 
- # CSF - csf_shape = (2, 2, 4) - for shape, new_shape, expected_arrs in [ - ( - csf_shape, - (4, 4, 1), - [ - np.array([0, 0, 3, 5, 7]), - np.array([0, 1, 3, 0, 3, 0, 1]), - np.array([0, 1, 2, 3, 4, 5, 6, 7]), - np.array([0, 0, 0, 0, 0, 0, 0]), - np.array([1, 2, 3, 4, 5, 6, 7]), - ], - ), - ( - csf_shape, - (2, 1, 8), - [ - np.array([0, 1, 2]), - np.array([0, 0]), - np.array([0, 3, 7]), - np.array([4, 5, 7, 0, 3, 4, 5]), - np.array([1, 2, 3, 4, 5, 6, 7]), - ], - ), - ]: - csf = get_exampe_csf_arrays(dtype) - csf_tensor = sparse.asarray(csf, shape=shape, dtype=sparse.asdtype(dtype), format="csf") - - result = sparse.reshape(csf_tensor, shape=new_shape).to_scipy_sparse() - - for actual, expected in zip(result, expected_arrs, strict=False): - np.testing.assert_array_equal(actual, expected) - # DENSE - # NOTE: dense reshape is probably broken in MLIR in 19.x branch - # dense = np.arange(math.prod(SHAPE), dtype=dtype).reshape(SHAPE) +def test_copy(): + arr_np_orig = np.arange(25).reshape((5, 5)) + arr_np_copy = arr_np_orig.copy() + arr_sp1 = sparse.asarray(arr_np_copy, copy=True) + arr_sp2 = sparse.asarray(arr_np_copy, copy=False).copy() + arr_sp3 = sparse.asarray(arr_np_copy, copy=False) + arr_np_copy[2, 2] = 42 -@parametrize_dtypes -def test_broadcast_to(dtype): - # CSR, CSC, COO - for shape, new_shape, dimensions, input_arr, expected_arrs in [ - ( - (3, 4), - (2, 3, 4), - [0], - np.array([[0, 1, 0, 3], [0, 0, 4, 5], [6, 7, 0, 0]]), - [ - np.array([0, 3, 6]), - np.array([0, 1, 2, 0, 1, 2]), - np.array([0, 2, 4, 6, 8, 10, 12]), - np.array([1, 3, 2, 3, 0, 1, 1, 3, 2, 3, 0, 1]), - np.array([1.0, 3.0, 4.0, 5.0, 6.0, 7.0, 1.0, 3.0, 4.0, 5.0, 6.0, 7.0]), - ], - ), - ( - (4, 2), - (4, 2, 2), - [1], - np.array([[0, 1], [0, 0], [2, 3], [4, 0]]), - [ - np.array([0, 2, 2, 4, 6]), - np.array([0, 1, 0, 1, 0, 1]), - np.array([0, 1, 2, 4, 6, 7, 8]), - np.array([1, 1, 0, 1, 0, 1, 0, 0]), - np.array([1.0, 1.0, 2.0, 3.0, 2.0, 3.0, 4.0, 4.0]), - ], - ), - ]: - for fn_format in [sps.csr_array, sps.csc_array, sps.coo_array]: - arr = fn_format(input_arr, shape=shape, dtype=dtype) - arr.sum_duplicates() - tensor = sparse.asarray(arr) - result = sparse.broadcast_to(tensor, new_shape, dimensions=dimensions).to_scipy_sparse() - - for actual, expected in zip(result, expected_arrs, strict=False): - np.testing.assert_allclose(actual, expected) - - # DENSE - np_arr = np.array([0, 0, 2, 3, 0, 1]) - arr = np.asarray(np_arr, dtype=dtype) - tensor = sparse.asarray(arr) - result = sparse.broadcast_to(tensor, (3, 6), dimensions=[0]).to_scipy_sparse() - - assert result.format == "csr" - np.testing.assert_allclose(result.todense(), np.repeat(np_arr[np.newaxis], 3, axis=0)) + np.testing.assert_array_equal(sparse.to_numpy(arr_sp1), arr_np_orig) + np.testing.assert_array_equal(sparse.to_numpy(arr_sp2), arr_np_orig) + np.testing.assert_array_equal(sparse.to_numpy(arr_sp3), arr_np_copy)
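
Usage sketch (not part of the patch): the tests above exercise the new public API, `asarray`/`to_scipy`/`to_numpy` for conversions, `levels.get_storage_format` to describe a layout as a tuple of storage levels, and `from_constituent_arrays` to wrap raw buffers into an `Array`. The example below is a minimal sketch; it assumes `SPARSE_BACKEND=MLIR` with the MLIR Python bindings installed and a SciPy new enough to provide `scipy.sparse.random_array`, and the CSF constituent arrays (`pos_1`, `crd_1`, `pos_2`, `crd_2`, `data`) are hypothetical values chosen to be consistent with `shape=(2, 2, 4)`.

import numpy as np
import scipy.sparse as sps

import sparse  # with SPARSE_BACKEND=MLIR this exposes the mlir_backend API

# SciPy arrays round-trip through `asarray`/`to_scipy`; `add` takes its output
# format from the first input, here CSR.
csr = sps.random_array((10, 10), density=0.3, format="csr", dtype=np.float64)
doubled = sparse.to_scipy(sparse.add(sparse.asarray(csr), sparse.asarray(csr)))
np.testing.assert_allclose(doubled.todense(), (csr + csr).todense())

# Formats are described as a tuple of storage levels rather than a format
# string; this is the dense-compressed-compressed (CSF-like) layout from
# `test_csf_format` above.
csf_format = sparse.levels.get_storage_format(
    levels=(
        sparse.levels.Level(sparse.levels.LevelFormat.Dense),
        sparse.levels.Level(sparse.levels.LevelFormat.Compressed),
        sparse.levels.Level(sparse.levels.LevelFormat.Compressed),
    ),
    order="C",
    pos_width=64,
    crd_width=64,
    dtype=sparse.asdtype(np.float64),
)

# Hypothetical positions/coordinates/values consistent with shape (2, 2, 4):
# the first (dense) dimension has one populated fiber under index 0 and two
# under index 1, holding seven stored values in total.
pos_1 = np.array([0, 1, 3], dtype=np.int64)
crd_1 = np.array([1, 0, 1], dtype=np.int64)
pos_2 = np.array([0, 3, 5, 7], dtype=np.int64)
crd_2 = np.array([1, 2, 3, 0, 3, 0, 1], dtype=np.int64)
data = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0])

# Wrap the raw constituent arrays into an Array of that format and read them
# back out unchanged.
csf_array = sparse.from_constituent_arrays(
    format=csf_format, arrays=(pos_1, crd_1, pos_2, crd_2, data), shape=(2, 2, 4)
)
for constituent in csf_array.get_constituent_arrays():
    print(constituent)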