diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b9af4eb1..3df650f8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: exclude: ".ipynb" - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.6.5 + rev: v0.6.7 hooks: - id: ruff args: ["--fix"] diff --git a/README.md b/README.md new file mode 100644 index 00000000..7a87bc1d --- /dev/null +++ b/README.md @@ -0,0 +1,18 @@ +![Sparse](docs/assets/images/logo_with_text.svg) + +# Sparse Multidimensional Arrays + +[![Build Status](https://github.com/pydata/sparse/actions/workflows/ci.yml/badge.svg)]( + https://github.com/pydata/sparse/actions/workflows/ci.yml) +[![Docs Status](https://readthedocs.org/projects/sparse-nd/badge/?version=latest)]( + http://sparse.pydata.org/en/latest/?badge=latest) +[![Coverage](https://codecov.io/gh/pydata/sparse/branch/main/graph/badge.svg)]( + https://codecov.io/gh/pydata/sparse) + +## This library provides multi-dimensional sparse arrays. + +- 📚 [Documentation](http://sparse.pydata.org) + +- 🙌 [Contributing](https://github.com/pydata/sparse/blob/main/docs/contributing.md) + +- 🪲 [Bug Reports/Feature Requests](https://github.com/pydata/sparse/issues) diff --git a/README.rst b/README.rst deleted file mode 100644 index cc59c6e6..00000000 --- a/README.rst +++ /dev/null @@ -1,22 +0,0 @@ -Sparse Multidimensional Arrays -============================== - -|Build Status| |Docs Status| |Coverage| - -This library provides multi-dimensional sparse arrays. - -* `Documentation `_ -* `Contributing `_ -* `Bug Reports/Feature Requests `_ - -.. |Build Status| image:: https://github.com/pydata/sparse/actions/workflows/ci.yml/badge.svg - :target: https://github.com/pydata/sparse/actions/workflows/ci.yml - :alt: Build status - -.. |Docs Status| image:: https://readthedocs.org/projects/sparse-nd/badge/?version=latest - :target: http://sparse.pydata.org/en/latest/?badge=latest - :alt: Documentation Status - -.. |Coverage| image:: https://codecov.io/gh/pydata/sparse/branch/main/graph/badge.svg - :target: https://codecov.io/gh/pydata/sparse - :alt: Coverage Report diff --git a/docs/assets/images/logo_with_text.svg b/docs/assets/images/logo_with_text.svg new file mode 100644 index 00000000..2edd5f92 --- /dev/null +++ b/docs/assets/images/logo_with_text.svg @@ -0,0 +1,1142 @@ + +sparsesparse diff --git a/sparse/mlir_backend/__init__.py b/sparse/mlir_backend/__init__.py index 192217a3..93eefe1d 100644 --- a/sparse/mlir_backend/__init__.py +++ b/sparse/mlir_backend/__init__.py @@ -15,10 +15,12 @@ ) from ._ops import ( add, + reshape, ) __all__ = [ "add", "asarray", "asdtype", + "reshape", ] diff --git a/sparse/mlir_backend/_constructors.py b/sparse/mlir_backend/_constructors.py index 9a975ffa..658a7291 100644 --- a/sparse/mlir_backend/_constructors.py +++ b/sparse/mlir_backend/_constructors.py @@ -239,7 +239,7 @@ def from_sps(cls, arr: np.ndarray) -> "Dense": return dense_instance - def to_sps(self, shape: tuple[int, ...]) -> sps.csr_array: + def to_sps(self, shape: tuple[int, ...]) -> np.ndarray: data = ranked_memref_to_numpy(self.data) return data.reshape(shape) diff --git a/sparse/mlir_backend/_ops.py b/sparse/mlir_backend/_ops.py index 351c16b6..8e8a0ba7 100644 --- a/sparse/mlir_backend/_ops.py +++ b/sparse/mlir_backend/_ops.py @@ -5,10 +5,12 @@ from mlir import ir from mlir.dialects import arith, func, linalg, sparse_tensor, tensor +import numpy as np + from ._common import fn_cache -from ._constructors import Tensor +from ._constructors import Tensor, numpy_to_ranked_memref from ._core import CWD, DEBUG, MLIR_C_RUNNER_UTILS, ctx, pm -from ._dtypes import DType, FloatingDType +from ._dtypes import DType, FloatingDType, Index @fn_cache @@ -68,11 +70,35 @@ def add(a, b): return mlir.execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=[MLIR_C_RUNNER_UTILS]) +@fn_cache +def get_reshape_module( + a_tensor_type: ir.RankedTensorType, + shape_tensor_type: ir.RankedTensorType, + out_tensor_type: ir.RankedTensorType, +) -> ir.Module: + with ir.Location.unknown(ctx): + module = ir.Module.create() + + with ir.InsertionPoint(module.body): + + @func.FuncOp.from_py_func(a_tensor_type, shape_tensor_type) + def reshape(a, shape): + return tensor.reshape(out_tensor_type, a, shape) + + reshape.func_op.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get() + if DEBUG: + (CWD / "reshape_module.mlir").write_text(str(module)) + pm.run(module.operation) + if DEBUG: + (CWD / "reshape_module_opt.mlir").write_text(str(module)) + + return mlir.execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=[MLIR_C_RUNNER_UTILS]) + + def add(x1: Tensor, x2: Tensor) -> Tensor: ret_obj = x1._format_class() out_tensor_type = x1._obj.get_tensor_definition(x1.shape) - # TODO: Add proper caching # TODO: Decide what will be the output tensor_type add_module = get_add_module( x1._obj.get_tensor_definition(x1.shape), @@ -88,3 +114,24 @@ def add(x1: Tensor, x2: Tensor) -> Tensor: *x2._obj.to_module_arg(), ) return Tensor(ret_obj, shape=out_tensor_type.shape) + + +def reshape(x: Tensor, /, shape: tuple[int, ...]) -> Tensor: + ret_obj = x._format_class() + x_tensor_type = x._obj.get_tensor_definition(x.shape) + out_tensor_type = x._obj.get_tensor_definition(shape) + + with ir.Location.unknown(ctx): + shape_tensor_type = ir.RankedTensorType.get([len(shape)], Index.get_mlir_type()) + + reshape_module = get_reshape_module(x_tensor_type, shape_tensor_type, out_tensor_type) + + shape = np.array(shape) + reshape_module.invoke( + "reshape", + ctypes.pointer(ctypes.pointer(ret_obj)), + *x._obj.to_module_arg(), + ctypes.pointer(ctypes.pointer(numpy_to_ranked_memref(shape))), + ) + + return Tensor(ret_obj, shape=out_tensor_type.shape) diff --git a/sparse/mlir_backend/tests/test_simple.py b/sparse/mlir_backend/tests/test_simple.py index fb462fc5..fd5276ba 100644 --- a/sparse/mlir_backend/tests/test_simple.py +++ b/sparse/mlir_backend/tests/test_simple.py @@ -75,6 +75,15 @@ def sampler_real_floating(size: tuple[int, ...]): raise NotImplementedError(f"{dtype=} not yet supported.") +def get_exampe_csf_arrays(dtype: np.dtype) -> tuple: + pos_1 = np.array([0, 1, 3], dtype=np.int64) + crd_1 = np.array([1, 0, 1], dtype=np.int64) + pos_2 = np.array([0, 3, 5, 7], dtype=np.int64) + crd_2 = np.array([0, 1, 3, 0, 3, 0, 1], dtype=np.int64) + data = np.array([1, 2, 3, 4, 5, 6, 7], dtype=dtype) + return pos_1, crd_1, pos_2, crd_2, data + + @parametrize_dtypes @pytest.mark.parametrize("shape", [(100,), (10, 200), (5, 10, 20)]) def test_dense_format(dtype, shape): @@ -176,11 +185,7 @@ def test_add(rng, dtype): @parametrize_dtypes def test_csf_format(dtype): SHAPE = (2, 2, 4) - pos_1 = np.array([0, 1, 3], dtype=np.int64) - crd_1 = np.array([1, 0, 1], dtype=np.int64) - pos_2 = np.array([0, 3, 5, 7], dtype=np.int64) - crd_2 = np.array([0, 1, 3, 0, 3, 0, 1], dtype=np.int64) - data = np.array([1, 2, 3, 4, 5, 6, 7], dtype=dtype) + pos_1, crd_1, pos_2, crd_2, data = get_exampe_csf_arrays(dtype) csf = [pos_1, crd_1, pos_2, crd_2, data] csf_tensor = sparse.asarray(csf, shape=SHAPE, dtype=sparse.asdtype(dtype), format="csf") @@ -192,3 +197,70 @@ def test_csf_format(dtype): csf_2 = [pos_1, crd_1, pos_2, crd_2, data * 2] for actual, expected in zip(res_tensor, csf_2, strict=False): np.testing.assert_array_equal(actual, expected) + + +@parametrize_dtypes +def test_reshape(rng, dtype): + DENSITY = 0.5 + sampler = generate_sampler(dtype, rng) + + # CSR, CSC, COO + for shape, new_shape in [((100, 50), (25, 200)), ((80, 1), (8, 10))]: + for format in ["csr", "csc", "coo"]: + if format == "coo": + # NOTE: Blocked by https://github.com/llvm/llvm-project/pull/109135 + continue + if format == "csc": + # NOTE: Blocked by https://github.com/llvm/llvm-project/issues/109641 + continue + + arr = sps.random_array( + shape, density=DENSITY, format=format, dtype=dtype, random_state=rng, data_sampler=sampler + ) + if format == "coo": + arr.sum_duplicates() + + tensor = sparse.asarray(arr) + + actual = sparse.reshape(tensor, shape=new_shape).to_scipy_sparse() + expected = arr.todense().reshape(new_shape) + + np.testing.assert_array_equal(actual.todense(), expected) + + # CSF + csf_shape = (2, 2, 4) + for shape, new_shape, expected_arrs in [ + ( + csf_shape, + (4, 4, 1), + [ + np.array([0, 0, 3, 5, 7]), + np.array([0, 1, 3, 0, 3, 0, 1]), + np.array([0, 1, 2, 3, 4, 5, 6, 7]), + np.array([0, 0, 0, 0, 0, 0, 0]), + np.array([1, 2, 3, 4, 5, 6, 7]), + ], + ), + ( + csf_shape, + (2, 1, 8), + [ + np.array([0, 1, 2]), + np.array([0, 0]), + np.array([0, 3, 7]), + np.array([4, 5, 7, 0, 3, 4, 5]), + np.array([1, 2, 3, 4, 5, 6, 7]), + ], + ), + ]: + csf = get_exampe_csf_arrays(dtype) + csf_tensor = sparse.asarray(csf, shape=shape, dtype=sparse.asdtype(dtype), format="csf") + + result = sparse.reshape(csf_tensor, shape=new_shape).to_scipy_sparse() + + for actual, expected in zip(result, expected_arrs, strict=False): + np.testing.assert_array_equal(actual, expected) + + # DENSE + # NOTE: dense reshape is probably broken in MLIR + # dense = np.arange(math.prod(SHAPE), dtype=dtype).reshape(SHAPE)