Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Update MLIR backend to LLVM 20.dev #799

Merged
merged 1 commit into from
Nov 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,10 @@ jobs:
steps:
- name: Checkout Repo
uses: actions/checkout@v4
- uses: mamba-org/setup-micromamba@v2
hameerabbasi marked this conversation as resolved.
Show resolved Hide resolved
- uses: mamba-org/setup-micromamba@v1.9.0
with:
# NOTE: https://github.com/mamba-org/setup-micromamba/issues/227
micromamba-version: '1.5.10-0'
environment-file: ci/environment.yml
init-shell: >-
bash
Expand Down
4 changes: 2 additions & 2 deletions ci/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ dependencies:
- pytest
- pytest-cov
- pytest-xdist
- mlir-python-bindings==19.*
- pip:
- finch-tensor >=0.1.31
- finch-tensor>=0.1.31
- finch-mlir>=0.0.2
- pytest-codspeed
4 changes: 2 additions & 2 deletions sparse/mlir_backend/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
try:
import mlir # noqa: F401
import mlir_finch # noqa: F401

del mlir
del mlir_finch
except ModuleNotFoundError as e:
raise ImportError(
"MLIR Python bindings not installed. Run "
Expand Down
2 changes: 1 addition & 1 deletion sparse/mlir_backend/_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import functools
import weakref

import mlir.runtime as rt
import mlir_finch.runtime as rt

import numpy as np

Expand Down
18 changes: 11 additions & 7 deletions sparse/mlir_backend/_conversions.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,13 +94,17 @@ def _from_scipy(arr: ScipySparseArray, copy: bool | None = None) -> Array:
case "coo":
if copy is not None and not copy:
raise RuntimeError(f"`scipy.sparse.{type(arr.__name__)}` cannot be zero-copy converted.")
coords = np.stack([arr.row, arr.col], axis=1)
row, col = arr.row, arr.col
hameerabbasi marked this conversation as resolved.
Show resolved Hide resolved
if row.dtype != col.dtype:
raise RuntimeError(f"`row` and `col` dtypes must be the same: {row.dtype} != {col.dtype}.")
pos = np.array([0, arr.nnz], dtype=np.int64)
pos_width = pos.dtype.itemsize * 8
crd_width = coords.dtype.itemsize * 8
crd_width = row.dtype.itemsize * 8
data = arr.data
if copy:
data = arr.data.copy()
data = data.copy()
row = row.copy()
col = col.copy()

level_props = LevelProperties(0)
if not arr.has_canonical_format:
Expand All @@ -109,15 +113,15 @@ def _from_scipy(arr: ScipySparseArray, copy: bool | None = None) -> Array:
coo_format = get_storage_format(
levels=(
Level(LevelFormat.Compressed, level_props | LevelProperties.NonUnique),
Level(LevelFormat.Singleton, level_props),
Level(LevelFormat.Singleton, level_props | LevelProperties.SOA),
),
order=(0, 1),
pos_width=pos_width,
crd_width=crd_width,
dtype=arr.dtype,
)

return from_constituent_arrays(format=coo_format, arrays=(pos, coords, data), shape=arr.shape)
return from_constituent_arrays(format=coo_format, arrays=(pos, row, col, data), shape=arr.shape)
case _:
raise NotImplementedError(f"No conversion implemented for `scipy.sparse.{type(arr.__name__)}`.")

Expand All @@ -133,8 +137,8 @@ def to_scipy(arr: Array) -> ScipySparseArray:
return sps.csr_array((data, indices, indptr), shape=arr.shape)
return sps.csc_array((data, indices, indptr), shape=arr.shape)
case (Level(LevelFormat.Compressed, _), Level(LevelFormat.Singleton, _)):
_, coords, data = arr.get_constituent_arrays()
return sps.coo_array((data, (coords[:, 0], coords[:, 1])), shape=arr.shape)
_, row, col, data = arr.get_constituent_arrays()
return sps.coo_array((data, (row, col)), shape=arr.shape)
case _:
raise RuntimeError(f"No conversion implemented for `{storage_format=}`.")

Expand Down
18 changes: 16 additions & 2 deletions sparse/mlir_backend/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,28 @@
import ctypes.util
import os
import pathlib
import sys

from mlir.ir import Context
from mlir.passmanager import PassManager
from mlir_finch.ir import Context
from mlir_finch.passmanager import PassManager

DEBUG = bool(int(os.environ.get("DEBUG", "0")))
CWD = pathlib.Path(".")

# Directory searched for the MLIR runner shared libraries (presumably where the
# finch-mlir package installs them -- confirm against the wheel layout).
finch_lib_path = f"{sys.prefix}/lib/python3.{sys.version_info.minor}/site-packages/lib"

# Prepend that directory to LD_LIBRARY_PATH while keeping any entries that are
# already set. The previous condition was inverted: an unset variable produced
# the literal "<path>:None", and an existing value was silently discarded.
ld_library_path = os.environ.get("LD_LIBRARY_PATH")
ld_library_path = f"{finch_lib_path}:{ld_library_path}" if ld_library_path else finch_lib_path
os.environ["LD_LIBRARY_PATH"] = ld_library_path

MLIR_C_RUNNER_UTILS = ctypes.util.find_library("mlir_c_runner_utils")
if os.name == "posix" and MLIR_C_RUNNER_UTILS is not None:
MLIR_C_RUNNER_UTILS = f"{finch_lib_path}/{MLIR_C_RUNNER_UTILS}"

SHARED_LIBS = []
if MLIR_C_RUNNER_UTILS is not None:
SHARED_LIBS.append(MLIR_C_RUNNER_UTILS)

libc = ctypes.CDLL(ctypes.util.find_library("c")) if os.name != "nt" else ctypes.cdll.msvcrt
libc.free.argtypes = [ctypes.c_void_p]
libc.free.restype = None
Expand Down
4 changes: 2 additions & 2 deletions sparse/mlir_backend/_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
import math
import sys

import mlir.runtime as rt
from mlir import ir
import mlir_finch.runtime as rt
from mlir_finch import ir

import numpy as np

Expand Down
22 changes: 12 additions & 10 deletions sparse/mlir_backend/_ops.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import ctypes

import mlir.execution_engine
import mlir.passmanager
from mlir import ir
from mlir.dialects import arith, complex, func, linalg, sparse_tensor, tensor
import mlir_finch.execution_engine
import mlir_finch.passmanager
from mlir_finch import ir
from mlir_finch.dialects import arith, complex, func, linalg, sparse_tensor, tensor

from ._array import Array
from ._common import fn_cache
from ._core import CWD, DEBUG, MLIR_C_RUNNER_UTILS, ctx, pm
from ._core import CWD, DEBUG, SHARED_LIBS, ctx, pm
from ._dtypes import DType, IeeeComplexFloatingDType, IeeeRealFloatingDType, IntegerDType


Expand Down Expand Up @@ -37,7 +37,7 @@ def get_add_module(

@func.FuncOp.from_py_func(a_tensor_type, b_tensor_type)
def add(a, b):
out = tensor.empty(out_tensor_type, [])
out = tensor.empty(out_tensor_type.shape, dtype, encoding=out_tensor_type.encoding)
generic_op = linalg.GenericOp(
[out_tensor_type],
[a, b],
Expand Down Expand Up @@ -72,7 +72,7 @@ def add(a, b):
if DEBUG:
(CWD / "add_module_opt.mlir").write_text(str(module))

return mlir.execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=[MLIR_C_RUNNER_UTILS])
return mlir_finch.execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=SHARED_LIBS)


@fn_cache
Expand All @@ -97,7 +97,7 @@ def reshape(a, shape):
if DEBUG:
(CWD / "reshape_module_opt.mlir").write_text(str(module))

return mlir.execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=[MLIR_C_RUNNER_UTILS])
return mlir_finch.execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=SHARED_LIBS)


@fn_cache
Expand All @@ -113,7 +113,9 @@ def get_broadcast_to_module(

@func.FuncOp.from_py_func(in_tensor_type)
def broadcast_to(in_tensor):
out = tensor.empty(out_tensor_type, [])
out = tensor.empty(
out_tensor_type.shape, out_tensor_type.element_type, encoding=out_tensor_type.encoding
)
return linalg.broadcast(in_tensor, outs=[out], dimensions=dimensions)

broadcast_to.func_op.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get()
Expand All @@ -123,7 +125,7 @@ def broadcast_to(in_tensor):
if DEBUG:
(CWD / "broadcast_to_module_opt.mlir").write_text(str(module))

return mlir.execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=[MLIR_C_RUNNER_UTILS])
return mlir_finch.execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=SHARED_LIBS)


def add(x1: Array, x2: Array) -> Array:
Expand Down
20 changes: 17 additions & 3 deletions sparse/mlir_backend/levels.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
import re
import typing

from mlir import ir
from mlir.dialects import sparse_tensor
from mlir_finch import ir
from mlir_finch.dialects import sparse_tensor

import numpy as np

Expand Down Expand Up @@ -36,6 +36,7 @@ def _camel_to_snake(name: str) -> str:
class LevelProperties(enum.Flag):
NonOrdered = enum.auto()
NonUnique = enum.auto()
SOA = enum.auto()

def build(self) -> list[sparse_tensor.LevelProperty]:
return [getattr(sparse_tensor.LevelProperty, _camel_to_snake(p.name)) for p in type(self) if p in self]
Expand Down Expand Up @@ -108,15 +109,28 @@ def _get_ctypes_type(self, *, owns_memory=False):
def get_fields():
fields = []
compressed_counter = 0
singleton_counter = 0
for level, next_level in itertools.zip_longest(self.levels, self.levels[1:]):
if LevelFormat.Compressed == level.format:
compressed_counter += 1
fields.append((f"pointers_to_{compressed_counter}", get_nd_memref_descr(1, ptr_dtype)))
if next_level is not None and LevelFormat.Singleton == next_level.format:
fields.append((f"indices_{compressed_counter}", get_nd_memref_descr(2, idx_dtype)))
singleton_counter += 1
fields.append(
(
f"indices_{compressed_counter}_coords_{singleton_counter}",
get_nd_memref_descr(1, idx_dtype),
)
)
else:
fields.append((f"indices_{compressed_counter}", get_nd_memref_descr(1, idx_dtype)))

if LevelFormat.Singleton == level.format:
singleton_counter += 1
fields.append(
(f"indices_{compressed_counter}_coords_{singleton_counter}", get_nd_memref_descr(1, idx_dtype))
)

Comment on lines +118 to +133
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should probably handle the SoA and non-SoA cases separately.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In my opinion we should only support SoA singleton format:

  1. Non-SoA singleton looks to be buggy for basic operations link
  2. Mixed singleton levels aren't allowed: https://github.com/llvm/llvm-project/blob/8d38fbf2f027c72332c8ba03ff0ff0f83b4dcf02/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp#L811

What would be the benefit of supporting non-SoA singleton levels separately?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We'd be able to support the current COO format only for non-SoA.

Copy link
Collaborator Author

@mtsokol mtsokol Nov 6, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you mean by that? Can you give an example?
With this PR we can support the COO format (including input objects of type scipy.sparse.coo_array), with the MLIR backend implementation using the SoA representation.

Copy link
Collaborator

@hameerabbasi hameerabbasi Nov 6, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The existing Numba COO format uses the non-SoA format, and we pretty much have to support this to be backwards compatible. Doesn't have to be in this PR though.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I'm still missing the point here. Numba is a separate backend that supports only 1D and 2D COO arrays. MLIR backend supports >=2D COO arrays.
What do you mean by backward compatibility here? Can you give an example where backward compatibility breaks here? If a user passes a scipy.sparse.coo_array object to the sparse.asarray function, it will work for any backend regardless of the internal representation (SoA or non-SoA).

Copy link
Collaborator

@hameerabbasi hameerabbasi Nov 7, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, sparse.COO has a constructor that takes coords and a .coords attribute. The attribute is a 2D NumPy array.

Ideally, I'd like to keep it a 2D np.ndarray as otherwise I'm not 100% sure how much will break.

We could do this with np.stack, but that would incur a performance penalty.

Also the current Numba backend supports ND, not 2D. SciPy supports only 2D, however.

I'm thinking of a future where all of sparse is powered by Finch-MLIR, ideally.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think for accessing coords, np.stack could be an option. My caveat for non-SoA in the MLIR backend is that it already has some issues with basic operations: https://discourse.llvm.org/t/passmanager-fails-on-simple-coo-addition-example/81247

fields.append(("values", get_nd_memref_descr(1, self.dtype)))
return fields

Expand Down
36 changes: 18 additions & 18 deletions sparse/mlir_backend/tests/test_simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,20 +176,18 @@ def test_add(rng, dtype):
assert isinstance(actual, np.ndarray)
np.testing.assert_array_equal(actual, expected)

# NOTE: Fixed in https://github.com/llvm/llvm-project/pull/108615
# actual = sparse.add(c_tensor, c_tensor).to_scipy_sparse()
# expected = c + c
# assert isinstance(actual, np.ndarray)
# np.testing.assert_array_equal(actual, expected)
actual = sparse.to_numpy(sparse.add(dense_tensor, dense_tensor))
expected = dense + dense
assert isinstance(actual, np.ndarray)
np.testing.assert_array_equal(actual, expected)

actual = sparse.to_scipy(sparse.add(csr_2_tensor, coo_tensor))
expected = csr_2 + coo
assert_csx_equal(expected, actual)

# NOTE: https://discourse.llvm.org/t/passmanager-fails-on-simple-coo-addition-example/81247
# actual = sparse.add(d_tensor, d_tensor).to_scipy_sparse()
# expected = d + d
# np.testing.assert_array_equal(actual.todense(), expected.todense())
actual = sparse.to_scipy(sparse.add(coo_tensor, coo_tensor))
expected = coo + coo
np.testing.assert_array_equal(actual.todense(), expected.todense())


@parametrize_dtypes
Expand Down Expand Up @@ -226,8 +224,11 @@ def test_coo_3d_format(dtype):
format = sparse.levels.get_storage_format(
levels=(
sparse.levels.Level(sparse.levels.LevelFormat.Compressed, sparse.levels.LevelProperties.NonUnique),
sparse.levels.Level(sparse.levels.LevelFormat.Singleton, sparse.levels.LevelProperties.NonUnique),
sparse.levels.Level(sparse.levels.LevelFormat.Singleton, sparse.levels.LevelProperties(0)),
sparse.levels.Level(
sparse.levels.LevelFormat.Singleton,
sparse.levels.LevelProperties.NonUnique | sparse.levels.LevelProperties.SOA,
),
sparse.levels.Level(sparse.levels.LevelFormat.Singleton, sparse.levels.LevelProperties.SOA),
),
order="C",
pos_width=64,
Expand All @@ -237,20 +238,19 @@ def test_coo_3d_format(dtype):

SHAPE = (2, 2, 4)
pos = np.array([0, 7])
crd = np.array([[0, 1, 0, 0, 1, 1, 0], [1, 3, 1, 0, 0, 1, 0], [3, 1, 1, 0, 1, 1, 1]])
crd = [np.array([0, 1, 0, 0, 1, 1, 0]), np.array([1, 3, 1, 0, 0, 1, 0]), np.array([3, 1, 1, 0, 1, 1, 1])]
data = np.array([1, 2, 3, 4, 5, 6, 7], dtype=dtype)
carrs = (pos, crd, data)
carrs = (pos, *crd, data)

coo_array = sparse.from_constituent_arrays(format=format, arrays=carrs, shape=SHAPE)
result = coo_array.get_constituent_arrays()
for actual, expected in zip(result, carrs, strict=True):
np.testing.assert_array_equal(actual, expected)

# NOTE: Blocked by https://github.com/llvm/llvm-project/pull/109135
# res_arrays = sparse.add(coo_array, coo_array).get_constituent_arrays()
# res_expected = (pos, crd, data * 2)
# for actual, expected in zip(res_arrays, res_expected, strict=False):
# np.testing.assert_array_equal(actual, expected)
result_arrays = sparse.add(coo_array, coo_array).get_constituent_arrays()
constituent_arrays = (pos, *crd, data * 2)
for actual, expected in zip(result_arrays, constituent_arrays, strict=False):
np.testing.assert_array_equal(actual, expected)


@parametrize_dtypes
Expand Down
Loading