From cb7e3cb9a664e2c37d224471b1181069f577bbff Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Tue, 4 Jul 2023 15:43:20 +0200 Subject: [PATCH] performance improvement for Sparse*.nonzero, also changed interface Now uses pure array functions, should be much faster! Signed-off-by: Nick Papior --- CHANGELOG.md | 1 + src/sisl/sparse.py | 49 +++++++++++++------------- src/sisl/sparse_geometry.py | 16 ++++----- src/sisl/tests/test_sparse.py | 10 +++--- src/sisl/tests/test_sparse_geometry.py | 4 +-- 5 files changed, 40 insertions(+), 40 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 572d247d40..eb60d218f1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -43,6 +43,7 @@ we hit release version 1.0.0. - `BrillouinZone.merge` allows simple merging of several objects, #537 ### Changed +- interface for Sparse*.nonzero(), arguments suffixed with 's' (`row` -> `rows`, `only_col` -> `only_cols`) - `stdoutSileVASP` will not accept `all=` arguments - `stdoutSileVASP.read_energy` returns as default the next item (no longer the last) - `txtSileOrca` will not accept `all=` arguments, see #584 diff --git a/src/sisl/sparse.py b/src/sisl/sparse.py index a272437f3e..bff05e6a22 100644 --- a/src/sisl/sparse.py +++ b/src/sisl/sparse.py @@ -1262,38 +1262,37 @@ def __contains__(self, key): # Get indices of sparse data (-1 if non-existing) return np_all(self._get(key[0], key[1]) >= 0) - def nonzero(self, row=None, only_col=False): + def nonzero(self, rows=None, only_cols=False): """ Row and column indices where non-zero elements exists Parameters ---------- - row : int or array_like of int, optional + rows : int or array_like of int, optional only return the tuples for the requested rows, default is all rows - only_col : bool, optional - only return then non-zero columns + only_cols : bool, optional + only return the non-zero columns """ - if row is None: - idx = array_arange(self.ptr[:-1], n=self.ncol) - if not only_col: - rows = _a.emptyi([self.nnz]) - j = 0 - for r, N in enumerate(self.ncol): - rows[j:j+N] = r - j += N + ptr 
= self.ptr + ncol = self.ncol + col = self.col + + if rows is None: + # all rows will be returned + cols = col[array_arange(ptr[:-1], n=ncol, dtype=int32)] + if not only_cols: + idx = (ncol > 0).nonzero()[0] + rows = repeat(idx.astype(int32, copy=False), ncol[idx]) else: - row = asarrayi(row).ravel() - idx = array_arange(self.ptr[row], n=self.ncol[row]) - if not only_col: - N = _a.sumi(self.ncol[row]) - rows = _a.emptyi([N]) - j = 0 - for r, N in zip(row, self.ncol[row]): - rows[j:j+N] = r - j += N - - if only_col: - return self.col[idx] - return rows, self.col[idx] + rows = _a.asarray(rows).ravel() + ncol = ncol[rows] + cols = col[array_arange(ptr[rows], n=ncol, dtype=int32)] + if not only_cols: + idx = (ncol > 0).nonzero()[0] + rows = repeat(rows[idx].astype(int32, copy=False), ncol[idx]) + + if only_cols: + return cols + return rows, cols def eliminate_zeros(self, atol=0.): """ Remove all zero elememts from the sparse matrix diff --git a/src/sisl/sparse_geometry.py b/src/sisl/sparse_geometry.py index 3dd66375e6..1fca6d959a 100644 --- a/src/sisl/sparse_geometry.py +++ b/src/sisl/sparse_geometry.py @@ -1048,21 +1048,21 @@ def __setitem__(self, key, val): def _size(self): return self.geometry.na - def nonzero(self, atoms=None, only_col=False): + def nonzero(self, atoms=None, only_cols=False): """ Indices row and column indices where non-zero elements exists Parameters ---------- atoms : int or array_like of int, optional only return the tuples for the requested atoms, default is all atoms - only_col : bool, optional + only_cols : bool, optional only return then non-zero columns See Also -------- SparseCSR.nonzero : the equivalent function call """ - return self._csr.nonzero(row=atoms, only_col=only_col) + return self._csr.nonzero(rows=atoms, only_cols=only_cols) def iter_nnz(self, atoms=None): """ Iterations of the non-zero elements @@ -1495,7 +1495,7 @@ def edges(self, atoms=None, exclude=None, orbitals=None): return 
unique(self.geometry.o2a(self._csr.edges(self.geometry.a2o(atoms, True), exclude))) return self._csr.edges(orbitals, exclude) - def nonzero(self, atoms=None, only_col=False): + def nonzero(self, atoms=None, only_cols=False): """ Indices row and column indices where non-zero elements exists Parameters @@ -1503,7 +1503,7 @@ def nonzero(self, atoms=None, only_col=False): atoms : int or array_like of int, optional only return the tuples for the requested atoms, default is all atoms But for *all* orbitals. - only_col : bool, optional + only_cols : bool, optional only return then non-zero columns See Also @@ -1511,9 +1511,9 @@ def nonzero(self, atoms=None, only_col=False): SparseCSR.nonzero : the equivalent function call """ if atoms is None: - return self._csr.nonzero(only_col=only_col) - row = self.geometry.a2o(atoms, all=True) - return self._csr.nonzero(row=row, only_col=only_col) + return self._csr.nonzero(only_cols=only_cols) + rows = self.geometry.a2o(atoms, all=True) + return self._csr.nonzero(rows=rows, only_cols=only_cols) def iter_nnz(self, atoms=None, orbitals=None): """ Iterations of the non-zero elements diff --git a/src/sisl/tests/test_sparse.py b/src/sisl/tests/test_sparse.py index 9ce92a24da..c4ac603d6a 100644 --- a/src/sisl/tests/test_sparse.py +++ b/src/sisl/tests/test_sparse.py @@ -871,15 +871,15 @@ def test_nonzero1(setup): r, c = s1.nonzero() assert np.all(r == [1, 1, 1, 2]) assert np.all(c == [1, 2, 3, 1]) - c = s1.nonzero(only_col=True) + c = s1.nonzero(only_cols=True) assert np.all(c == [1, 2, 3, 1]) - c = s1.nonzero(row=1, only_col=True) + c = s1.nonzero(rows=1, only_cols=True) assert np.all(c == [1, 2, 3]) - c = s1.nonzero(row=2, only_col=True) + c = s1.nonzero(rows=2, only_cols=True) assert np.all(c == [1]) - c = s1.nonzero(row=[0, 1], only_col=True) + c = s1.nonzero(rows=[0, 1], only_cols=True) assert np.all(c == [1, 2, 3]) - r, c = s1.nonzero(row=[0, 1]) + r, c = s1.nonzero(rows=[0, 1]) assert np.all(r == [1, 1, 1]) assert np.all(c == [1, 2, 
3]) s1.empty() diff --git a/src/sisl/tests/test_sparse_geometry.py b/src/sisl/tests/test_sparse_geometry.py index ca4a0816af..bb5ee815d8 100644 --- a/src/sisl/tests/test_sparse_geometry.py +++ b/src/sisl/tests/test_sparse_geometry.py @@ -94,7 +94,7 @@ def test_nonzero1(self, setup): r, c = s2.nonzero() assert np.allclose(r, [0, 0, 0, 3, 3, 3]) assert np.allclose(c, [0, 2, 3, 1, 2, 3]) - c = s2.nonzero(only_col=True) + c = s2.nonzero(only_cols=True) assert np.allclose(c, [0, 2, 3, 1, 2, 3]) r, c = s2.nonzero(atoms=1) assert len(r) == 0 @@ -102,7 +102,7 @@ def test_nonzero1(self, setup): r, c = s2.nonzero(atoms=0) assert np.allclose(r, [0, 0, 0]) assert np.allclose(c, [0, 2, 3]) - c = s2.nonzero(atoms=0, only_col=True) + c = s2.nonzero(atoms=0, only_cols=True) assert np.allclose(c, [0, 2, 3]) def test_create_construct_different_length(self, setup):