From c733095d01b9078982eac490ac11d31edfe94edc Mon Sep 17 00:00:00 2001 From: Shreyas Khandekar <60454060+ShreyasKhandekar@users.noreply.github.com> Date: Thu, 29 Aug 2024 13:38:17 -0500 Subject: [PATCH 01/10] Add func to convert sparse matrix to pdarrays We convert sparse matrices to a 3 tuple of pdarrays with row, cols, and vals Signed-off-by: Shreyas Khandekar <60454060+ShreyasKhandekar@users.noreply.github.com> --- arkouda/sparrayclass.py | 59 +++++++++++++++++++++++++++++++++++++++- src/SparseMatrix.chpl | 9 ++++++ src/SparseMatrixMsg.chpl | 33 +++++++++++++++++++++- 3 files changed, 99 insertions(+), 2 deletions(-) diff --git a/arkouda/sparrayclass.py b/arkouda/sparrayclass.py index 4af23abe03..323bbfcf65 100644 --- a/arkouda/sparrayclass.py +++ b/arkouda/sparrayclass.py @@ -1,14 +1,19 @@ from __future__ import annotations import builtins -from typing import Optional, Sequence, Union +from typing import Optional, Sequence, Union, cast import numpy as np from typeguard import typechecked +from arkouda.dtypes import int64 as akint64 +from arkouda.dtypes import ( + NumericDTypes +) from arkouda.client import generic_msg from arkouda.dtypes import dtype, int_scalars from arkouda.logger import getArkoudaLogger +from arkouda.pdarrayclass import create_pdarray, pdarray logger = getArkoudaLogger(name="sparrayclass") @@ -94,6 +99,58 @@ def __str__(self): # This won't work out of the box for sparrays need to add th # print("Called repr") # return generic_msg(cmd="repr", args={"array": self, "printThresh": sparrayIterThresh}) + """ + Converts the sparse matrix to a tuple of 3 pdarrays (rows, cols, vals) + Returns + ------- + tuple[ak.pdarray, ak.pdarray, ak.pdarray] + A tuple of 3 pdarrays which contain the row indices, the column indices, + and the values at the respective indices within the sparse matrix. 
+ + Raises + ------ + RuntimeError + Raised if there is a server-side error thrown, if the pdarray size + exceeds the built-in client.maxTransferBytes size limit, or if the bytes + received does not match expected number of bytes + Notes + ----- + The number of bytes in the array cannot exceed ``client.maxTransferBytes``, + otherwise a ``RuntimeError`` will be raised. This is to protect the user + from overflowing the memory of the system on which the Python client + is running, under the assumption that the server is running on a + distributed system with much more memory than the client. The user + may override this limit by setting client.maxTransferBytes to a larger + value, but proceed with caution. + + Examples + -------- + >>> a = ak.random_sparse_matrix(100,0.2,"CSR"); + >>> a.to_pdarray() + ??? + + >>> type(a.to_ndarray()) + ??? + """ + def to_pdarray(self): + size = self.nnz + dtype = self.dtype + dtype_name = cast(np.dtype, dtype).name + # check dtype for error + if dtype is not akint64: # Hardcoded for int support only for now later change this to dtype_name not in NumericDTypes: + raise TypeError(f"unsupported dtype {dtype}") + repMsg = generic_msg(cmd=f"create<{dtype_name},1>", args={"shape": size}) + vals = create_pdarray(repMsg) + akint64_name = cast(np.dtype, akint64).name + repMsg = generic_msg(cmd=f"create<{akint64_name},1>", args={"shape": size}) + rows = create_pdarray(repMsg) + repMsg = generic_msg(cmd=f"create<{akint64_name},1>", args={"shape": size}) + cols = create_pdarray(repMsg) + generic_msg(cmd="sparseToPdarray", args={"array": self, "rows": rows, "cols": cols, "vals": vals}) + return (rows, cols, vals) + + + # creates sparray object # only after: diff --git a/src/SparseMatrix.chpl b/src/SparseMatrix.chpl index ad6867b441..463be1eaa6 100644 --- a/src/SparseMatrix.chpl +++ b/src/SparseMatrix.chpl @@ -2,6 +2,15 @@ module SparseMatrix { public use SpsMatUtil; + + proc sparseMatToPdarray(spsMat, rows, cols, vals){ + + for (idx, (i, j)) in 
zip(1..3, spsMat.domain) { + rows[idx] = i; + cols[idx] = j; + vals[idx] = spsMat[i, j]; + } + } // sparse, outer, matrix-matrix multiplication algorithm; A is assumed // CSC and B CSR proc sparseMatMatMult(A, B) { diff --git a/src/SparseMatrixMsg.chpl b/src/SparseMatrixMsg.chpl index f28bcd0c82..d3abc76157 100644 --- a/src/SparseMatrixMsg.chpl +++ b/src/SparseMatrixMsg.chpl @@ -53,7 +53,6 @@ module SparseMatrixMsg { return new MsgTuple(errorMsg, MsgType.ERROR); } } - } @@ -80,9 +79,41 @@ module SparseMatrixMsg { } + proc sparseMatrixtoPdarray(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws { + + var gEnt = getGenericSparseArrayEntry(msgArgs.getValueOf("array"), st); + + var rows = st[msgArgs["rows"]]: SymEntry(int, 1); + var cols = st[msgArgs["cols"]]: SymEntry(int, 1); + var vals = st[msgArgs["vals"]]: SymEntry(int, 1); + + if gEnt.layoutStr=="CSC" { + // Hardcode for int right now + var sparrayEntry = gEnt.toSparseSymEntry(int, dimensions=2, layout.CSC); + sparseMatToPdarray(sparrayEntry.a, rows.a, cols.a, vals.a); + } else if gEnt.layoutStr=="CSR" { + // Hardcode for int right now + var sparrayEntry = gEnt.toSparseSymEntry(int, dimensions=2, layout.CSR); + sparseMatToPdarray(sparrayEntry.a, rows.a, cols.a, vals.a); + } else { + throw getErrorWithContext( + msg="unsupported layout for sparse matrix: %s".format(gEnt.layoutStr), + lineNumber=getLineNumber(), + routineName=getRoutineName(), + moduleName=getModuleName(), + errorClass="NotImplementedError" + ); + } + + sparseLogger.debug(getModuleName(),getRoutineName(),getLineNumber(), "Converted sparse matrix to pdarray"); + return MsgTuple.success(); + } + + use CommandMap; registerFunction("random_sparse_matrix", randomSparseMatrixMsg, getModuleName()); registerFunction("sparse_matrix_matrix_mult", sparseMatrixMatrixMultMsg, getModuleName()); + registerFunction("sparseToPdarray", sparseMatrixtoPdarray, getModuleName()); } \ No newline at end of file From 
fce2d388b0f0f678a745e23182764f9e1ddfb3c5 Mon Sep 17 00:00:00 2001 From: Shreyas Khandekar <60454060+ShreyasKhandekar@users.noreply.github.com> Date: Fri, 30 Aug 2024 16:30:29 -0500 Subject: [PATCH 02/10] switch to using list of pdarrays We use the MsgTuple.fromResponses function to return a list of multiple pdarrays instead of a tuple, this minimizes the comm between the server and client. Signed-off-by: Shreyas Khandekar <60454060+ShreyasKhandekar@users.noreply.github.com> --- arkouda/sparrayclass.py | 25 ++++++++----------------- src/SparseMatrix.chpl | 5 ++--- src/SparseMatrixMsg.chpl | 20 +++++++++++++------- 3 files changed, 23 insertions(+), 27 deletions(-) diff --git a/arkouda/sparrayclass.py b/arkouda/sparrayclass.py index 323bbfcf65..abc2f3bc9b 100644 --- a/arkouda/sparrayclass.py +++ b/arkouda/sparrayclass.py @@ -13,7 +13,7 @@ from arkouda.client import generic_msg from arkouda.dtypes import dtype, int_scalars from arkouda.logger import getArkoudaLogger -from arkouda.pdarrayclass import create_pdarray, pdarray +from arkouda.pdarrayclass import create_pdarrays logger = getArkoudaLogger(name="sparrayclass") @@ -100,11 +100,11 @@ def __str__(self): # This won't work out of the box for sparrays need to add th # return generic_msg(cmd="repr", args={"array": self, "printThresh": sparrayIterThresh}) """ - Converts the sparse matrix to a tuple of 3 pdarrays (rows, cols, vals) + Converts the sparse matrix to a list of 3 pdarrays (rows, cols, vals) Returns ------- - tuple[ak.pdarray, ak.pdarray, ak.pdarray] - A tuple of 3 pdarrays which contain the row indices, the column indices, + List[ak.pdarray] + A list of 3 pdarrays which contain the row indices, the column indices, and the values at the respective indices within the sparse matrix. 
Raises @@ -137,20 +137,11 @@ def to_pdarray(self): dtype = self.dtype dtype_name = cast(np.dtype, dtype).name # check dtype for error - if dtype is not akint64: # Hardcoded for int support only for now later change this to dtype_name not in NumericDTypes: + if dtype_name not in NumericDTypes: raise TypeError(f"unsupported dtype {dtype}") - repMsg = generic_msg(cmd=f"create<{dtype_name},1>", args={"shape": size}) - vals = create_pdarray(repMsg) - akint64_name = cast(np.dtype, akint64).name - repMsg = generic_msg(cmd=f"create<{akint64_name},1>", args={"shape": size}) - rows = create_pdarray(repMsg) - repMsg = generic_msg(cmd=f"create<{akint64_name},1>", args={"shape": size}) - cols = create_pdarray(repMsg) - generic_msg(cmd="sparseToPdarray", args={"array": self, "rows": rows, "cols": cols, "vals": vals}) - return (rows, cols, vals) - - - + responseArrays = generic_msg(cmd="sparse_to_pdarrays", args={"array": self}) + array_list = create_pdarrays(responseArrays); + return array_list # creates sparray object # only after: diff --git a/src/SparseMatrix.chpl b/src/SparseMatrix.chpl index 463be1eaa6..b239fc017f 100644 --- a/src/SparseMatrix.chpl +++ b/src/SparseMatrix.chpl @@ -3,9 +3,8 @@ module SparseMatrix { public use SpsMatUtil; - proc sparseMatToPdarray(spsMat, rows, cols, vals){ - - for (idx, (i, j)) in zip(1..3, spsMat.domain) { + proc sparseMatToPdarray(const ref spsMat, ref rows, ref cols, ref vals){ + for((i,j), idx) in zip(spsMat.domain,0..) 
{ rows[idx] = i; cols[idx] = j; vals[idx] = spsMat[i, j]; diff --git a/src/SparseMatrixMsg.chpl b/src/SparseMatrixMsg.chpl index d3abc76157..1eb38f149e 100644 --- a/src/SparseMatrixMsg.chpl +++ b/src/SparseMatrixMsg.chpl @@ -80,21 +80,23 @@ module SparseMatrixMsg { proc sparseMatrixtoPdarray(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws { + var repMsg: string; // response message with the details of the new arr var gEnt = getGenericSparseArrayEntry(msgArgs.getValueOf("array"), st); - var rows = st[msgArgs["rows"]]: SymEntry(int, 1); - var cols = st[msgArgs["cols"]]: SymEntry(int, 1); - var vals = st[msgArgs["vals"]]: SymEntry(int, 1); + var size = gEnt.nnz; + var rows = makeDistArray(size, int); + var cols = makeDistArray(size, int); + var vals = makeDistArray(size, int); if gEnt.layoutStr=="CSC" { // Hardcode for int right now var sparrayEntry = gEnt.toSparseSymEntry(int, dimensions=2, layout.CSC); - sparseMatToPdarray(sparrayEntry.a, rows.a, cols.a, vals.a); + sparseMatToPdarray(sparrayEntry.a, rows, cols, vals); } else if gEnt.layoutStr=="CSR" { // Hardcode for int right now var sparrayEntry = gEnt.toSparseSymEntry(int, dimensions=2, layout.CSR); - sparseMatToPdarray(sparrayEntry.a, rows.a, cols.a, vals.a); + sparseMatToPdarray(sparrayEntry.a, rows, cols, vals); } else { throw getErrorWithContext( msg="unsupported layout for sparse matrix: %s".format(gEnt.layoutStr), @@ -105,8 +107,12 @@ module SparseMatrixMsg { ); } + var responses: [0..2] MsgTuple; + responses[0] = st.insert(createSymEntry(rows)); + responses[1] = st.insert(createSymEntry(cols)); + responses[2] = st.insert(createSymEntry(vals)); sparseLogger.debug(getModuleName(),getRoutineName(),getLineNumber(), "Converted sparse matrix to pdarray"); - return MsgTuple.success(); + return MsgTuple.fromResponses(responses); } @@ -114,6 +120,6 @@ module SparseMatrixMsg { use CommandMap; registerFunction("random_sparse_matrix", randomSparseMatrixMsg, getModuleName()); 
registerFunction("sparse_matrix_matrix_mult", sparseMatrixMatrixMultMsg, getModuleName()); - registerFunction("sparseToPdarray", sparseMatrixtoPdarray, getModuleName()); + registerFunction("sparse_to_pdarrays", sparseMatrixtoPdarray, getModuleName()); } \ No newline at end of file From aabdc44418fc2e6812792cf7698460b2efd8b291 Mon Sep 17 00:00:00 2001 From: Shreyas Khandekar <60454060+ShreyasKhandekar@users.noreply.github.com> Date: Fri, 30 Aug 2024 17:57:07 -0500 Subject: [PATCH 03/10] Add fill_vals to populate a sparse array with values Signed-off-by: Shreyas Khandekar <60454060+ShreyasKhandekar@users.noreply.github.com> --- arkouda/sparrayclass.py | 17 ++++++++++------- src/SparseMatrix.chpl | 31 +++++++++++++++++++++++++++++++ src/SparseMatrixMsg.chpl | 33 ++++++++++++++++++++++++++++++++- 3 files changed, 73 insertions(+), 8 deletions(-) diff --git a/arkouda/sparrayclass.py b/arkouda/sparrayclass.py index abc2f3bc9b..fe778614d3 100644 --- a/arkouda/sparrayclass.py +++ b/arkouda/sparrayclass.py @@ -5,15 +5,13 @@ import numpy as np from typeguard import typechecked -from arkouda.dtypes import int64 as akint64 -from arkouda.dtypes import ( - NumericDTypes -) from arkouda.client import generic_msg -from arkouda.dtypes import dtype, int_scalars +from arkouda.dtypes import NumericDTypes, dtype +from arkouda.dtypes import int64 as akint64 +from arkouda.dtypes import int_scalars from arkouda.logger import getArkoudaLogger -from arkouda.pdarrayclass import create_pdarrays +from arkouda.pdarrayclass import create_pdarrays, pdarray logger = getArkoudaLogger(name="sparrayclass") @@ -132,6 +130,7 @@ def __str__(self): # This won't work out of the box for sparrays need to add th >>> type(a.to_ndarray()) ??? 
""" + @typechecked def to_pdarray(self): size = self.nnz dtype = self.dtype @@ -139,10 +138,14 @@ def to_pdarray(self): # check dtype for error if dtype_name not in NumericDTypes: raise TypeError(f"unsupported dtype {dtype}") - responseArrays = generic_msg(cmd="sparse_to_pdarrays", args={"array": self}) + responseArrays = generic_msg(cmd="sparse_to_pdarrays", args={"matrix": self}) array_list = create_pdarrays(responseArrays); return array_list + """""" + def fill_vals(self, a: pdarray): + generic_msg(cmd="fill_sparse_vals", args={"matrix": self, "vals": a}) + # creates sparray object # only after: # all values have been checked by python module and... diff --git a/src/SparseMatrix.chpl b/src/SparseMatrix.chpl index b239fc017f..5459e3cb60 100644 --- a/src/SparseMatrix.chpl +++ b/src/SparseMatrix.chpl @@ -3,6 +3,37 @@ module SparseMatrix { public use SpsMatUtil; + // Quick and dirty, not permanent + proc fillSparseMatrix(ref spsMat, const A: [?D] ?eltType) throws { + if A.rank != 1 then + throw getErrorWithContext( + msg="fill vals requires a 1D array; got a %iD array".format(A.rank), + lineNumber=getLineNumber(), + routineName=getRoutineName(), + moduleName=getModuleName(), + errorClass="IllegalArgumentError" + ); + if A.size != spsMat.domain.getNNZ() then + throw getErrorWithContext( + msg="fill vals requires an array of the same size as the sparse matrix; got %i elements, expected %i".format(A.size, spsMat.domain.getNNZ()), + lineNumber=getLineNumber(), + routineName=getRoutineName(), + moduleName=getModuleName(), + errorClass="IllegalArgumentError" + ); + if eltType != spsMat.eltType then + throw getErrorWithContext( + msg="fill vals requires an array of the same type as the sparse matrix; got %s, expected %s".format(eltType, spsMat.eltType), + lineNumber=getLineNumber(), + routineName=getRoutineName(), + moduleName=getModuleName(), + errorClass="IllegalArgumentError" + ); + for((i,j), idx) in zip(spsMat.domain,A.domain) { + spsMat[i,j] = A[idx]; + } + } + proc 
sparseMatToPdarray(const ref spsMat, ref rows, ref cols, ref vals){ for((i,j), idx) in zip(spsMat.domain,0..) { rows[idx] = i; diff --git a/src/SparseMatrixMsg.chpl b/src/SparseMatrixMsg.chpl index 1eb38f149e..52248a812b 100644 --- a/src/SparseMatrixMsg.chpl +++ b/src/SparseMatrixMsg.chpl @@ -82,7 +82,7 @@ module SparseMatrixMsg { proc sparseMatrixtoPdarray(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws { var repMsg: string; // response message with the details of the new arr - var gEnt = getGenericSparseArrayEntry(msgArgs.getValueOf("array"), st); + var gEnt = getGenericSparseArrayEntry(msgArgs.getValueOf("matrix"), st); var size = gEnt.nnz; var rows = makeDistArray(size, int); @@ -116,10 +116,41 @@ module SparseMatrixMsg { } + proc fillSparseMatrixMsg(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws { + var repMsg: string; // response message with the details of the new arr + + var gEnt = getGenericSparseArrayEntry(msgArgs.getValueOf("matrix"), st); + var gEntVals: borrowed GenSymEntry = getGenericTypedArrayEntry(msgArgs.getValueOf("vals"), st); + + //Hardcode int for now + var vals = toSymEntry(gEntVals,int); + if gEnt.layoutStr=="CSC" { + // Hardcode for int right now + var sparrayEntry = gEnt.toSparseSymEntry(int, dimensions=2, layout.CSC); + fillSparseMatrix(sparrayEntry.a, vals.a); + } else if gEnt.layoutStr=="CSR" { + // Hardcode for int right now + var sparrayEntry = gEnt.toSparseSymEntry(int, dimensions=2, layout.CSR); + fillSparseMatrix(sparrayEntry.a, vals.a); + } else { + throw getErrorWithContext( + msg="unsupported layout for sparse matrix: %s".format(gEnt.layoutStr), + lineNumber=getLineNumber(), + routineName=getRoutineName(), + moduleName=getModuleName(), + errorClass="NotImplementedError" + ); + } + sparseLogger.debug(getModuleName(),getRoutineName(),getLineNumber(), "Filled sparse Array with values"); + return MsgTuple.success(); + } + + use CommandMap; 
registerFunction("random_sparse_matrix", randomSparseMatrixMsg, getModuleName()); registerFunction("sparse_matrix_matrix_mult", sparseMatrixMatrixMultMsg, getModuleName()); registerFunction("sparse_to_pdarrays", sparseMatrixtoPdarray, getModuleName()); + registerFunction("fill_sparse_vals", fillSparseMatrixMsg, getModuleName()); } \ No newline at end of file From a743b05fcae8fa620c05c1e9ee4b3d8dbc6cc659 Mon Sep 17 00:00:00 2001 From: Shreyas Khandekar <60454060+ShreyasKhandekar@users.noreply.github.com> Date: Fri, 30 Aug 2024 17:57:27 -0500 Subject: [PATCH 04/10] Add tests for sparse matrix features Signed-off-by: Shreyas Khandekar <60454060+ShreyasKhandekar@users.noreply.github.com> --- tests/sparse_test.py | 94 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 tests/sparse_test.py diff --git a/tests/sparse_test.py b/tests/sparse_test.py new file mode 100644 index 0000000000..baa9b86ef3 --- /dev/null +++ b/tests/sparse_test.py @@ -0,0 +1,94 @@ +import numpy as np +import pytest +import arkouda as ak + +class TestSparse: + + def test_utils(self): + csc = ak.random_sparse_matrix(10, 0.2, "CSC") + csr = ak.random_sparse_matrix(10, 0.2, "CSR") + vals_csc = csc.to_pdarray()[2].to_ndarray() + vals_csr = csr.to_pdarray()[2].to_ndarray() + assert np.all(vals_csc == 0) + assert np.all(vals_csr == 0) + fill_vals_csc = ak.randint(0, 10, csc.nnz) + fill_vals_csr = ak.randint(0, 10, csr.nnz) + csc.fill_vals(fill_vals_csc) + csr.fill_vals(fill_vals_csr) + vals_csc = csc.to_pdarray()[2].to_ndarray() + vals_csr = csr.to_pdarray()[2].to_ndarray() + assert np.all(vals_csc == fill_vals_csc.to_ndarray()) + assert np.all(vals_csr == fill_vals_csr.to_ndarray()) + + + + def test_matmatmult(self): + # Create a reference for matrix multiplication in python: + def matmatmult(rowsA, colsA, valsA, rowsB, colsB, valsB): + """ + Perform matrix-matrix multiplication of two sparse matrices represented by + 3 arrays of rows, cols, and vals. + A . 
B + Parameters + ---------- + rowsA : list or array-like + Row indices of non-zero elements in matrix A. + colsA : list or array-like + Column indices of non-zero elements in matrix A. + valsA : list or array-like + Values of non-zero elements in matrix A. + rowsB : list or array-like + Row indices of non-zero elements in matrix B. + colsB : list or array-like + Column indices of non-zero elements in matrix B. + valsB : list or array-like + Values of non-zero elements in matrix B. + + Returns + ------- + result_rows : list + Row indices of non-zero elements in the result matrix. + result_cols : list + Column indices of non-zero elements in the result matrix. + result_vals : list + Values of non-zero elements in the result matrix. + """ + from collections import defaultdict + + # Dictionary to accumulate the results + result = defaultdict(float) + + # Create a dictionary for quick lookup of matrix B elements + B_dict = defaultdict(list) + for r, c, v in zip(rowsB, colsB, valsB): + B_dict[r].append((c, v)) + + # Perform the multiplication + for rA, cA, vA in zip(rowsA, colsA, valsA): + if cA in B_dict: + for cB, vB in B_dict[cA]: + result[(rA, cB)] += vA * vB + + # Extract the results into separate lists + result_rows, result_cols, result_vals = zip(*[(r, c, v) for (r, c), v in result.items()]) + + return list(result_rows), list(result_cols), list(result_vals) + matA = ak.random_sparse_matrix(100, 1, 'CSC') # Make it fully dense to make testing easy + matB = ak.random_sparse_matrix(100, 1, 'CSR') # Make it fully dense to make testing easy + fill_vals_a = ak.randint(0, 10, matA.nnz) + fill_vals_b = ak.randint(0, 10, matB.nnz) + matA.fill_vals(fill_vals_a) + matB.fill_vals(fill_vals_b) + rowsA, colsA, valsA = (arr.to_ndarray() for arr in matA.to_pdarray()) + rowsB, colsB, valsB = (arr.to_ndarray() for arr in matB.to_pdarray()) + assert np.all(valsA == fill_vals_a.to_ndarray()) + assert np.all(valsB == fill_vals_b.to_ndarray()) + ans_rows, ans_cols, ans_vals = 
matmatmult(rowsA, colsA, valsA, rowsB, colsB, valsB) + + result = ak.sparse_matrix_matrix_mult(matA, matB) + result_rows, result_cols, result_vals = (arr.to_ndarray() for arr in result.to_pdarray()) + + # Check the result is correct + assert np.all(result_rows == ans_rows) + assert np.all(result_cols == ans_cols) + assert np.all(result_vals == ans_vals) \ No newline at end of file From a376ff590b6e9536569c1e86cf56b2e82f2d8761 Mon Sep 17 00:00:00 2001 From: Shreyas Khandekar <60454060+ShreyasKhandekar@users.noreply.github.com> Date: Fri, 30 Aug 2024 18:02:04 -0500 Subject: [PATCH 05/10] Add sparse_test to pytest, module in ServerModule Signed-off-by: Shreyas Khandekar <60454060+ShreyasKhandekar@users.noreply.github.com> --- ServerModules.cfg | 1 + arkouda/sparrayclass.py | 9 +++++---- pytest.ini | 1 + 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/ServerModules.cfg b/ServerModules.cfg index 2785ac5d2e..b3947ac77b 100644 --- a/ServerModules.cfg +++ b/ServerModules.cfg @@ -30,6 +30,7 @@ SegmentedMsg SequenceMsg SetMsg SortMsg +SparseMatrixMsg StatsMsg TimeClassMsg TransferMsg diff --git a/arkouda/sparrayclass.py b/arkouda/sparrayclass.py index fe778614d3..a3c6645b0a 100644 --- a/arkouda/sparrayclass.py +++ b/arkouda/sparrayclass.py @@ -8,7 +8,6 @@ from arkouda.client import generic_msg from arkouda.dtypes import NumericDTypes, dtype -from arkouda.dtypes import int64 as akint64 from arkouda.dtypes import int_scalars from arkouda.logger import getArkoudaLogger from arkouda.pdarrayclass import create_pdarrays, pdarray @@ -130,22 +129,24 @@ def __str__(self): # This won't work out of the box for sparrays need to add th >>> type(a.to_ndarray()) ??? 
""" + @typechecked def to_pdarray(self): - size = self.nnz dtype = self.dtype dtype_name = cast(np.dtype, dtype).name # check dtype for error if dtype_name not in NumericDTypes: raise TypeError(f"unsupported dtype {dtype}") responseArrays = generic_msg(cmd="sparse_to_pdarrays", args={"matrix": self}) - array_list = create_pdarrays(responseArrays); + array_list = create_pdarrays(responseArrays) return array_list """""" + def fill_vals(self, a: pdarray): generic_msg(cmd="fill_sparse_vals", args={"matrix": self, "vals": a}) - + + # creates sparray object # only after: # all values have been checked by python module and... diff --git a/pytest.ini b/pytest.ini index 26088e57fe..8b7316accf 100644 --- a/pytest.ini +++ b/pytest.ini @@ -44,6 +44,7 @@ testpaths = tests/series_test.py tests/setops_test.py tests/sort_test.py + tests/sparse_test.py tests/stats_test.py tests/string_test.py tests/symbol_table_test.py From 139afdbcc09d84ed6d9b4acbcde5a687e6232723 Mon Sep 17 00:00:00 2001 From: Shreyas Khandekar <60454060+ShreyasKhandekar@users.noreply.github.com> Date: Wed, 11 Sep 2024 12:52:03 -0500 Subject: [PATCH 06/10] Add sparse matrix compat module for 2.0 Signed-off-by: Shreyas Khandekar <60454060+ShreyasKhandekar@users.noreply.github.com> --- src/SparseMatrix.chpl | 1 + .../eq-20/ArkoudaSparseMatrixCompat.chpl | 86 +++++++++++++++++++ .../eq-21/ArkoudaSparseMatrixCompat.chpl | 1 + .../ge-22/ArkoudaSparseMatrixCompat.chpl | 1 + 4 files changed, 89 insertions(+) create mode 100644 src/compat/eq-20/ArkoudaSparseMatrixCompat.chpl create mode 100644 src/compat/eq-21/ArkoudaSparseMatrixCompat.chpl create mode 100644 src/compat/ge-22/ArkoudaSparseMatrixCompat.chpl diff --git a/src/SparseMatrix.chpl b/src/SparseMatrix.chpl index 5459e3cb60..cb92ccaf0b 100644 --- a/src/SparseMatrix.chpl +++ b/src/SparseMatrix.chpl @@ -1,6 +1,7 @@ module SparseMatrix { public use SpsMatUtil; + use ArkoudaSparseMatrixCompat; // Quick and dirty, not permanent diff --git 
a/src/compat/eq-20/ArkoudaSparseMatrixCompat.chpl b/src/compat/eq-20/ArkoudaSparseMatrixCompat.chpl new file mode 100644 index 0000000000..1ab8429fd5 --- /dev/null +++ b/src/compat/eq-20/ArkoudaSparseMatrixCompat.chpl @@ -0,0 +1,86 @@ +module ArkoudaSparseMatrixCompat { + use SparseBlockDist; + + proc SparseBlockDom.setLocalSubdomain(locIndices, loc: locale = here) { + if loc != here then + halt("setLocalSubdomain() doesn't currently support remote updates"); + ref myBlock = this.myLocDom!.mySparseBlock; + if myBlock.type != locIndices.type then + compilerError("setLocalSubdomain() expects its argument to be of type ", + myBlock.type:string); + else + myBlock = locIndices; + } + + proc SparseBlockArr.getLocalSubarray(localeRow, localeCol) const ref { + return this.locArr[localeRow, localeCol]!.myElems; + } + + proc SparseBlockArr.getLocalSubarray(localeIdx) const ref { + return this.locArr[localeIdx]!.myElems; + } + + proc SparseBlockArr.setLocalSubarray(locNonzeroes, loc: locale = here) { + if loc != here then + halt("setLocalSubarray() doesn't currently support remote updates"); + ref myBlock = this.myLocArr!.myElems; + if myBlock.type != locNonzeroes.type then + compilerError("setLocalSubarray() expects its argument to be of type ", + myBlock.type:string); + else + myBlock.data = locNonzeroes.data; + } + + proc SparseBlockDom.dsiTargetLocales() const ref { + return dist.targetLocales; + } + + proc SparseBlockArr.dsiTargetLocales() const ref { + return dom.dsiTargetLocales(); + } + + use LayoutCS; + + proc CSDom.rows() { + return this.rowRange; + } + + proc CSDom.cols() { + return this.colRange; + } + + @chpldoc.nodoc + iter CSDom.uidsInRowCol(rc) { + for uid in startIdx[rc].. 
Date: Wed, 11 Sep 2024 15:21:52 -0500 Subject: [PATCH 07/10] Add missing newlines at the end of files Signed-off-by: Shreyas Khandekar <60454060+ShreyasKhandekar@users.noreply.github.com> --- src/SparseMatrixMsg.chpl | 2 +- tests/sparse_test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/SparseMatrixMsg.chpl b/src/SparseMatrixMsg.chpl index 52248a812b..9709649701 100644 --- a/src/SparseMatrixMsg.chpl +++ b/src/SparseMatrixMsg.chpl @@ -153,4 +153,4 @@ module SparseMatrixMsg { registerFunction("sparse_to_pdarrays", sparseMatrixtoPdarray, getModuleName()); registerFunction("fill_sparse_vals", fillSparseMatrixMsg, getModuleName()); -} \ No newline at end of file +} diff --git a/tests/sparse_test.py b/tests/sparse_test.py index baa9b86ef3..4dc70ecca7 100644 --- a/tests/sparse_test.py +++ b/tests/sparse_test.py @@ -91,4 +91,4 @@ def matmatmult(rowsA, colsA, valsA, rowsB, colsB, valsB): # Check the result is correct assert np.all(result_rows == ans_rows) assert np.all(result_cols == ans_cols) - assert np.all(result_vals == ans_vals) \ No newline at end of file + assert np.all(result_vals == ans_vals) From ad71480f76d06c0b39cbb6320bf57f3fe5f7613e Mon Sep 17 00:00:00 2001 From: Shreyas Khandekar <60454060+ShreyasKhandekar@users.noreply.github.com> Date: Wed, 11 Sep 2024 17:59:46 -0500 Subject: [PATCH 08/10] List public object in sparrayclass with __all__ Signed-off-by: Shreyas Khandekar <60454060+ShreyasKhandekar@users.noreply.github.com> --- arkouda/sparrayclass.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arkouda/sparrayclass.py b/arkouda/sparrayclass.py index a3c6645b0a..d1449547b0 100644 --- a/arkouda/sparrayclass.py +++ b/arkouda/sparrayclass.py @@ -14,6 +14,10 @@ logger = getArkoudaLogger(name="sparrayclass") +__all__ = [ + "sparray", + "create_sparray", +] class sparray: """ From a75ee41717439499e30135baf690eaf10f854bf5 Mon Sep 17 00:00:00 2001 From: Shreyas Khandekar <60454060+ShreyasKhandekar@users.noreply.github.com> Date: 
Wed, 11 Sep 2024 18:19:35 -0500 Subject: [PATCH 09/10] Comment out non distributed version of mult Comment out the non distributed version of the sparse matrix multiplication and also reduce the problem size in the test to make sure we don't run out of memory Signed-off-by: Shreyas Khandekar <60454060+ShreyasKhandekar@users.noreply.github.com> --- arkouda/sparrayclass.py | 4 ++-- src/SparseMatrix.chpl | 12 ++++++------ tests/sparse_test.py | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arkouda/sparrayclass.py b/arkouda/sparrayclass.py index d1449547b0..53217ab3c3 100644 --- a/arkouda/sparrayclass.py +++ b/arkouda/sparrayclass.py @@ -7,8 +7,7 @@ from typeguard import typechecked from arkouda.client import generic_msg -from arkouda.dtypes import NumericDTypes, dtype -from arkouda.dtypes import int_scalars +from arkouda.dtypes import NumericDTypes, dtype, int_scalars from arkouda.logger import getArkoudaLogger from arkouda.pdarrayclass import create_pdarrays, pdarray @@ -19,6 +18,7 @@ "create_sparray", ] + class sparray: """ The class for sparse arrays. 
This class contains only the diff --git a/src/SparseMatrix.chpl b/src/SparseMatrix.chpl index cb92ccaf0b..b2cfac5550 100644 --- a/src/SparseMatrix.chpl +++ b/src/SparseMatrix.chpl @@ -44,14 +44,14 @@ module SparseMatrix { } // sparse, outer, matrix-matrix multiplication algorithm; A is assumed // CSC and B CSR - proc sparseMatMatMult(A, B) { - var spsData: sparseMatDat; + // proc sparseMatMatMult(A, B) { + // var spsData: sparseMatDat; - sparseMatMatMult(A, B, spsData); + // sparseMatMatMult(A, B, spsData); - var C = makeSparseMat(A.domain.parentDom, spsData); - return C; - } + // var C = makeSparseMat(A.domain.parentDom, spsData); + // return C; + // } // This version forms the guts of the above and permits a running set // of nonzeroes to be passed in and updated rather than assuming that diff --git a/tests/sparse_test.py b/tests/sparse_test.py index 4dc70ecca7..44892d2a3b 100644 --- a/tests/sparse_test.py +++ b/tests/sparse_test.py @@ -73,8 +73,8 @@ def matmatmult(rowsA, colsA, valsA, rowsB, colsB, valsB): result_rows, result_cols, result_vals = zip(*[(r, c, v) for (r, c), v in result.items()]) return list(result_rows), list(result_cols), list(result_vals) - matA = ak.random_sparse_matrix(100, 1, 'CSC') # Make it fully dense to make testing easy - matB = ak.random_sparse_matrix(100, 1, 'CSR') # Make it fully dense to make testing easy + matA = ak.random_sparse_matrix(10, 1, 'CSC') # Make it fully dense to make testing easy + matB = ak.random_sparse_matrix(10, 1, 'CSR') # Make it fully dense to make testing easy fill_vals_a = ak.randint(0, 10, matA.nnz) fill_vals_b = ak.randint(0, 10, matB.nnz) matA.fill_vals(fill_vals_a) From 5e642bc44420781ca88744b449fe02d12c6c25f7 Mon Sep 17 00:00:00 2001 From: Shreyas Khandekar <60454060+ShreyasKhandekar@users.noreply.github.com> Date: Wed, 18 Sep 2024 15:28:37 -0500 Subject: [PATCH 10/10] Changes made based on feedback Signed-off-by: Shreyas Khandekar <60454060+ShreyasKhandekar@users.noreply.github.com> --- 
arkouda/sparrayclass.py | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/arkouda/sparrayclass.py b/arkouda/sparrayclass.py index 53217ab3c3..7e859f6a77 100644 --- a/arkouda/sparrayclass.py +++ b/arkouda/sparrayclass.py @@ -102,36 +102,17 @@ def __str__(self): # This won't work out of the box for sparrays need to add th """ Converts the sparse matrix to a list of 3 pdarrays (rows, cols, vals) + Returns ------- List[ak.pdarray] A list of 3 pdarrays which contain the row indices, the column indices, and the values at the respective indices within the sparse matrix. - - Raises - ------ - RuntimeError - Raised if there is a server-side error thrown, if the pdarray size - exceeds the built-in client.maxTransferBytes size limit, or if the bytes - received does not match expected number of bytes - Notes - ----- - The number of bytes in the array cannot exceed ``client.maxTransferBytes``, - otherwise a ``RuntimeError`` will be raised. This is to protect the user - from overflowing the memory of the system on which the Python client - is running, under the assumption that the server is running on a - distributed system with much more memory than the client. The user - may override this limit by setting client.maxTransferBytes to a larger - value, but proceed with caution. - Examples -------- >>> a = ak.random_sparse_matrix(100,0.2,"CSR"); >>> a.to_pdarray() - ??? - - >>> type(a.to_ndarray()) - ??? + [array([1 1 1 ... 100 100 100]), array([17 21 29 ... 75 77 85]), array([0 0 0 ... 0 0 0])] """ @typechecked