-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-6364] [MLlib] Implement equals and hashcode for Matrix #5081
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -114,6 +114,52 @@ sealed trait Matrix extends Serializable { | |
| * corresponding value in the matrix with type `Double`. | ||
| */ | ||
| private[spark] def foreachActive(f: (Int, Int, Double) => Unit) | ||
|
|
||
| override def hashCode(): Int = { | ||
| var result: Int = 31 + numRows | ||
| result = 31 * result + numCols | ||
| this.foreachActive { case (rowInd, colInd, value) => | ||
| // ignore explict 0 for comparison between sparse and dense | ||
| if (value != 0) { | ||
| result = 31 * result + rowInd | ||
| result = 31 * result + colInd | ||
| // refer to {@link java.util.Arrays.equals} for hash algorithm | ||
| val bits = java.lang.Double.doubleToLongBits(value) | ||
| result = 31 * result + (bits ^ (bits >>> 32)).toInt | ||
| } | ||
| } | ||
| result | ||
| } | ||
|
|
||
| override def equals(other: Any): Boolean = { | ||
| other match { | ||
| case mat: Matrix => | ||
| if (mat.numRows != this.numRows || mat.numCols != this.numCols) return false | ||
| (this, mat) match { | ||
| case (dm1: DenseMatrix, dm2: DenseMatrix) => | ||
| Arrays.equals(dm1.toArray, dm2.toArray) | ||
| case (sm1: SparseMatrix, sm2: SparseMatrix) => | ||
| // For the case in which one matrix is CSC and the other is CSR | ||
| // the values, colPtrs and rowIndices need not be the same. | ||
| // When both matrices are of the same type, it is sufficient to check that | ||
| // the values, colPtrs and rowIndices are the same. | ||
| if (sm1.isTransposed != sm2.isTransposed) { | ||
| if (sm1.values.length != sm2.values.length) return false | ||
| sm1.foreachActive { | ||
| case (i, j, value) => if (value != sm2(i, j)) return false | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| } | ||
| } else { | ||
| if (sm1.values != sm2.values) return false | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not correct because one matrix may contain explicit zeros. Please include this case in the unit test. |
||
| if (sm1.colPtrs != sm2.colPtrs) return false | ||
| if (sm1.rowIndices != sm2.rowIndices) return false | ||
| } | ||
| true | ||
| case (dm1: DenseMatrix, sm1: SparseMatrix) => Matrices.equals(dm1, sm1) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| case (sm1: SparseMatrix, dm1: DenseMatrix) => Matrices.equals(dm1, sm1) | ||
| } | ||
| case _ => false | ||
| } | ||
| } | ||
| } | ||
|
|
||
| @DeveloperApi | ||
|
|
@@ -814,6 +860,16 @@ object Matrices { | |
| } | ||
| } | ||
|
|
||
| /** | ||
| * Check equality between sparse/dense matrices | ||
| */ | ||
| private[mllib] def equals(denseMat: DenseMatrix, sparseMat: SparseMatrix): Boolean = { | ||
| sparseMat.foreachActive { (row, col, value) => | ||
| if (value != denseMat(row, col)) return false | ||
| } | ||
| return true | ||
| } | ||
|
|
||
| /** | ||
| * Generate a `Matrix` consisting of zeros. | ||
| * @param numRows number of rows of the matrix | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hash
isTransposed.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this is be done? Even if
isTransposedis not same, two matrices can be equal.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you please verify this?
There might be cases where a CSR matrix is the same as a CSC matrix. See (https://github.com/apache/spark/pull/5081/files#diff-8fbb9a5e1adf997a37f4d05521b8a1acR488)
If I hash
isTransposedI would give different hashes for both of these, which is not desirable.