Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ private[spark] class MatrixUDT extends UserDefinedType[Matrix] {
* @param values matrix entries in column major if not transposed or in row major otherwise
* @param isTransposed whether the matrix is transposed. If true, `values` stores the matrix in
* row major.
* @since 1.0.0
*/
@SQLUserDefinedType(udt = classOf[MatrixUDT])
class DenseMatrix(
Expand All @@ -254,6 +255,7 @@ class DenseMatrix(
* @param numRows number of rows
* @param numCols number of columns
* @param values matrix entries in column major
* @since 1.3.0
*/
def this(numRows: Int, numCols: Int, values: Array[Double]) =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ping

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @MechCoder I sincerely apologize for my ignorance here! I am a little new to Scala and am not sure what I am missing here. I will be happy to fix it if you can provide some clarification. I have fixed the other errors that you identified below and will update the PR.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh never mind, I could notice that from the diff view :( . Looks all right to me.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

:) Thanks!

this(numRows, numCols, values, false)
Expand All @@ -279,6 +281,9 @@ class DenseMatrix(

private[mllib] def apply(i: Int): Double = values(i)

/**
 * Returns the entry at row `i`, column `j`. The pair is translated by `index` into a
 * position in the backing `values` array, and the element stored there is returned.
 * @since 1.3.0
 */
override def apply(i: Int, j: Int): Double = {
  val pos = index(i, j)
  values(pos)
}

private[mllib] def index(i: Int, j: Int): Int = {
Expand All @@ -289,6 +294,9 @@ class DenseMatrix(
values(index(i, j)) = v
}

/**
 * Returns a new `DenseMatrix` with the same dimensions and a cloned `values` array, so
 * later writes to either matrix do not affect the other.
 *
 * The `isTransposed` flag is carried over to the copy. The three-argument constructor
 * always passes `false`, which would make a copy of a transposed (row-major) matrix
 * reinterpret its values as column-major.
 * @since 1.4.0
 */
override def copy: DenseMatrix =
  new DenseMatrix(numRows, numCols, values.clone(), isTransposed)

private[spark] def map(f: Double => Double) = new DenseMatrix(numRows, numCols, values.map(f),
Expand All @@ -304,6 +312,9 @@ class DenseMatrix(
this
}

/**
 * Returns a new `DenseMatrix` with the row and column counts swapped and the
 * `isTransposed` flag inverted. The `values` array is passed through as-is (not cloned),
 * so the result shares its storage with this matrix.
 * @since 1.3.0
 */
override def transpose: DenseMatrix = {
  val flipped = !isTransposed
  new DenseMatrix(numCols, numRows, values, flipped)
}

private[spark] override def foreachActive(f: (Int, Int, Double) => Unit): Unit = {
Expand Down Expand Up @@ -334,13 +345,20 @@ class DenseMatrix(
}
}

/**
 * Counts the entries of `values` that are not equal to zero.
 * @since 1.5.0
 */
override def numNonzeros: Int = values.count(entry => entry != 0)

/**
 * Number of stored entries, i.e. the length of the backing `values` array.
 * @since 1.5.0
 */
override def numActives: Int = {
  values.length
}

/**
* Generate a `SparseMatrix` from the given `DenseMatrix`. The new matrix will have isTransposed
* set to false.
* @since 1.3.0
*/
def toSparse: SparseMatrix = {
val spVals: MArrayBuilder[Double] = new MArrayBuilder.ofDouble
Expand Down Expand Up @@ -368,6 +386,7 @@ class DenseMatrix(

/**
* Factory methods for [[org.apache.spark.mllib.linalg.DenseMatrix]].
* @since 1.3.0
*/
object DenseMatrix {

Expand All @@ -376,6 +395,7 @@ object DenseMatrix {
* @param numRows number of rows of the matrix
* @param numCols number of columns of the matrix
* @return `DenseMatrix` with size `numRows` x `numCols` and values of zeros
* @since 1.3.0
*/
def zeros(numRows: Int, numCols: Int): DenseMatrix = {
require(numRows.toLong * numCols <= Int.MaxValue,
Expand All @@ -388,6 +408,7 @@ object DenseMatrix {
* @param numRows number of rows of the matrix
* @param numCols number of columns of the matrix
* @return `DenseMatrix` with size `numRows` x `numCols` and values of ones
* @since 1.3.0
*/
def ones(numRows: Int, numCols: Int): DenseMatrix = {
require(numRows.toLong * numCols <= Int.MaxValue,
Expand All @@ -399,6 +420,7 @@ object DenseMatrix {
* Generate an Identity Matrix in `DenseMatrix` format.
* @param n number of rows and columns of the matrix
* @return `DenseMatrix` with size `n` x `n` and values of ones on the diagonal
* @since 1.3.0
*/
def eye(n: Int): DenseMatrix = {
val identity = DenseMatrix.zeros(n, n)
Expand All @@ -416,6 +438,7 @@ object DenseMatrix {
* @param numCols number of columns of the matrix
* @param rng a random number generator
* @return `DenseMatrix` with size `numRows` x `numCols` and values in U(0, 1)
* @since 1.3.0
*/
def rand(numRows: Int, numCols: Int, rng: Random): DenseMatrix = {
require(numRows.toLong * numCols <= Int.MaxValue,
Expand All @@ -429,6 +452,7 @@ object DenseMatrix {
* @param numCols number of columns of the matrix
* @param rng a random number generator
* @return `DenseMatrix` with size `numRows` x `numCols` and values in N(0, 1)
* @since 1.3.0
*/
def randn(numRows: Int, numCols: Int, rng: Random): DenseMatrix = {
require(numRows.toLong * numCols <= Int.MaxValue,
Expand All @@ -441,6 +465,7 @@ object DenseMatrix {
* @param vector a `Vector` that will form the values on the diagonal of the matrix
* @return Square `DenseMatrix` with size `values.length` x `values.length` and `values`
* on the diagonal
* @since 1.3.0
*/
def diag(vector: Vector): DenseMatrix = {
val n = vector.size
Expand Down Expand Up @@ -476,6 +501,7 @@ object DenseMatrix {
* @param isTransposed whether the matrix is transposed. If true, the matrix can be considered
* Compressed Sparse Row (CSR) format, where `colPtrs` behaves as rowPtrs,
* and `rowIndices` behave as colIndices, and `values` are stored in row major.
* @since 1.2.0
*/
@SQLUserDefinedType(udt = classOf[MatrixUDT])
class SparseMatrix(
Expand Down Expand Up @@ -513,6 +539,7 @@ class SparseMatrix(
* @param rowIndices the row index of the entry. They must be in strictly increasing
* order for each column
* @param values non-zero matrix entries in column major
* @since 1.3.0
*/
def this(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here as well.

numRows: Int,
Expand All @@ -530,6 +557,9 @@ class SparseMatrix(
}
}

/**
* @since 1.3.0
*/
override def apply(i: Int, j: Int): Double = {
val ind = index(i, j)
if (ind < 0) 0.0 else values(ind)
Expand All @@ -553,6 +583,9 @@ class SparseMatrix(
}
}

/**
 * Returns a new `SparseMatrix` with the same dimensions and a cloned `values` array.
 * `colPtrs` and `rowIndices` are passed through without cloning, so the copy shares
 * those two arrays with this matrix.
 *
 * The `isTransposed` flag is carried over to the copy; leaving it out of the constructor
 * call would drop the flag and change how the stored arrays are interpreted for a
 * transposed matrix.
 * @since 1.4.0
 */
override def copy: SparseMatrix = {
  new SparseMatrix(numRows, numCols, colPtrs, rowIndices, values.clone(), isTransposed)
}
Expand All @@ -570,6 +603,9 @@ class SparseMatrix(
this
}

/**
 * Returns a new `SparseMatrix` with the row and column counts swapped and the
 * `isTransposed` flag inverted. `colPtrs`, `rowIndices` and `values` are passed through
 * as-is (not cloned), so the result shares its storage with this matrix.
 * @since 1.3.0
 */
override def transpose: SparseMatrix = {
  val flipped = !isTransposed
  new SparseMatrix(numCols, numRows, colPtrs, rowIndices, values, flipped)
}

Expand Down Expand Up @@ -603,19 +639,27 @@ class SparseMatrix(
/**
* Generate a `DenseMatrix` from the given `SparseMatrix`. The new matrix will have isTransposed
* set to false.
* @since 1.3.0
*/
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The tags for numNonzeros and numActives are missing.

def toDense: DenseMatrix = {
  // `toArray` supplies the entries; no transposed flag is passed to the constructor.
  val denseValues = toArray
  new DenseMatrix(numRows, numCols, denseValues)
}

/**
 * Counts the stored entries of `values` that are not equal to zero.
 * @since 1.5.0
 */
override def numNonzeros: Int = values.count(entry => entry != 0)

/**
 * Number of stored entries, i.e. the length of the `values` array.
 * @since 1.5.0
 */
override def numActives: Int = {
  values.length
}

}

/**
* Factory methods for [[org.apache.spark.mllib.linalg.SparseMatrix]].
* @since 1.3.0
*/
object SparseMatrix {

Expand All @@ -627,6 +671,7 @@ object SparseMatrix {
* @param numCols number of columns of the matrix
* @param entries Array of (i, j, value) tuples
* @return The corresponding `SparseMatrix`
* @since 1.3.0
*/
def fromCOO(numRows: Int, numCols: Int, entries: Iterable[(Int, Int, Double)]): SparseMatrix = {
val sortedEntries = entries.toSeq.sortBy(v => (v._2, v._1))
Expand Down Expand Up @@ -675,6 +720,7 @@ object SparseMatrix {
* Generate an Identity Matrix in `SparseMatrix` format.
* @param n number of rows and columns of the matrix
* @return `SparseMatrix` with size `n` x `n` and values of ones on the diagonal
* @since 1.3.0
*/
def speye(n: Int): SparseMatrix = {
new SparseMatrix(n, n, (0 to n).toArray, (0 until n).toArray, Array.fill(n)(1.0))
Expand Down Expand Up @@ -744,6 +790,7 @@ object SparseMatrix {
* @param density the desired density for the matrix
* @param rng a random number generator
* @return `SparseMatrix` with size `numRows` x `numCols` and values in U(0, 1)
* @since 1.3.0
*/
def sprand(numRows: Int, numCols: Int, density: Double, rng: Random): SparseMatrix = {
val mat = genRandMatrix(numRows, numCols, density, rng)
Expand All @@ -757,6 +804,7 @@ object SparseMatrix {
* @param density the desired density for the matrix
* @param rng a random number generator
* @return `SparseMatrix` with size `numRows` x `numCols` and values in N(0, 1)
* @since 1.3.0
*/
def sprandn(numRows: Int, numCols: Int, density: Double, rng: Random): SparseMatrix = {
val mat = genRandMatrix(numRows, numCols, density, rng)
Expand All @@ -768,6 +816,7 @@ object SparseMatrix {
* @param vector a `Vector` that will form the values on the diagonal of the matrix
* @return Square `SparseMatrix` with size `values.length` x `values.length` and non-zero
* `values` on the diagonal
* @since 1.3.0
*/
def spdiag(vector: Vector): SparseMatrix = {
val n = vector.size
Expand All @@ -784,6 +833,7 @@ object SparseMatrix {

/**
* Factory methods for [[org.apache.spark.mllib.linalg.Matrix]].
* @since 1.0.0
*/
object Matrices {

Expand All @@ -793,6 +843,7 @@ object Matrices {
* @param numRows number of rows
* @param numCols number of columns
* @param values matrix entries in column major
* @since 1.0.0
*/
def dense(numRows: Int, numCols: Int, values: Array[Double]): Matrix = {
new DenseMatrix(numRows, numCols, values)
Expand All @@ -806,6 +857,7 @@ object Matrices {
* @param colPtrs the index corresponding to the start of a new column
* @param rowIndices the row index of the entry
* @param values non-zero matrix entries in column major
* @since 1.2.0
*/
def sparse(
numRows: Int,
Expand Down Expand Up @@ -839,6 +891,7 @@ object Matrices {
* @param numRows number of rows of the matrix
* @param numCols number of columns of the matrix
* @return `Matrix` with size `numRows` x `numCols` and values of zeros
* @since 1.2.0
*/
def zeros(numRows: Int, numCols: Int): Matrix = {
  // Delegates to the dense factory; the result is exposed as the `Matrix` supertype.
  DenseMatrix.zeros(numRows, numCols)
}

Expand All @@ -847,20 +900,23 @@ object Matrices {
* @param numRows number of rows of the matrix
* @param numCols number of columns of the matrix
* @return `Matrix` with size `numRows` x `numCols` and values of ones
* @since 1.2.0
*/
def ones(numRows: Int, numCols: Int): Matrix = {
  // Delegates to the dense factory; the result is exposed as the `Matrix` supertype.
  DenseMatrix.ones(numRows, numCols)
}

/**
 * Builds a dense identity matrix via `DenseMatrix.eye` and returns it typed as `Matrix`.
 * @param n number of rows and columns of the matrix
 * @return `Matrix` with size `n` x `n` and values of ones on the diagonal
 * @since 1.2.0
 */
def eye(n: Int): Matrix = {
  val identity: Matrix = DenseMatrix.eye(n)
  identity
}

/**
 * Builds a sparse identity matrix via `SparseMatrix.speye` and returns it typed as `Matrix`.
 * @param n number of rows and columns of the matrix
 * @return `Matrix` with size `n` x `n` and values of ones on the diagonal
 * @since 1.3.0
 */
def speye(n: Int): Matrix = {
  val identity: Matrix = SparseMatrix.speye(n)
  identity
}

Expand All @@ -870,6 +926,7 @@ object Matrices {
* @param numCols number of columns of the matrix
* @param rng a random number generator
* @return `Matrix` with size `numRows` x `numCols` and values in U(0, 1)
* @since 1.2.0
*/
def rand(numRows: Int, numCols: Int, rng: Random): Matrix = {
  // Delegates to the dense factory, forwarding the caller's generator unchanged.
  DenseMatrix.rand(numRows, numCols, rng)
}
Expand All @@ -881,6 +938,7 @@ object Matrices {
* @param density the desired density for the matrix
* @param rng a random number generator
* @return `Matrix` with size `numRows` x `numCols` and values in U(0, 1)
* @since 1.3.0
*/
def sprand(numRows: Int, numCols: Int, density: Double, rng: Random): Matrix = {
  // Delegates to the sparse factory, forwarding density and generator unchanged.
  SparseMatrix.sprand(numRows, numCols, density, rng)
}
Expand All @@ -891,6 +949,7 @@ object Matrices {
* @param numCols number of columns of the matrix
* @param rng a random number generator
* @return `Matrix` with size `numRows` x `numCols` and values in N(0, 1)
* @since 1.2.0
*/
def randn(numRows: Int, numCols: Int, rng: Random): Matrix = {
  // Delegates to the dense factory, forwarding the caller's generator unchanged.
  DenseMatrix.randn(numRows, numCols, rng)
}
Expand All @@ -902,6 +961,7 @@ object Matrices {
* @param density the desired density for the matrix
* @param rng a random number generator
* @return `Matrix` with size `numRows` x `numCols` and values in N(0, 1)
* @since 1.3.0
*/
def sprandn(numRows: Int, numCols: Int, density: Double, rng: Random): Matrix = {
  // Delegates to the sparse factory, forwarding density and generator unchanged.
  SparseMatrix.sprandn(numRows, numCols, density, rng)
}
Expand All @@ -911,6 +971,7 @@ object Matrices {
* @param vector a `Vector` that will form the values on the diagonal of the matrix
* @return Square `Matrix` with size `values.length` x `values.length` and `values`
* on the diagonal
* @since 1.2.0
*/
def diag(vector: Vector): Matrix = {
  // Delegates to the dense factory; the result is exposed as the `Matrix` supertype.
  DenseMatrix.diag(vector)
}

Expand All @@ -920,6 +981,7 @@ object Matrices {
* a sparse matrix. If the Array is empty, an empty `DenseMatrix` will be returned.
* @param matrices array of matrices
* @return a single `Matrix` composed of the matrices that were horizontally concatenated
* @since 1.3.0
*/
def horzcat(matrices: Array[Matrix]): Matrix = {
if (matrices.isEmpty) {
Expand Down Expand Up @@ -978,6 +1040,7 @@ object Matrices {
* a sparse matrix. If the Array is empty, an empty `DenseMatrix` will be returned.
* @param matrices array of matrices
* @return a single `Matrix` composed of the matrices that were vertically concatenated
* @since 1.3.0
*/
def vertcat(matrices: Array[Matrix]): Matrix = {
if (matrices.isEmpty) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import org.apache.spark.annotation.Experimental
/**
 * :: Experimental ::
 * Represents singular value decomposition (SVD) factors.
 *
 * @tparam UType type of the `U` factor
 * @tparam VType type of the `V` factor
 * @param U the `U` factor (presumably the left singular vectors -- confirm against the producer)
 * @param s the `s` factor, held as a `Vector` (presumably the singular values -- confirm)
 * @param V the `V` factor (presumably the right singular vectors -- confirm against the producer)
 * @since 1.0.0
 */
@Experimental
case class SingularValueDecomposition[UType, VType](U: UType, s: Vector, V: VType)
Loading