diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaALS.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaALS.java index 95a430f1da23..e11f65ca385e 100644 --- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaALS.java +++ b/examples/src/main/java/org/apache/spark/examples/mllib/JavaALS.java @@ -76,9 +76,12 @@ public static void main(String[] args) { JavaRDD lines = sc.textFile(args[0]); JavaRDD ratings = lines.map(new ParseRating()); - - MatrixFactorizationModel model = ALS.train(ratings.rdd(), rank, iterations, 0.01, blocks); - + + MatrixFactorizationModel model = new ALS().setRank(rank) + .setIterations(iterations) + .setBlocks(blocks) + .run(ratings.rdd()); + model.userFeatures().toJavaRDD().map(new FeaturesToString()).saveAsTextFile( outputDir + "/userFeatures"); model.productFeatures().toJavaRDD().map(new FeaturesToString()).saveAsTextFile( diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala index 1f4ca4fbe777..57e05761df50 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala @@ -18,12 +18,10 @@ package org.apache.spark.examples.mllib import scala.collection.mutable - import org.apache.log4j.{Level, Logger} import scopt.OptionParser - +import breeze.optimize.proximal.Constraint import org.apache.spark.{SparkConf, SparkContext} -import org.apache.spark.SparkContext._ import org.apache.spark.mllib.recommendation.{ALS, MatrixFactorizationModel, Rating} import org.apache.spark.rdd.RDD @@ -37,19 +35,25 @@ import org.apache.spark.rdd.RDD * If you use it as a template to create your own app, please use `spark-submit` to submit your app. */ object MovieLensALS { - case class Params( - input: String = null, - kryo: Boolean = false, - numIterations: Int = 20, - lambda: Double = 1.0, - rank: Int = 10, - numUserBlocks: Int = -1, - numProductBlocks: Int = -1, - implicitPrefs: Boolean = false) extends AbstractParams[Params] + input: String = null, + kryo: Boolean = false, + numIterations: Int = 20, + userConstraint: String = "SMOOTH", + productConstraint: String = "SMOOTH", + userLambda: Double = 1.0, + productLambda: Double = 1.0, + rank: Int = 10, + delimiter: String = "::", + numUserBlocks: Int = -1, + numProductBlocks: Int = -1, + implicitPrefs: Boolean = false) extends AbstractParams[Params] def main(args: Array[String]) { val defaultParams = Params() + + val userConstraints = Constraint.values.toList.mkString(",") + val productConstraints = Constraint.values.toList.mkString(",") val parser = new OptionParser[Params]("MovieLensALS") { head("MovieLensALS: an example app for ALS on MovieLens data.") @@ -59,9 +63,21 @@ object MovieLensALS { opt[Int]("numIterations") .text(s"number of iterations, default: ${defaultParams.numIterations}") .action((x, c) => c.copy(numIterations = x)) - opt[Double]("lambda") - .text(s"lambda (smoothing constant), default: ${defaultParams.lambda}") - .action((x, c) => c.copy(lambda = x)) + opt[String]("userConstraint") + .text(s"user constraint options ${userConstraints} default: SMOOTH") + .action((x, c) => c.copy(userConstraint = x)) + opt[String]("productConstraint") + .text(s"product constraint options ${productConstraints} default: SMOOTH") + .action((x, c) => c.copy(productConstraint = x)) + opt[Double]("lambdaUser") + .text(s"lambda for user regularization, default: ${defaultParams.userLambda}") + .action((x, c) => c.copy(userLambda = x)) + opt[Double]("lambdaProduct") + .text(s"lambda for product regularization, default: ${defaultParams.productLambda}") + .action((x, c) => c.copy(productLambda = x)) + opt[String]("delimiter") + .text(s"sparse dataset delimiter, default: ${defaultParams.delimiter}") + .action((x, c) => c.copy(delimiter = x)) opt[Unit]("kryo") .text("use Kryo serialization") .action((_, c) => c.copy(kryo = true)) @@ -84,7 +100,8 @@ object MovieLensALS { | | bin/spark-submit --class org.apache.spark.examples.mllib.MovieLensALS \ | examples/target/scala-*/spark-examples-*.jar \ - | --rank 5 --numIterations 20 --lambda 1.0 --kryo \ + | --rank 5 --numIterations 20 --userConstraint SMOOTH --productConstraint SPARSE + | --userLambda 0.01 --productLambda 1.0 --kryo\ | data/mllib/sample_movielens_data.txt """.stripMargin) } @@ -107,9 +124,10 @@ object MovieLensALS { Logger.getRootLogger.setLevel(Level.WARN) val implicitPrefs = params.implicitPrefs - + val delimiter = params.delimiter + val ratings = sc.textFile(params.input).map { line => - val fields = line.split("::") + val fields = line.split(delimiter) if (implicitPrefs) { /* * MovieLens ratings are on a scale of 1-5: @@ -136,9 +154,10 @@ object MovieLensALS { val numMovies = ratings.map(_.product).distinct().count() println(s"Got $numRatings ratings from $numUsers users on $numMovies movies.") - - val splits = ratings.randomSplit(Array(0.8, 0.2)) + + val splits = ratings.randomSplit(Array(0.8, 0.2), 1L) val training = splits(0).cache() + val test = if (params.implicitPrefs) { /* * 0 means "don't know" and positive values mean "confident that the prediction should be 1". @@ -158,14 +177,23 @@ object MovieLensALS { ratings.unpersist(blocking = false) - val model = new ALS() + val userConstraint = Constraint.withName(params.userConstraint) + val productConstraint = Constraint.withName(params.productConstraint) + + val als = new ALS() .setRank(params.rank) .setIterations(params.numIterations) - .setLambda(params.lambda) + .setUserConstraint(userConstraint) + .setProductConstraint(productConstraint) + .setUserLambda(params.userLambda) + .setProductLambda(params.productLambda) .setImplicitPrefs(params.implicitPrefs) .setUserBlocks(params.numUserBlocks) .setProductBlocks(params.numProductBlocks) - .run(training) + + println(s"ALS with userConstraint ${userConstraint} productConstraint ${productConstraint}") + + val model = als.run(training) val rmse = computeRmse(model, test, params.implicitPrefs) @@ -179,11 +207,11 @@ object MovieLensALS { : Double = { def mapPredictedRating(r: Double) = if (implicitPrefs) math.max(math.min(r, 1.0), 0.0) else r - val predictions: RDD[Rating] = model.predict(data.map(x => (x.user, x.product))) - val predictionsAndRatings = predictions.map{ x => + val predictionsAndRatings = predictions.map { x => ((x.user, x.product), mapPredictedRating(x.rating)) }.join(data.map(x => ((x.user, x.product), x.rating))).values + math.sqrt(predictionsAndRatings.map(x => (x._1 - x._2) * (x._1 - x._2)).mean()) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala index 514b4ef98dc5..2a5b5a493959 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala @@ -20,6 +20,7 @@ package org.apache.spark.ml.recommendation import java.{util => ju} import java.io.IOException +import breeze.optimize.proximal.{ProximalL1, QuadraticMinimizer} import scala.collection.mutable import scala.reflect.ClassTag import scala.util.Sorting @@ -43,6 +44,9 @@ import org.apache.spark.storage.StorageLevel import org.apache.spark.util.Utils import org.apache.spark.util.collection.{OpenHashMap, OpenHashSet, SortDataFormat, Sorter} import org.apache.spark.util.random.XORShiftRandom +import breeze.optimize.proximal.Constraint._ +import breeze.linalg.{DenseVector=>BrzVector} +import breeze.linalg.{DenseMatrix=>BrzMatrix} /** * Common params for ALS. @@ -72,8 +76,7 @@ private[recommendation] trait ALSParams extends Params with HasMaxIter with HasR * Param for number of item blocks. * @group param */ - val numItemBlocks = - new IntParam(this, "numItemBlocks", "number of item blocks", Some(10)) + val numItemBlocks = new IntParam(this, "numItemBlocks", "number of item blocks", Some(10)) /** @group getParam */ def getNumItemBlocks: Int = get(numItemBlocks) @@ -289,11 +292,17 @@ class ALS extends Estimator[ALSModel] with ALSParams { .map { row => Rating(row.getInt(0), row.getInt(1), row.getFloat(2)) } + + val userRegParam = map(regParam) + val userConstraint = if(map(nonnegative)) POSITIVE else SMOOTH + val (userFactors, itemFactors) = ALS.train(ratings, rank = map(rank), numUserBlocks = map(numUserBlocks), numItemBlocks = map(numItemBlocks), - maxIter = map(maxIter), regParam = map(regParam), implicitPrefs = map(implicitPrefs), - alpha = map(alpha), nonnegative = map(nonnegative), - checkpointInterval = map(checkpointInterval)) + maxIter = map(maxIter), + userRegParam=userRegParam, itemRegParam=userRegParam, + implicitPrefs = map(implicitPrefs), + alpha = map(alpha), + userConstraint=userConstraint, itemConstraint=userConstraint) val model = new ALSModel(this, map, map(rank), userFactors, itemFactors) Params.inheritValues(map, this, model) model @@ -324,6 +333,58 @@ object ALS extends Logging { def solve(ne: NormalEquation, lambda: Double): Array[Float] } + /** QuadraticMinimization solver for least square problems. */ + private[recommendation] class QuadraticSolver(rank: Int, + constraint: Constraint) + extends LeastSquaresNESolver { + private val qm = QuadraticMinimizer(rank, constraint) + private val init = qm.initialize + // Elastic Net beta parameter for L1 regularization + private val beta = if (constraint==SPARSE) 0.99 else 0.0 + + /** Quadratic Minimization solver for least square problems with non-smooth constraints (L1) + * + * minimize 0.5x'Hx + c'x + g(z) + * s.t Aeq x = beq + * + * Affine constraints are optional, Supported g(z) are one of the following + * + * 1. z >= 0 + * 2. lb <= z <= ub + * 3. 1'z = s, s>=0 + * 4. lambda*L1(z) + * + * TO DO: Add the remaining constraints + * + * @param ne a [[NormalEquation]] instance that contains AtA, Atb, and n (number of instances) + * @param lambda regularization constant, which will be scaled by n + * @return the solution x + */ + override def solve(ne: NormalEquation, lambda: Double): Array[Float] = { + require(ne.k == rank, s"ALS:QuadraticSolver rank $rank expected ${ne.k}") + + // If Elastic Net formulation is being run, give (1-beta)*lambda to L2 and + // beta*lambda to L1. The nomenclature used here is exactly same as GLMNET + val scaledlambda = lambda * (1- beta) * ne.n + var i = 0 + var j = 2 + while (i < ne.triK) { + ne.ata(i) += scaledlambda + i += j + j += 1 + } + if (constraint == SPARSE) { + val regParamL1 = beta * lambda * ne.n + qm.getProximal.asInstanceOf[ProximalL1].setLambda(regParamL1) + } + val q = new BrzVector(ne.atb) + q *= -1.0 + val x = qm.minimize(ne.ata, q, init) + ne.reset() + x.data.map(x => x.toFloat) + } + } + /** Cholesky solver for least square problems. */ private[recommendation] class CholeskySolver extends LeastSquaresNESolver { @@ -496,10 +557,12 @@ object ALS extends Logging { numUserBlocks: Int = 10, numItemBlocks: Int = 10, maxIter: Int = 10, - regParam: Double = 1.0, + userRegParam: Double = 1.0, + itemRegParam: Double = 1.0, implicitPrefs: Boolean = false, alpha: Double = 1.0, - nonnegative: Boolean = false, + userConstraint: Constraint = SMOOTH, + itemConstraint: Constraint = SMOOTH, intermediateRDDStorageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK, finalRDDStorageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK, checkpointInterval: Int = 10, @@ -512,7 +575,27 @@ object ALS extends Logging { val itemPart = new ALSPartitioner(numItemBlocks) val userLocalIndexEncoder = new LocalIndexEncoder(userPart.numPartitions) val itemLocalIndexEncoder = new LocalIndexEncoder(itemPart.numPartitions) - val solver = if (nonnegative) new NNLSSolver else new CholeskySolver + val solver = System.getenv("solver") + + val userSolver = + if (userConstraint == POSITIVE) new NNLSSolver + else { + if (solver == "mllib") new CholeskySolver + else { + println(s"QuadraticSolver for users with constraint ${userConstraint.toString}") + new QuadraticSolver(rank, userConstraint) + } + } + + val itemSolver = + if (itemConstraint == POSITIVE) new NNLSSolver + else { + if (solver == "mllib") new CholeskySolver + else { + println(s"QuadraticSolver for items with constraint ${itemConstraint.toString}") + new QuadraticSolver(rank, itemConstraint) + } + } val blockRatings = partitionRatings(ratings, userPart, itemPart) .persist(intermediateRDDStorageLevel) val (userInBlocks, userOutBlocks) = @@ -546,8 +629,8 @@ object ALS extends Logging { for (iter <- 1 to maxIter) { userFactors.setName(s"userFactors-$iter").persist(intermediateRDDStorageLevel) val previousItemFactors = itemFactors - itemFactors = computeFactors(userFactors, userOutBlocks, itemInBlocks, rank, regParam, - userLocalIndexEncoder, implicitPrefs, alpha, solver) + itemFactors = computeFactors(userFactors, userOutBlocks, itemInBlocks, rank, userRegParam, + userLocalIndexEncoder, implicitPrefs, alpha, userSolver) previousItemFactors.unpersist() itemFactors.setName(s"itemFactors-$iter").persist(intermediateRDDStorageLevel) // TODO: Generalize PeriodicGraphCheckpointer and use it here. @@ -555,8 +638,8 @@ object ALS extends Logging { itemFactors.checkpoint() // itemFactors gets materialized in computeFactors. } val previousUserFactors = userFactors - userFactors = computeFactors(itemFactors, itemOutBlocks, userInBlocks, rank, regParam, - itemLocalIndexEncoder, implicitPrefs, alpha, solver) + userFactors = computeFactors(itemFactors, itemOutBlocks, userInBlocks, rank, itemRegParam, + itemLocalIndexEncoder, implicitPrefs, alpha, itemSolver) if (shouldCheckpoint(iter)) { deletePreviousCheckpointFile() previousCheckpointFile = itemFactors.getCheckpointFile @@ -565,16 +648,16 @@ object ALS extends Logging { } } else { for (iter <- 0 until maxIter) { - itemFactors = computeFactors(userFactors, userOutBlocks, itemInBlocks, rank, regParam, - userLocalIndexEncoder, solver = solver) + itemFactors = computeFactors(userFactors, userOutBlocks, itemInBlocks, rank, itemRegParam, + userLocalIndexEncoder, solver = itemSolver) if (shouldCheckpoint(iter)) { itemFactors.checkpoint() itemFactors.count() // checkpoint item factors and cut lineage deletePreviousCheckpointFile() previousCheckpointFile = itemFactors.getCheckpointFile } - userFactors = computeFactors(itemFactors, itemOutBlocks, userInBlocks, rank, regParam, - itemLocalIndexEncoder, solver = solver) + userFactors = computeFactors(itemFactors, itemOutBlocks, userInBlocks, rank, userRegParam, + itemLocalIndexEncoder, solver = userSolver) } } val userIdAndFactors = userInBlocks @@ -1102,6 +1185,7 @@ object ALS extends Logging { dstInBlocks.join(merged).mapValues { case (InBlock(dstIds, srcPtrs, srcEncodedIndices, ratings), srcFactors) => val sortedSrcFactors = new Array[FactorBlock](numSrcBlocks) + var solveTime = 0.0 srcFactors.foreach { case (srcBlockId, factors) => sortedSrcFactors(srcBlockId) = factors } @@ -1127,9 +1211,12 @@ object ALS extends Logging { } i += 1 } + val startTime = System.nanoTime() dstFactors(j) = solver.solve(ls, regParam) + solveTime += (System.nanoTime() - startTime) j += 1 } + logInfo(s"solveTime ${solveTime/1e6} ms") dstFactors } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala index e39156734794..9b445631bebb 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala @@ -25,10 +25,7 @@ import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import scala.language.existentials import scala.reflect.ClassTag - import net.razorvine.pickle._ - -import org.apache.spark.annotation.DeveloperApi import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} import org.apache.spark.api.python.SerDeUtil import org.apache.spark.mllib.classification._ @@ -52,6 +49,7 @@ import org.apache.spark.mllib.util.MLUtils import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel import org.apache.spark.util.Utils +import breeze.optimize.proximal.Constraint._ /** * The Java stubs necessary for the Python mllib bindings. It is called by Py4J on the Python side. @@ -375,17 +373,19 @@ private[python] class PythonMLLibAPI extends Serializable { ratingsJRDD: JavaRDD[Rating], rank: Int, iterations: Int, + constraint: Constraint, lambda: Double, blocks: Int, - nonnegative: Boolean, seed: java.lang.Long): MatrixFactorizationModel = { val als = new ALS() .setRank(rank) .setIterations(iterations) - .setLambda(lambda) + .setUserLambda(lambda) + .setProductLambda(lambda) + .setUserConstraint(constraint) + .setProductConstraint(constraint) .setBlocks(blocks) - .setNonnegative(nonnegative) if (seed != null) als.setSeed(seed) @@ -403,21 +403,23 @@ private[python] class PythonMLLibAPI extends Serializable { ratingsJRDD: JavaRDD[Rating], rank: Int, iterations: Int, + constraint: Constraint, lambda: Double, blocks: Int, alpha: Double, - nonnegative: Boolean, seed: java.lang.Long): MatrixFactorizationModel = { val als = new ALS() .setImplicitPrefs(true) .setRank(rank) .setIterations(iterations) - .setLambda(lambda) + .setUserLambda(lambda) + .setProductLambda(lambda) + .setUserConstraint(constraint) + .setProductConstraint(constraint) .setBlocks(blocks) .setAlpha(alpha) - .setNonnegative(nonnegative) - + if (seed != null) als.setSeed(seed) val model = als.run(ratingsJRDD.rdd) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/NNLS.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/NNLS.scala index 4766f7708295..1297939d2d39 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/NNLS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/NNLS.scala @@ -27,6 +27,7 @@ import com.github.fommil.netlib.BLAS.{getInstance => blas} */ private[spark] object NNLS { class Workspace(val n: Int) { + val scratch = new Array[Double](n) val grad = new Array[Double](n) val x = new Array[Double](n) @@ -67,7 +68,7 @@ private[spark] object NNLS { val n = atb.length val scratch = ws.scratch - + // find the optimal unconstrained step def steplen(dir: Array[Double], res: Array[Double]): Double = { val top = blas.ddot(n, dir, 1, res, 1) @@ -138,7 +139,7 @@ private[spark] object NNLS { if (stop(step, ndir, nx)) { return x.clone } - + // don't run through the walls i = 0 while (i < n) { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala index dddefe1944e9..89cd5baf3ff1 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala @@ -23,6 +23,7 @@ import org.apache.spark.api.java.JavaRDD import org.apache.spark.ml.recommendation.{ALS => NewALS} import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel +import breeze.optimize.proximal.Constraint._ /** * A more compact class to represent a rating than Tuple3[Int, Int, Double]. @@ -59,24 +60,23 @@ case class Rating(user: Int, product: Int, rating: Double) * preferences rather than explicit ratings given to items. */ class ALS private ( - private var numUserBlocks: Int, - private var numProductBlocks: Int, - private var rank: Int, - private var iterations: Int, - private var lambda: Double, - private var implicitPrefs: Boolean, - private var alpha: Double, - private var seed: Long = System.nanoTime() - ) extends Serializable with Logging { - + private var numUserBlocks: Int, + private var numProductBlocks: Int, + private var rank: Int, + private var iterations: Int, + private var userConstraint: Constraint, + private var productConstraint: Constraint, + private var userLambda: Double, + private var productLambda: Double, + private var implicitPrefs: Boolean, + private var alpha: Double, + private var seed: Long = System.nanoTime()) extends Serializable with Logging { /** * Constructs an ALS instance with default parameters: {numBlocks: -1, rank: 10, iterations: 10, - * lambda: 0.01, implicitPrefs: false, alpha: 1.0}. + * userLambda: 0.01, productLambda: 0.01, implicitPrefs: false, alpha: 1.0}. */ - def this() = this(-1, -1, 10, 10, 0.01, false, 1.0) - /** If true, do alternating nonnegative least squares. */ - private var nonnegative = false + def this() = this(-1, -1, 10, 10, SMOOTH, SMOOTH, 0.01, 0.01, false, 1.0) /** storage level for user/product in/out links */ private var intermediateRDDStorageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK @@ -123,12 +123,30 @@ class ALS private ( this } - /** Set the regularization parameter, lambda. Default: 0.01. */ - def setLambda(lambda: Double): this.type = { - this.lambda = lambda + /** Set the user regularization parameter, lambda. Default: 0.01. */ + def setUserLambda(userLambda: Double): ALS = { + this.userLambda = userLambda + this + } + + /* Set the product regularization parameter, lambda. Default : 0.01 */ + def setProductLambda(productLambda: Double): ALS = { + this.productLambda = productLambda this } + /* Set user constraint, Default: SMOOTH */ + def setUserConstraint(userConstraint: Constraint): ALS = { + this.userConstraint = userConstraint + this + } + + /* Set product constraint, Default: SMOOTH */ + def setProductConstraint(productConstraint: Constraint): ALS = { + this.productConstraint = productConstraint + this + } + /** Sets whether to use implicit preference. Default: false. */ def setImplicitPrefs(implicitPrefs: Boolean): this.type = { this.implicitPrefs = implicitPrefs @@ -149,15 +167,6 @@ class ALS private ( this } - /** - * Set whether the least-squares problems solved at each iteration should have - * nonnegativity constraints. - */ - def setNonnegative(b: Boolean): this.type = { - this.nonnegative = b - this - } - /** * :: DeveloperApi :: * Sets storage level for intermediate RDDs (user/product in/out links). The default value is @@ -222,10 +231,12 @@ class ALS private ( numUserBlocks = numUserBlocks, numItemBlocks = numProductBlocks, maxIter = iterations, - regParam = lambda, + userRegParam = userLambda, + itemRegParam = productLambda, implicitPrefs = implicitPrefs, alpha = alpha, - nonnegative = nonnegative, + userConstraint = userConstraint, + itemConstraint = productConstraint, intermediateRDDStorageLevel = intermediateRDDStorageLevel, finalRDDStorageLevel = StorageLevel.NONE, checkpointInterval = checkpointInterval, @@ -266,19 +277,22 @@ object ALS { * @param ratings RDD of (userID, productID, rating) pairs * @param rank number of features to use * @param iterations number of iterations of ALS (recommended: 10-20) - * @param lambda regularization factor (recommended: 0.01) + * @param userLambda L2 smoothness regularization factor (recommended: 0.01) * @param blocks level of parallelism to split computation into * @param seed random seed */ def train( - ratings: RDD[Rating], - rank: Int, - iterations: Int, - lambda: Double, - blocks: Int, - seed: Long - ): MatrixFactorizationModel = { - new ALS(blocks, blocks, rank, iterations, lambda, false, 1.0, seed).run(ratings) + ratings: RDD[Rating], + rank: Int, + iterations: Int, + userConstraint: Constraint, + productConstraint: Constraint, + userLambda: Double, + productLambda: Double, + blocks: Int, + seed: Long): MatrixFactorizationModel = { + new ALS(blocks, blocks, rank, iterations, userConstraint, productConstraint, + userLambda, productLambda, false, 1.0, seed).run(ratings) } /** @@ -291,17 +305,19 @@ object ALS { * @param ratings RDD of (userID, productID, rating) pairs * @param rank number of features to use * @param iterations number of iterations of ALS (recommended: 10-20) - * @param lambda regularization factor (recommended: 0.01) + * @param lambda L2 smoothness regularization factor (recommended: 0.01) * @param blocks level of parallelism to split computation into */ def train( - ratings: RDD[Rating], - rank: Int, - iterations: Int, - lambda: Double, - blocks: Int - ): MatrixFactorizationModel = { - new ALS(blocks, blocks, rank, iterations, lambda, false, 1.0).run(ratings) + ratings: RDD[Rating], + rank: Int, + iterations: Int, + constraint: Constraint, + lambda: Double, + blocks: Int): MatrixFactorizationModel = { + new ALS(blocks, blocks, rank, iterations, + constraint, constraint, lambda, lambda, + false, 1.0).run(ratings) } /** @@ -316,9 +332,12 @@ object ALS { * @param iterations number of iterations of ALS (recommended: 10-20) * @param lambda regularization factor (recommended: 0.01) */ - def train(ratings: RDD[Rating], rank: Int, iterations: Int, lambda: Double) - : MatrixFactorizationModel = { - train(ratings, rank, iterations, lambda, -1) + def train(ratings: RDD[Rating], + rank: Int, + iterations: Int, + constraint: Constraint, + lambda: Double): MatrixFactorizationModel = { + train(ratings, rank, iterations, constraint, lambda, -1) } /** @@ -332,9 +351,8 @@ object ALS { * @param rank number of features to use * @param iterations number of iterations of ALS (recommended: 10-20) */ - def train(ratings: RDD[Rating], rank: Int, iterations: Int) - : MatrixFactorizationModel = { - train(ratings, rank, iterations, 0.01, -1) + def train(ratings: RDD[Rating], rank: Int, iterations: Int): MatrixFactorizationModel = { + train(ratings, rank, iterations, SMOOTH, 0.01, -1) } /** @@ -352,16 +370,16 @@ object ALS { * @param alpha confidence parameter * @param seed random seed */ - def trainImplicit( - ratings: RDD[Rating], - rank: Int, - iterations: Int, - lambda: Double, - blocks: Int, - alpha: Double, - seed: Long - ): MatrixFactorizationModel = { - new ALS(blocks, blocks, rank, iterations, lambda, true, alpha, seed).run(ratings) + def trainImplicit(ratings: RDD[Rating], + rank: Int, + iterations: Int, + lambda: Double, + blocks: Int, + alpha: Double, + seed: Long): MatrixFactorizationModel = { + new ALS(blocks, blocks, rank, iterations, + SMOOTH, SMOOTH, lambda, lambda, + true, alpha, seed).run(ratings) } /** @@ -378,15 +396,14 @@ object ALS { * @param blocks level of parallelism to split computation into * @param alpha confidence parameter */ - def trainImplicit( - ratings: RDD[Rating], - rank: Int, - iterations: Int, - lambda: Double, - blocks: Int, - alpha: Double - ): MatrixFactorizationModel = { - new ALS(blocks, blocks, rank, iterations, lambda, true, alpha).run(ratings) + def trainImplicit(ratings: RDD[Rating], + rank: Int, + iterations: Int, + lambda: Double, + blocks: Int, + alpha: Double): MatrixFactorizationModel = { + new ALS(blocks, blocks, rank, iterations, SMOOTH, SMOOTH, + lambda, lambda, true, alpha).run(ratings) } /** @@ -403,7 +420,7 @@ object ALS { * @param alpha confidence parameter */ def trainImplicit(ratings: RDD[Rating], rank: Int, iterations: Int, lambda: Double, alpha: Double) - : MatrixFactorizationModel = { + : MatrixFactorizationModel = { trainImplicit(ratings, rank, iterations, lambda, -1, alpha) } @@ -420,7 +437,7 @@ object ALS { * @param iterations number of iterations of ALS (recommended: 10-20) */ def trainImplicit(ratings: RDD[Rating], rank: Int, iterations: Int) - : MatrixFactorizationModel = { + : MatrixFactorizationModel = { trainImplicit(ratings, rank, iterations, 0.01, -1, 1.0) } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala index 0bb06e9e8ac9..3f80be63b4ea 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala @@ -19,7 +19,9 @@ package org.apache.spark.ml.recommendation import java.io.File import java.util.Random - +import breeze.linalg.{DenseVector => BrzVector, DenseMatrix => BrzMatrix, sum, norm, upperTriangular} +import breeze.numerics._ +import breeze.optimize.proximal.QuadraticMinimizer import scala.collection.mutable import scala.collection.mutable.ArrayBuffer @@ -33,6 +35,7 @@ import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.mllib.util.TestingUtils._ import org.apache.spark.rdd.RDD import org.apache.spark.sql.{Row, SQLContext} +import breeze.optimize.proximal.Constraint._ import org.apache.spark.util.Utils class ALSSuite extends FunSuite with MLlibTestSparkContext with Logging { @@ -164,6 +167,127 @@ class ALSSuite extends FunSuite with MLlibTestSparkContext with Logging { assert(Vectors.dense(x1) ~== Vectors.dense(-0.1155556, 3.28) relTol 1e-6) } + test("QuadraticSolver without proximal operator") { + val k = 2 + val ne0 = new NormalEquation(k) + .add(Array(1.0f, 2.0f), 4.0f) + .add(Array(1.0f, 3.0f), 9.0f) + .add(Array(1.0f, 4.0f), 16.0f) + val ne1 = new NormalEquation(k) + .merge(ne0) + + val qm = new QuadraticSolver(2, SMOOTH) + val x0 = qm.solve(ne0, 0.0).map(_.toDouble) + // NumPy code that computes the expected solution: + // A = np.matrix("1 2; 1 3; 1 4") + // b = b = np.matrix("3; 6") + // x0 = np.linalg.lstsq(A, b)[0] + assert(Vectors.dense(x0) ~== Vectors.dense(-8.333333, 6.0) relTol 1e-6) + + assert(ne0.n === 0) + assert(ne0.ata.forall(_ == 0.0)) + assert(ne0.atb.forall(_ == 0.0)) + + val x1 = qm.solve(ne1, 0.5).map(_.toDouble) + // NumPy code that computes the expected solution, where lambda is scaled by n: + // x0 = np.linalg.solve(A.transpose() * A + 0.5 * 3 * np.eye(2), A.transpose() * b) + assert(Vectors.dense(x1) ~== Vectors.dense(-0.1155556, 3.28) relTol 1e-6) + } + + test("QuadraticSolver with POSITIVE constraint") { + val n = 5 + val ata = Array( + 4.377, + -3.531, 4.344, + -1.306, 0.934, 2.644, + -0.139, 0.305, -0.203, 5.883, + 3.418, -2.140, -0.170, 1.428, 4.684) + val atb: Array[Double] = Array(-1.632, 2.115, 1.094, -1.025, -0.636) + + val ne = new NormalEquation(5) + + System.arraycopy(ata, 0, ne.ata, 0, ne.triK) + System.arraycopy(atb, 0, ne.atb, 0, ne.k) + + val goodx = Vectors.dense(0.13025, 0.54506, 0.2874, 0.0, 0.028628) + val qm = new QuadraticSolver(n, POSITIVE) + val xpos = qm.solve(ne, 0.0).map(_.toDouble) + + assert(Vectors.dense(xpos) ~= goodx absTol 1e-3) + } + + test("QuadraticSolver with BOX constraint") { + val n = 5 + val ata = Array( + 4.3142, + -3.3292,14.2353, + -2.4655,-3.1405,8.4796, + -3.3249,-2.3052,2.5011,18.1953, + -4.3772,0.7867,3.2092,12.9338,10.5297) + + val atb = Array(-1.0347, -0.7269, 0.3034, -0.2939, 0.7873) + + val ne = new NormalEquation(5) + + System.arraycopy(ata, 0, ne.ata, 0, ne.triK) + System.arraycopy(atb, 0, ne.atb, 0, ne.k) + + val goodx = Vectors.dense(0.0000, 0.0000, 0.0085, 0.0000, 0.0722) + + val qm = new QuadraticSolver(n, BOX) + val xbounds = qm.solve(ne, 0.0).map(_.toDouble) + assert(Vectors.dense(xbounds) ~= goodx absTol 1e-3) + } + + // Generated from MovieLens dataset debug on convergence + test("QuadraticSolver with EQUALITY constraint") { + val Hml = new BrzMatrix(25, 25, Array(112.647378, 44.962984, 49.127829, 43.708389, 43.333008, 46.345827, 49.581542, 42.991226, 43.999341, 41.336724, 42.879992, 46.896465, 41.778920, 46.333559, 51.168782, 44.800998, 43.735417, 42.672057, 40.024492, 48.793499, 48.696170, 45.870016, 46.398093, 44.305791, 41.863013, 44.962984, 107.202825, 44.218178, 38.585858, 36.606830, 41.783275, 44.631314, 40.883821, 37.948817, 34.908843, 38.356328, 43.642467, 36.213124, 38.760314, 43.317775, 36.803445, 41.905953, 40.238334, 42.325769, 45.853665, 46.601722, 40.668861, 49.084078, 39.292553, 35.781804, 49.127829, 44.218178, 118.264304, 40.139032, 43.741591, 49.387932, 45.558785, 40.793703, 46.136010, 41.839393, 39.544248, 43.161644, 43.361811, 43.832852, 50.572459, 42.036961, 47.251940, 45.273068, 42.842437, 49.323737, 52.125739, 45.831747, 49.466716, 44.762183, 41.930313, 43.708389, 38.585858, 40.139032, 94.937989, 36.562570, 41.628404, 38.604965, 39.080500, 37.267530, 34.291272, 34.891704, 39.216238, 35.970491, 40.733288, 41.872521, 35.825264, 38.144457, 41.368293, 40.751910, 41.123673, 41.930358, 41.002915, 43.099168, 36.018699, 33.646602, 43.333008, 36.606830, 43.741591, 36.562570, 105.764912, 42.799031, 38.215171, 42.193565, 38.708056, 39.448031, 37.882184, 40.172339, 40.625192, 39.015338, 36.433413, 40.848178, 36.480813, 41.439981, 40.797598, 40.325652, 38.599119, 42.727171, 39.382845, 41.535989, 41.518779, 46.345827, 41.783275, 49.387932, 41.628404, 42.799031, 114.691992, 43.015599, 42.688570, 42.722905, 38.675192, 38.377970, 44.656183, 39.087805, 45.443516, 50.585268, 40.949970, 41.920556, 43.711898, 41.463472, 51.248836, 46.869144, 45.178199, 45.709593, 42.402465, 44.097412, 49.581542, 44.631314, 45.558785, 38.604965, 38.215171, 43.015599, 114.667896, 40.966284, 37.748084, 39.496813, 40.534741, 42.770125, 40.628678, 41.194251, 47.837969, 44.596875, 43.448257, 43.291878, 39.005953, 50.493111, 46.296591, 43.449036, 48.798961, 42.877859, 37.055014, 42.991226, 40.883821, 40.793703, 39.080500, 42.193565, 42.688570, 40.966284, 106.632656, 37.640927, 37.181799, 40.065085, 38.978761, 36.014753, 38.071494, 41.081064, 37.981693, 41.821252, 42.773603, 39.293957, 38.600491, 43.761301, 42.294750, 42.410289, 40.266469, 39.909538, 43.999341, 37.948817, 46.136010, 37.267530, 38.708056, 42.722905, 37.748084, 37.640927, 102.747189, 34.574727, 36.525241, 39.839891, 36.297838, 42.756496, 44.673874, 38.350523, 40.330611, 42.288511, 39.472844, 45.617102, 44.692618, 41.194977, 41.284030, 39.580938, 42.382268, 41.336724, 34.908843, 41.839393, 34.291272, 39.448031, 38.675192, 39.496813, 37.181799, 34.574727, 94.205550, 37.583319, 38.504211, 36.376976, 34.239351, 39.060978, 37.515228, 37.079566, 37.317791, 38.046576, 36.112222, 39.406838, 39.258432, 36.347136, 38.927619, 41.604838, 42.879992, 38.356328, 39.544248, 34.891704, 37.882184, 38.377970, 40.534741, 40.065085, 36.525241, 37.583319, 98.109622, 39.428284, 37.518381, 39.659011, 38.477483, 40.547021, 42.678061, 42.279104, 41.515782, 43.478416, 45.003800, 42.433639, 42.757336, 35.814356, 39.017848, 46.896465, 43.642467, 43.161644, 39.216238, 40.172339, 44.656183, 42.770125, 38.978761, 39.839891, 38.504211, 39.428284, 103.478446, 39.984358, 40.587958, 44.490750, 40.600474, 40.698368, 42.296794, 41.567854, 47.002908, 43.922434, 43.479144, 44.291425, 43.352951, 42.613649, 41.778920, 36.213124, 43.361811, 35.970491, 40.625192, 39.087805, 40.628678, 36.014753, 36.297838, 36.376976, 37.518381, 39.984358, 99.799628, 38.027891, 44.268308, 36.202204, 39.921811, 38.668774, 36.832286, 45.833218, 43.228963, 36.833273, 44.787401, 38.176476, 39.062471, 46.333559, 38.760314, 43.832852, 40.733288, 39.015338, 45.443516, 41.194251, 38.071494, 42.756496, 34.239351, 39.659011, 40.587958, 38.027891, 114.304283, 46.958354, 39.636801, 40.927870, 49.118094, 43.093642, 50.196436, 45.535041, 43.087415, 48.540036, 35.942528, 37.962886, 51.168782, 43.317775, 50.572459, 41.872521, 36.433413, 50.585268, 47.837969, 41.081064, 44.673874, 39.060978, 38.477483, 44.490750, 44.268308, 46.958354, 122.935323, 39.948695, 46.801841, 44.455283, 40.160668, 54.193098, 49.678271, 41.834745, 47.227606, 42.214571, 42.598524, 44.800998, 36.803445, 42.036961, 35.825264, 40.848178, 40.949970, 44.596875, 37.981693, 38.350523, 37.515228, 40.547021, 40.600474, 36.202204, 39.636801, 39.948695, 97.365126, 40.163209, 39.177628, 38.935283, 41.465246, 40.962743, 40.533287, 43.367907, 38.723316, 36.312733, 43.735417, 41.905953, 47.251940, 38.144457, 36.480813, 41.920556, 43.448257, 41.821252, 40.330611, 37.079566, 42.678061, 40.698368, 39.921811, 40.927870, 46.801841, 40.163209, 110.416786, 46.843429, 41.834126, 46.788801, 46.983780, 43.511429, 47.291825, 40.023523, 40.581819, 42.672057, 40.238334, 45.273068, 41.368293, 41.439981, 43.711898, 43.291878, 42.773603, 42.288511, 37.317791, 42.279104, 42.296794, 38.668774, 49.118094, 44.455283, 39.177628, 46.843429, 107.474576, 44.590023, 48.333476, 44.059916, 42.653703, 44.171623, 39.363181, 41.716539, 40.024492, 42.325769, 42.842437, 40.751910, 40.797598, 41.463472, 39.005953, 39.293957, 39.472844, 38.046576, 41.515782, 41.567854, 36.832286, 43.093642, 40.160668, 38.935283, 41.834126, 44.590023, 105.140579, 43.149105, 41.516560, 43.494333, 45.664210, 36.466241, 37.477898, 48.793499, 45.853665, 49.323737, 41.123673, 40.325652, 51.248836, 50.493111, 38.600491, 45.617102, 36.112222, 43.478416, 47.002908, 45.833218, 50.196436, 54.193098, 41.465246, 46.788801, 48.333476, 43.149105, 123.746816, 53.234332, 44.633908, 53.537592, 43.196327, 42.747181, 48.696170, 46.601722, 52.125739, 41.930358, 38.599119, 46.869144, 46.296591, 43.761301, 44.692618, 39.406838, 45.003800, 43.922434, 43.228963, 45.535041, 49.678271, 40.962743, 46.983780, 44.059916, 41.516560, 53.234332, 125.202062, 43.967875, 52.416619, 39.937196, 39.775405, 45.870016, 40.668861, 45.831747, 41.002915, 42.727171, 45.178199, 43.449036, 42.294750, 41.194977, 39.258432, 42.433639, 43.479144, 36.833273, 43.087415, 41.834745, 40.533287, 43.511429, 42.653703, 43.494333, 44.633908, 43.967875, 107.336922, 44.396001, 39.819884, 38.676633, 46.398093, 49.084078, 49.466716, 43.099168, 39.382845, 45.709593, 48.798961, 42.410289, 41.284030, 36.347136, 42.757336, 44.291425, 44.787401, 48.540036, 47.227606, 43.367907, 47.291825, 44.171623, 45.664210, 53.537592, 52.416619, 44.396001, 114.651847, 40.826050, 37.634130, 44.305791, 39.292553, 44.762183, 36.018699, 41.535989, 42.402465, 42.877859, 40.266469, 39.580938, 38.927619, 35.814356, 43.352951, 38.176476, 35.942528, 42.214571, 38.723316, 40.023523, 39.363181, 36.466241, 43.196327, 39.937196, 39.819884, 40.826050, 96.128345, 40.788606, 41.863013, 35.781804, 41.930313, 33.646602, 41.518779, 44.097412, 37.055014, 39.909538, 42.382268, 41.604838, 39.017848, 42.613649, 39.062471, 37.962886, 42.598524, 36.312733, 40.581819, 41.716539, 37.477898, 42.747181, 39.775405, 38.676633, 37.634130, 40.788606, 97.605849)) + val fml = BrzVector(-1219.296604, -1126.029219, -1202.257728, -1022.064083, -1047.414836, -1155.507387, -1169.502847, -1091.655366, -1063.832607, -1015.829142, -1075.864072, -1101.427162, -1058.907539, -1115.171116, -1205.015211, -1090.627084, -1143.206126, -1140.107801, -1100.285642, -1198.992795, -1246.276120, -1159.678276, -1194.177391, -1056.458015, -1058.791892) + fml *= -1.0 + + val ml = 25 + val upper = upperTriangular(Hml).data.filter{x=> abs(x) >= 1e-8} + + val ne = new NormalEquation(ml) + + System.arraycopy(upper, 0, ne.ata, 0, ne.triK) + System.arraycopy(fml.data, 0, ne.atb, 0, ne.k) + + val goodx = Vectors.dense(0.3131862265452959, 0.0, 0.01129486116330884, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.060642310566736704, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6151756449091074, 0.0, 0.0, 0.0, 0.0) + + val qm = new QuadraticSolver(ml, EQUALITY) + val xequality = qm.solve(ne, 0.0).map(_.toDouble) + assert(Vectors.dense(xequality) ~= goodx absTol 1e-3) + assert(sum(BrzVector(xequality)) ~= 1.0 absTol 1e-4) + } + + // Generated from MovieLens dataset debug on convergence + test("QuadraticSolver with SPARSE constraint") { + val Hl1 = new BrzMatrix(25, 25, Array(253.535098, 236.477785, 234.421906, 223.374867, 241.007512, 204.695511, 226.465507, 223.351032, 249.179386, 221.411909, 238.679352, 203.579010, 217.564498, 243.852681, 266.607649, 213.994496, 241.620759, 223.602907, 220.038678, 264.704959, 240.341716, 223.672378, 244.323303, 223.216217, 226.074990, 236.477785, 278.862035, 245.756639, 237.489890, 252.783139, 214.077652, 241.816953, 238.790633, 260.536460, 228.121417, 255.103936, 216.608405, 237.150426, 258.933231, 281.958112, 228.971242, 252.508513, 234.242638, 240.308477, 285.416390, 254.792243, 240.176223, 259.048267, 235.566855, 236.277617, 234.421906, 245.756639, 269.162882, 231.416867, 251.321527, 208.134322, 236.567647, 236.558029, 255.805108, 226.535825, 251.514713, 212.770208, 228.565362, 261.748652, 273.946966, 227.411615, 252.767900, 232.823977, 233.084574, 278.315614, 250.872786, 235.227909, 255.104263, 238.931093, 235.402356, 223.374867, 237.489890, 231.416867, 254.771963, 241.703229, 209.028084, 231.517998, 228.768510, 250.805315, 216.548935, 245.473869, 207.687875, 222.036114, 250.906955, 263.018181, 216.128966, 244.445283, 227.436840, 231.369510, 270.721492, 242.475130, 226.471530, 248.130112, 225.826557, 228.266719, 241.007512, 252.783139, 251.321527, 241.703229, 285.702320, 219.051868, 249.442308, 240.400187, 264.970407, 232.503138, 258.819837, 220.160683, 235.621356, 267.743972, 285.795029, 229.667231, 260.870105, 240.751687, 247.183922, 289.044453, 260.715749, 244.210258, 267.159502, 242.992822, 244.070245, 204.695511, 214.077652, 208.134322, 209.028084, 219.051868, 210.164224, 208.151908, 201.539036, 226.373834, 192.056565, 219.950686, 191.459568, 195.982460, 226.739575, 240.677519, 196.116652, 217.352348, 203.533069, 204.581690, 243.603643, 217.785986, 204.205559, 223.747953, 203.586842, 200.165867, 226.465507, 241.816953, 236.567647, 231.517998, 249.442308, 208.151908, 264.007925, 227.080718, 253.174653, 220.322823, 248.619983, 210.100242, 223.279198, 254.807401, 269.896959, 222.927882, 247.017507, 230.484479, 233.358639, 274.935489, 249.237737, 235.229584, 253.029955, 228.601700, 230.512885, 223.351032, 238.790633, 236.558029, 228.768510, 240.400187, 201.539036, 227.080718, 258.773479, 249.471480, 215.664539, 243.078577, 202.337063, 221.020998, 249.979759, 263.356244, 213.470569, 246.182278, 225.727773, 229.873732, 266.295057, 242.954024, 225.510760, 249.370268, 227.813265, 232.141964, 249.179386, 260.536460, 255.805108, 250.805315, 264.970407, 226.373834, 253.174653, 249.471480, 302.360150, 237.902729, 265.769812, 224.947876, 243.088105, 273.690377, 291.076027, 241.089661, 267.772651, 248.459822, 249.662698, 295.935799, 267.460908, 255.668926, 275.902272, 248.495606, 246.827505, 221.411909, 228.121417, 226.535825, 216.548935, 232.503138, 192.056565, 220.322823, 215.664539, 237.902729, 245.154567, 234.956316, 199.557862, 214.774631, 240.339217, 255.161923, 209.328714, 232.277540, 216.298768, 220.296241, 253.817633, 237.638235, 220.785141, 239.098500, 220.583355, 218.962732, 238.679352, 255.103936, 251.514713, 245.473869, 258.819837, 219.950686, 248.619983, 243.078577, 265.769812, 234.956316, 288.133073, 225.087852, 239.810430, 268.406605, 283.289840, 233.858455, 258.306589, 240.263617, 246.844456, 290.492875, 267.212598, 243.218596, 265.681905, 244.615890, 242.543363, 203.579010, 216.608405, 212.770208, 207.687875, 220.160683, 191.459568, 210.100242, 202.337063, 224.947876, 199.557862, 225.087852, 217.501685, 197.897572, 229.825316, 242.175607, 201.123644, 219.820165, 202.894307, 211.468055, 246.048907, 225.135194, 210.076305, 226.806762, 212.014431, 205.123267, 217.564498, 237.150426, 228.565362, 222.036114, 235.621356, 195.982460, 223.279198, 221.020998, 243.088105, 214.774631, 239.810430, 197.897572, 244.439113, 241.621129, 260.400953, 216.482178, 236.805076, 216.680343, 223.816297, 263.188711, 236.311810, 222.950152, 244.636356, 219.121372, 219.911078, 243.852681, 258.933231, 261.748652, 250.906955, 267.743972, 226.739575, 254.807401, 249.979759, 273.690377, 240.339217, 268.406605, 229.825316, 241.621129, 302.928261, 288.344398, 238.549018, 267.239982, 248.073140, 254.230916, 296.789984, 267.158551, 252.226496, 271.170860, 248.325354, 253.694013, 266.607649, 281.958112, 273.946966, 263.018181, 285.795029, 240.677519, 269.896959, 263.356244, 291.076027, 255.161923, 283.289840, 242.175607, 260.400953, 288.344398, 343.457361, 257.368309, 284.795470, 263.122266, 271.239770, 320.209823, 283.933299, 264.416752, 292.035194, 268.764031, 265.345807, 213.994496, 228.971242, 227.411615, 216.128966, 229.667231, 196.116652, 222.927882, 213.470569, 241.089661, 209.328714, 233.858455, 201.123644, 216.482178, 238.549018, 257.368309, 239.295031, 234.913508, 218.066855, 219.648997, 257.969951, 231.243624, 224.657569, 238.158714, 217.174368, 215.933866, 241.620759, 252.508513, 252.767900, 244.445283, 260.870105, 217.352348, 247.017507, 246.182278, 267.772651, 232.277540, 258.306589, 219.820165, 236.805076, 267.239982, 284.795470, 234.913508, 289.709239, 241.312315, 247.249491, 286.702147, 264.252852, 245.151647, 264.582984, 240.842689, 245.837476, 223.602907, 234.242638, 232.823977, 227.436840, 240.751687, 203.533069, 230.484479, 225.727773, 248.459822, 216.298768, 240.263617, 202.894307, 216.680343, 248.073140, 263.122266, 218.066855, 241.312315, 255.363057, 230.209787, 271.091482, 239.220241, 225.387834, 247.486715, 226.052431, 224.119935, 220.038678, 240.308477, 233.084574, 231.369510, 247.183922, 204.581690, 233.358639, 229.873732, 249.662698, 220.296241, 246.844456, 211.468055, 223.816297, 254.230916, 271.239770, 219.648997, 247.249491, 230.209787, 264.014907, 271.938970, 246.664305, 227.889045, 249.908085, 232.035369, 229.010298, 264.704959, 285.416390, 278.315614, 270.721492, 289.044453, 243.603643, 274.935489, 266.295057, 295.935799, 253.817633, 290.492875, 246.048907, 263.188711, 296.789984, 320.209823, 257.969951, 286.702147, 271.091482, 271.938970, 352.825726, 286.200221, 267.716897, 297.182554, 269.776351, 266.721561, 240.341716, 254.792243, 250.872786, 242.475130, 260.715749, 217.785986, 249.237737, 242.954024, 267.460908, 237.638235, 267.212598, 225.135194, 236.311810, 267.158551, 283.933299, 231.243624, 264.252852, 239.220241, 246.664305, 286.200221, 294.042749, 246.504021, 269.570596, 243.980697, 242.690997, 223.672378, 240.176223, 235.227909, 226.471530, 244.210258, 204.205559, 235.229584, 225.510760, 255.668926, 220.785141, 243.218596, 210.076305, 222.950152, 252.226496, 264.416752, 224.657569, 245.151647, 225.387834, 227.889045, 267.716897, 246.504021, 259.897656, 251.730847, 229.335712, 229.759185, 244.323303, 259.048267, 255.104263, 248.130112, 267.159502, 223.747953, 253.029955, 249.370268, 275.902272, 239.098500, 265.681905, 226.806762, 244.636356, 271.170860, 292.035194, 238.158714, 264.582984, 247.486715, 249.908085, 297.182554, 269.570596, 251.730847, 303.872223, 251.585636, 247.878402, 223.216217, 235.566855, 238.931093, 225.826557, 242.992822, 203.586842, 228.601700, 227.813265, 248.495606, 220.583355, 244.615890, 212.014431, 219.121372, 248.325354, 268.764031, 217.174368, 240.842689, 226.052431, 232.035369, 269.776351, 243.980697, 229.335712, 251.585636, 257.544914, 228.810942, 226.074990, 236.277617, 235.402356, 228.266719, 244.070245, 200.165867, 230.512885, 232.141964, 246.827505, 218.962732, 242.543363, 205.123267, 219.911078, 253.694013, 265.345807, 215.933866, 245.837476, 224.119935, 229.010298, 266.721561, 242.690997, 229.759185, 247.878402, 228.810942, 253.353769)) + val fl1 = BrzVector(-892.842851, -934.071560, -932.936015, -888.124343, -961.050207, -791.191087, -923.711397, -904.289301, -988.384984, -883.909133, -959.465030, -798.551172, -871.622303, -997.463289, -1043.912620, -863.013719, -976.975712, -897.033693, -898.694786, -1069.245497, -963.491924, -901.263474, -983.768031, -899.865392, -902.283567) + fl1 *= -1.0 + val ml = 25 + + val upper = upperTriangular(Hl1).data.filter { x => abs(x) >= 1e-8 } + + val ne = new NormalEquation(ml) + ne.n = 1 + + System.arraycopy(upper, 0, ne.ata, 0, ne.triK) + System.arraycopy(fl1.data, 0, ne.atb, 0, ne.k) + + val goodx = Vectors.dense(0.18611, 0.00000, 0.06317, -0.10417, 0.11262, + -0.20495, 0.52668, 0.32790, 0.19421, 0.72180, + 0.06309, -0.41326, -0.00000, 0.52078, -0.00000, + 0.18040, 0.62915, 0.16329, -0.06424, 0.37539, + 0.01659, 0.00000, 0.11215, 0.24778, 0.04082) + + val qm = new QuadraticSolver(ml, SPARSE) + + val xl1 = qm.solve(ne, 2.0).map(_.toDouble) + assert(Vectors.dense(xl1) ~= goodx absTol 1e-3) + } + test("RatingBlockBuilder") { val emptyBuilder = new RatingBlockBuilder[Int]() assert(emptyBuilder.size === 0) @@ -457,7 +581,8 @@ class ALSSuite extends FunSuite with MLlibTestSparkContext with Logging { test("nonnegative constraint") { val (ratings, _) = genImplicitTestData(numUsers = 20, numItems = 40, rank = 2, noiseStd = 0.01) - val (userFactors, itemFactors) = ALS.train(ratings, rank = 2, maxIter = 4, nonnegative = true) + + val (userFactors, itemFactors) = ALS.train(ratings, rank = 2, maxIter = 4, userConstraint=POSITIVE, itemConstraint=POSITIVE) def isNonnegative(factors: RDD[(Int, Array[Float])]): Boolean = { factors.values.map { _.forall(_ >= 0.0) }.reduce(_ && _) } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala index 8775c0ca9df8..89bb144196c6 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala @@ -25,6 +25,7 @@ import org.scalatest.FunSuite import org.jblas.DoubleMatrix import org.apache.spark.mllib.util.MLlibTestSparkContext +import breeze.optimize.proximal.Constraint._ import org.apache.spark.storage.StorageLevel object ALSSuite { @@ -128,11 +129,11 @@ class ALSSuite extends FunSuite with MLlibTestSparkContext { test("pseudorandomness") { val ratings = sc.parallelize(ALSSuite.generateRatings(10, 20, 5, 0.5, false, false)._1, 2) - val model11 = ALS.train(ratings, 5, 1, 1.0, 2, 1) - val model12 = ALS.train(ratings, 5, 1, 1.0, 2, 1) + val model11 = ALS.train(ratings, 5, 1, SMOOTH, SMOOTH, 1.0, 1.0, 2, 1) + val model12 = ALS.train(ratings, 5, 1, SMOOTH, SMOOTH, 1.0, 1.0, 2, 1) val u11 = model11.userFeatures.values.flatMap(_.toList).collect().toList val u12 = model12.userFeatures.values.flatMap(_.toList).collect().toList - val model2 = ALS.train(ratings, 5, 1, 1.0, 2, 2) + val model2 = ALS.train(ratings, 5, 1, SMOOTH, SMOOTH, 1.0, 1.0, 2, 2) val u2 = model2.userFeatures.values.flatMap(_.toList).collect().toList assert(u11 == u12) assert(u11 != u2) @@ -144,7 +145,8 @@ class ALSSuite extends FunSuite with MLlibTestSparkContext { var model = new ALS() .setRank(5) .setIterations(1) - .setLambda(1.0) + .setUserLambda(1.0) + .setProductLambda(1.0) .setBlocks(2) .setSeed(1) .setFinalRDDStorageLevel(storageLevel) @@ -155,7 +157,8 @@ class ALSSuite extends FunSuite with MLlibTestSparkContext { model = new ALS() .setRank(5) .setIterations(1) - .setLambda(1.0) + .setUserLambda(1.0) + .setProductLambda(1.0) .setBlocks(2) .setSeed(1) .setFinalRDDStorageLevel(storageLevel) @@ -219,17 +222,23 @@ class ALSSuite extends FunSuite with MLlibTestSparkContext { val (sampledRatings, trueRatings, truePrefs) = ALSSuite.generateRatings(users, products, features, samplingRate, implicitPrefs, negativeWeights, negativeFactors) - val model = new ALS() + val als = new ALS() .setUserBlocks(numUserBlocks) .setProductBlocks(numProductBlocks) .setRank(features) .setIterations(iterations) .setAlpha(1.0) .setImplicitPrefs(implicitPrefs) - .setLambda(0.01) + .setUserLambda(0.01) + .setProductLambda(0.01) .setSeed(0L) - .setNonnegative(!negativeFactors) - .run(sc.parallelize(sampledRatings)) + + if(!negativeFactors) { + als.setUserConstraint(POSITIVE) + als.setProductConstraint(POSITIVE) + } + + val model = als.run(sc.parallelize(sampledRatings)) val predictedU = new DoubleMatrix(users, features) for ((u, vec) <- model.userFeatures.collect(); i <- 0 until features) { diff --git a/pom.xml b/pom.xml index b3cecd1893a0..9b2c37905c07 100644 --- a/pom.xml +++ b/pom.xml @@ -1439,6 +1439,7 @@ org.apache.maven.plugins maven-source-plugin + org.apache.maven.plugins diff --git a/sbt/sbt b/sbt/sbt deleted file mode 100755 index 41438251f681..000000000000 --- a/sbt/sbt +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env bash - -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Determine the current working directory -_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" - -echo "NOTE: The sbt/sbt script has been relocated to build/sbt." >&2 -echo " Please update references to point to the new location." >&2 -echo "" >&2 -echo " Invoking 'build/sbt $@' now ..." >&2 -echo "" >&2 - -${_DIR}/../build/sbt "$@"