@@ -64,7 +64,7 @@ object MovieLensALS {
6464 .text(s " use Kryo serialization " )
6565 .action((_, c) => c.copy(kryo = true ))
6666 opt[Unit ](" implicitPrefs" )
67- .text(s " use implicit preference " )
67+ .text(" use implicit preference" )
6868 .action((_, c) => c.copy(implicitPrefs = true ))
6969 arg[String ](" <input>" )
7070 .required()
@@ -93,7 +93,7 @@ object MovieLensALS {
9393 val ratings = sc.textFile(params.input).map { line =>
9494 val fields = line.split(" ::" )
9595 if (params.implicitPrefs) {
96- /**
96+ /*
9797 * MovieLens ratings are on a scale of 1-5:
9898 * 5: Must see
9999 * 4: Will enjoy
@@ -105,9 +105,7 @@ object MovieLensALS {
105105 * 5 -> 2.5, 4 -> 1.5, 3 -> 0.5, 2 -> -0.5, 1 -> -1.5. This mapping means unobserved
106106 * entries are generally between It's okay and Fairly bad.
107107 * The semantics of 0 in this expanded world of non-positive weights
108- * are "the same as never having interacted at all"
109- * It's possible that 0 values are ignored when constructing the sparse representation,
110- * because the 0s are implicit. This would be a problem, at least, a theoretical one.
108+ * are "the same as never having interacted at all".
111109 */
112110 Rating (fields(0 ).toInt, fields(1 ).toInt, fields(2 ).toDouble - 2.5 )
113111 } else {
@@ -124,14 +122,14 @@ object MovieLensALS {
124122 val splits = ratings.randomSplit(Array (0.8 , 0.2 ))
125123 val training = splits(0 ).cache()
126124 val test = if (params.implicitPrefs) {
127- /**
125+ /*
128126 * 0 means "don't know" and positive values mean "confident that the prediction should be 1".
129127 * Negative values mean "confident that the prediction should be 0".
130128 * We have in this case used some kind of weighted RMSE. The weight is the absolute value of
131129 * the confidence. The error is the difference between prediction and either 1 or 0,
132130 * depending on whether r is positive or negative.
133131 */
134- splits(1 ).map(x => Rating (x.user, x.product, if (x.rating > 0 ) 1.0 else 0.0 ))
132+ splits(1 ).map(x => Rating (x.user, x.product, if (x.rating > 0 ) 1.0 else 0.0 ))
135133 } else {
136134 splits(1 )
137135 }.cache()
@@ -159,12 +157,12 @@ object MovieLensALS {
159157 /** Compute RMSE (Root Mean Squared Error). */
160158 def computeRmse (model : MatrixFactorizationModel , data : RDD [Rating ], implicitPrefs : Boolean ) = {
161159
162- def evalRating (r : Double ) =
163- if (! implicitPrefs) r else if (r > 1.0 ) 1.0 else if (r < 0.0 ) 0.0 else r
160+ def mapPredictedRating (r : Double ) = if (implicitPrefs) math.max(math.min(r, 1.0 ), 0.0 ) else r
164161
165162 val predictions : RDD [Rating ] = model.predict(data.map(x => (x.user, x.product)))
166- val predictionsAndRatings = predictions.map(x => ((x.user, x.product), evalRating(x.rating)))
167- .join(data.map(x => ((x.user, x.product), x.rating))).values
163+ val predictionsAndRatings = predictions.map{ x =>
164+ ((x.user, x.product), mapPredictedRating(x.rating))
165+ }.join(data.map(x => ((x.user, x.product), x.rating))).values
168166 math.sqrt(predictionsAndRatings.map(x => (x._1 - x._2) * (x._1 - x._2)).mean())
169167 }
170168}
0 commit comments