Skip to content

Commit 8b371dc

Browse files
committed
Second Pass on reviews by mengxr
1 parent eca9d37 commit 8b371dc

File tree

1 file changed

+9
-11
lines changed

1 file changed

+9
-11
lines changed

examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ object MovieLensALS {
6464
.text(s"use Kryo serialization")
6565
.action((_, c) => c.copy(kryo = true))
6666
opt[Unit]("implicitPrefs")
67-
.text(s"use implicit preference")
67+
.text("use implicit preference")
6868
.action((_, c) => c.copy(implicitPrefs = true))
6969
arg[String]("<input>")
7070
.required()
@@ -93,7 +93,7 @@ object MovieLensALS {
9393
val ratings = sc.textFile(params.input).map { line =>
9494
val fields = line.split("::")
9595
if (params.implicitPrefs) {
96-
/**
96+
/*
9797
* MovieLens ratings are on a scale of 1-5:
9898
* 5: Must see
9999
* 4: Will enjoy
@@ -105,9 +105,7 @@ object MovieLensALS {
105105
* 5 -> 2.5, 4 -> 1.5, 3 -> 0.5, 2 -> -0.5, 1 -> -1.5. This mapping means unobserved
106106
* entries are generally between "It's okay" and "Fairly bad".
107107
* The semantics of 0 in this expanded world of non-positive weights
108-
* are "the same as never having interacted at all"
109-
* It's possible that 0 values are ignored when constructing the sparse representation,
110-
* because the 0s are implicit. This would be a problem, at least, a theoretical one.
108+
* are "the same as never having interacted at all".
111109
*/
112110
Rating(fields(0).toInt, fields(1).toInt, fields(2).toDouble - 2.5)
113111
} else {
@@ -124,14 +122,14 @@ object MovieLensALS {
124122
val splits = ratings.randomSplit(Array(0.8, 0.2))
125123
val training = splits(0).cache()
126124
val test = if (params.implicitPrefs) {
127-
/**
125+
/*
128126
* 0 means "don't know" and positive values mean "confident that the prediction should be 1".
129127
* Negative values mean "confident that the prediction should be 0".
130128
* We have in this case used some kind of weighted RMSE. The weight is the absolute value of
131129
* the confidence. The error is the difference between prediction and either 1 or 0,
132130
* depending on whether r is positive or negative.
133131
*/
134-
splits(1).map(x => Rating(x.user, x.product, if(x.rating > 0) 1.0 else 0.0))
132+
splits(1).map(x => Rating(x.user, x.product, if (x.rating > 0) 1.0 else 0.0))
135133
} else {
136134
splits(1)
137135
}.cache()
@@ -159,12 +157,12 @@ object MovieLensALS {
159157
/** Compute RMSE (Root Mean Squared Error). */
160158
def computeRmse(model: MatrixFactorizationModel, data: RDD[Rating], implicitPrefs: Boolean) = {
161159

162-
def evalRating(r: Double) =
163-
if (!implicitPrefs) r else if (r > 1.0) 1.0 else if (r < 0.0) 0.0 else r
160+
def mapPredictedRating(r: Double) = if (implicitPrefs) math.max(math.min(r, 1.0), 0.0) else r
164161

165162
val predictions: RDD[Rating] = model.predict(data.map(x => (x.user, x.product)))
166-
val predictionsAndRatings = predictions.map(x => ((x.user, x.product), evalRating(x.rating)))
167-
.join(data.map(x => ((x.user, x.product), x.rating))).values
163+
val predictionsAndRatings = predictions.map{ x =>
164+
((x.user, x.product), mapPredictedRating(x.rating))
165+
}.join(data.map(x => ((x.user, x.product), x.rating))).values
168166
math.sqrt(predictionsAndRatings.map(x => (x._1 - x._2) * (x._1 - x._2)).mean())
169167
}
170168
}

0 commit comments

Comments
 (0)