From 1290ff83175b9724e3f205cb4ac356d017ddf1ca Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Mon, 24 Apr 2017 16:50:37 -0400 Subject: [PATCH 01/11] update v7 --- .../classification/LogisticRegression.scala | 319 +++++++++++++----- .../LogisticRegressionSuite.scala | 99 ++++-- .../ml/regression/LinearRegressionSuite.scala | 2 +- project/MimaExcludes.scala | 20 +- 4 files changed, 337 insertions(+), 103 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 21957d94e2dc..1d09b16a035e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -35,7 +35,7 @@ import org.apache.spark.ml.optim.loss.{L2Regularization, RDDLossFunction} import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ import org.apache.spark.ml.util._ -import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics +import org.apache.spark.mllib.evaluation.{BinaryClassificationMetrics, MulticlassMetrics} import org.apache.spark.mllib.linalg.VectorImplicits._ import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer import org.apache.spark.mllib.util.MLUtils @@ -882,21 +882,28 @@ class LogisticRegression @Since("1.2.0") ( val model = copyValues(new LogisticRegressionModel(uid, coefficientMatrix, interceptVector, numClasses, isMultinomial)) - // TODO: implement summary model for multinomial case - val m = if (!isMultinomial) { - val (summaryModel, probabilityColName) = model.findSummaryModelAndProbabilityCol() - val logRegSummary = new BinaryLogisticRegressionTrainingSummary( + + val (summaryModel, probabilityColName, predictionColName) = model.findSummaryModel() + val logRegSummary = if (numClasses <= 2) { + new BinaryLogisticRegressionTrainingSummaryImpl( summaryModel.transform(dataset), probabilityColName, + predictionColName, $(labelCol), $(featuresCol), objectiveHistory) - model.setSummary(Some(logRegSummary)) } else { - model + new LogisticRegressionTrainingSummaryImpl( + summaryModel.transform(dataset), + probabilityColName, + predictionColName, + $(labelCol), + $(featuresCol), + objectiveHistory) } - instr.logSuccess(m) - m + model.setSummary(Some(logRegSummary)) + instr.logSuccess(model) + model } @Since("1.4.0") @@ -1018,19 +1025,33 @@ class LogisticRegressionModel private[spark] ( throw new SparkException("No training summary available for this LogisticRegressionModel") } + @Since("2.2.0") + def binarySummary: BinaryLogisticRegressionTrainingSummary = summary match { + case b: BinaryLogisticRegressionTrainingSummary => b + case _ => + throw new RuntimeException("Cannot create a binary summary for a non-binary model" + + s"(numClasses=${numClasses}), use summary instead.") + } + /** - * If the probability column is set returns the current model and probability column, - * otherwise generates a new column and sets it as the probability column on a new copy - * of the current model. 
+ * If the probability and prediction columns are set, this method returns the current model, + * otherwise it generates new columns for them and sets them as columns on a new copy of + * the current model */ - private[classification] def findSummaryModelAndProbabilityCol(): - (LogisticRegressionModel, String) = { - $(probabilityCol) match { - case "" => - val probabilityColName = "probability_" + java.util.UUID.randomUUID.toString - (copy(ParamMap.empty).setProbabilityCol(probabilityColName), probabilityColName) - case p => (this, p) + private[classification] def findSummaryModel(): + (LogisticRegressionModel, String, String) = { + val model = if ($(probabilityCol).isEmpty && $(predictionCol).isEmpty) { + copy(ParamMap.empty) + .setProbabilityCol("probability_" + java.util.UUID.randomUUID.toString) + .setPredictionCol("prediction_" + java.util.UUID.randomUUID.toString) + } else if ($(probabilityCol).isEmpty) { + copy(ParamMap.empty).setProbabilityCol("probability_" + java.util.UUID.randomUUID.toString) + } else if ($(predictionCol).isEmpty) { + copy(ParamMap.empty).setPredictionCol("prediction_" + java.util.UUID.randomUUID.toString) + } else { + this } + (model, model.getProbabilityCol, model.getPredictionCol) } private[classification] @@ -1051,9 +1072,14 @@ class LogisticRegressionModel private[spark] ( @Since("2.0.0") def evaluate(dataset: Dataset[_]): LogisticRegressionSummary = { // Handle possible missing or invalid prediction columns - val (summaryModel, probabilityColName) = findSummaryModelAndProbabilityCol() - new BinaryLogisticRegressionSummary(summaryModel.transform(dataset), - probabilityColName, $(labelCol), $(featuresCol)) + val (summaryModel, probabilityColName, predictionColName) = findSummaryModel() + if (numClasses > 2) { + new LogisticRegressionSummaryImpl(summaryModel.transform(dataset), + probabilityColName, predictionColName, $(labelCol), $(featuresCol)) + } else { + new BinaryLogisticRegressionSummaryImpl(summaryModel.transform(dataset), + probabilityColName, predictionColName, $(labelCol), $(featuresCol)) + } } /** @@ -1324,90 +1350,128 @@ private[ml] class MultiClassSummarizer extends Serializable { } /** - * Abstraction for multinomial Logistic Regression Training results. - * Currently, the training summary ignores the training weights except - * for the objective trace. - */ -sealed trait LogisticRegressionTrainingSummary extends LogisticRegressionSummary { - - /** objective function (scaled loss + regularization) at each iteration. */ - def objectiveHistory: Array[Double] - - /** Number of training iterations until termination */ - def totalIterations: Int = objectiveHistory.length - -} - -/** - * Abstraction for Logistic Regression Results for a given model. + * Abstraction for logistic regression results for a given model. */ sealed trait LogisticRegressionSummary extends Serializable { /** * Dataframe output by the model's `transform` method. */ + @Since("2.2.0") def predictions: DataFrame /** Field in "predictions" which gives the probability of each class as a vector. */ + @Since("2.2.0") def probabilityCol: String + /** Field in "predictions" which gives the prediction of each class. */ + @Since("2.2.0") + def predictionCol: String + /** Field in "predictions" which gives the true label of each instance (if available). */ + @Since("2.2.0") def labelCol: String /** Field in "predictions" which gives the features of each instance as a vector. 
*/ + @Since("2.2.0") def featuresCol: String + @transient private val multiclassMetrics = { + new MulticlassMetrics( + predictions.select( + col(predictionCol), + col(labelCol).cast(DoubleType)) + .rdd.map { case Row(prediction: Double, label: Double) => (prediction, label) }) + } + + /** Returns true positive rate for each label. */ + @Since("2.2.0") + def truePositiveRateByLabel: Array[Double] = recallByLabel + + /** Returns false positive rate for each label. */ + @Since("2.2.0") + def falsePositiveRateByLabel: Array[Double] = { + multiclassMetrics.labels.map(label => multiclassMetrics.falsePositiveRate(label)) + } + + /** Returns precision for each label. */ + @Since("2.2.0") + def precisionByLabel: Array[Double] = { + multiclassMetrics.labels.map(label => multiclassMetrics.precision(label)) + } + + /** Returns recall for each label. */ + @Since("2.2.0") + def recallByLabel: Array[Double] = { + multiclassMetrics.labels.map(label => multiclassMetrics.recall(label)) + } + + /** + * Returns f-measure for each label. + */ + @Since("2.2.0") + def fMeasureByLabel(beta: Double): Array[Double] = { + multiclassMetrics.labels.map(label => multiclassMetrics.fMeasure(label, beta)) + } + + /** Returns f1-measure for each label. */ + @Since("2.2.0") + def fMeasureByLabel: Array[Double] = fMeasureByLabel(1.0) + + /** Returns accuracy. */ + @Since("2.2.0") + def accuracy: Double = multiclassMetrics.accuracy + + /** Returns weighted true positive rate. */ + @Since("2.2.0") + def weightedTruePositiveRate: Double = weightedRecall + + /** Returns weighted false positive rate. */ + @Since("2.2.0") + def weightedFalsePositiveRate: Double = multiclassMetrics.weightedFalsePositiveRate + + /** Returns weighted averaged recall. */ + @Since("2.2.0") + def weightedRecall: Double = multiclassMetrics.weightedRecall + + /** Returns weighted averaged precision. */ + @Since("2.2.0") + def weightedPrecision: Double = multiclassMetrics.weightedPrecision + + /** + * Returns weighted averaged f-measure. + */ + @Since("2.2.0") + def weightedFMeasure(beta: Double): Double = multiclassMetrics.weightedFMeasure(beta) + + /** Returns weighted averaged f1-measure. */ + @Since("2.2.0") + def weightedFMeasure: Double = multiclassMetrics.weightedFMeasure(1.0) } /** - * :: Experimental :: - * Logistic regression training results. - * - * @param predictions dataframe output by the model's `transform` method. - * @param probabilityCol field in "predictions" which gives the probability of - * each class as a vector. - * @param labelCol field in "predictions" which gives the true label of each instance. - * @param featuresCol field in "predictions" which gives the features of each instance as a vector. - * @param objectiveHistory objective function (scaled loss + regularization) at each iteration. + * Abstraction for multiclass logistic regression training results. + * Currently, the training summary ignores the training weights except + * for the objective trace. */ -@Experimental -@Since("1.5.0") -class BinaryLogisticRegressionTrainingSummary private[classification] ( - predictions: DataFrame, - probabilityCol: String, - labelCol: String, - featuresCol: String, - @Since("1.5.0") val objectiveHistory: Array[Double]) - extends BinaryLogisticRegressionSummary(predictions, probabilityCol, labelCol, featuresCol) - with LogisticRegressionTrainingSummary { +sealed trait LogisticRegressionTrainingSummary extends LogisticRegressionSummary { + + /** objective function (scaled loss + regularization) at each iteration. 
*/ + def objectiveHistory: Array[Double] + + /** Number of training iterations. */ + def totalIterations: Int = objectiveHistory.length } /** - * :: Experimental :: - * Binary Logistic regression results for a given model. - * - * @param predictions dataframe output by the model's `transform` method. - * @param probabilityCol field in "predictions" which gives the probability of - * each class as a vector. - * @param labelCol field in "predictions" which gives the true label of each instance. - * @param featuresCol field in "predictions" which gives the features of each instance as a vector. + * Abstraction for binary logistic regression results for a given model. */ -@Experimental -@Since("1.5.0") -class BinaryLogisticRegressionSummary private[classification] ( - @Since("1.5.0") @transient override val predictions: DataFrame, - @Since("1.5.0") override val probabilityCol: String, - @Since("1.5.0") override val labelCol: String, - @Since("1.6.0") override val featuresCol: String) extends LogisticRegressionSummary { - +sealed trait BinaryLogisticRegressionSummary extends LogisticRegressionSummary { private val sparkSession = predictions.sparkSession import sparkSession.implicits._ - /** - * Returns a BinaryClassificationMetrics object. - */ // TODO: Allow the user to vary the number of bins using a setBins method in // BinaryClassificationMetrics. For now the default is set to 100. @transient private val binaryMetrics = new BinaryClassificationMetrics( @@ -1484,3 +1548,106 @@ class BinaryLogisticRegressionSummary private[classification] ( binaryMetrics.recallByThreshold().toDF("threshold", "recall") } } + +sealed trait BinaryLogisticRegressionTrainingSummary extends BinaryLogisticRegressionSummary + with LogisticRegressionTrainingSummary + +/** + * :: Experimental :: + * Multiclass logistic regression training results. + * + * @param predictions dataframe output by the model's `transform` method. + * @param probabilityCol field in "predictions" which gives the probability of + * each class as a vector. + * @param predictionCol field in "predictions" which gives the prediction for a data instance as a + * double. + * @param labelCol field in "predictions" which gives the true label of each instance. + * @param featuresCol field in "predictions" which gives the features of each instance as a vector. + * @param objectiveHistory objective function (scaled loss + regularization) at each iteration. + */ +@Experimental +@Since("2.2.0") +private class LogisticRegressionTrainingSummaryImpl( + override val predictions: DataFrame, + override val probabilityCol: String, + override val predictionCol: String, + override val labelCol: String, + override val featuresCol: String, + @Since("1.5.0") val objectiveHistory: Array[Double]) + extends LogisticRegressionSummaryImpl( + predictions, probabilityCol, predictionCol, labelCol, featuresCol) + with LogisticRegressionTrainingSummary { + +} + +/** + * :: Experimental :: + * Multiclass Logistic regression results for a given model. + * + * @param predictions dataframe output by the model's `transform` method. + * @param probabilityCol field in "predictions" which gives the probability of + * each class as a vector. + * @param predictionCol field in "predictions" which gives the prediction for a data instance as a + * double. + * @param labelCol field in "predictions" which gives the true label of each instance. + * @param featuresCol field in "predictions" which gives the features of each instance as a vector. 
+ */ +@Experimental +@Since("2.2.0") +private class LogisticRegressionSummaryImpl( + @Since("2.2.0") @transient override val predictions: DataFrame, + @Since("2.2.0") override val probabilityCol: String, + @Since("2.2.0") override val predictionCol: String, + @Since("2.2.0") override val labelCol: String, + @Since("2.2.0") override val featuresCol: String) + extends LogisticRegressionSummary + +/** + * :: Experimental :: + * Binary logistic regression training results. + * + * @param predictions dataframe output by the model's `transform` method. + * @param probabilityCol field in "predictions" which gives the probability of + * each class as a vector. + * @param predictionCol field in "predictions" which gives the prediction for a data instance as a + * double. + * @param labelCol field in "predictions" which gives the true label of each instance. + * @param featuresCol field in "predictions" which gives the features of each instance as a vector. + * @param objectiveHistory objective function (scaled loss + regularization) at each iteration. + */ +@Experimental +@Since("2.2.0") +private class BinaryLogisticRegressionTrainingSummaryImpl( + override val predictions: DataFrame, + override val probabilityCol: String, + override val predictionCol: String, + override val labelCol: String, + override val featuresCol: String, + @Since("1.5.0") override val objectiveHistory: Array[Double]) + extends BinaryLogisticRegressionSummaryImpl( + predictions, probabilityCol, predictionCol, labelCol, featuresCol) + with BinaryLogisticRegressionTrainingSummary + +/** + * :: Experimental :: + * Binary logistic regression results for a given model. + * + * @param predictions dataframe output by the model's `transform` method. + * @param probabilityCol field in "predictions" which gives the probability of + * each class as a vector. + * @param predictionCol field in "predictions" which gives the prediction of + * each class as a double. + * @param labelCol field in "predictions" which gives the true label of each instance. + * @param featuresCol field in "predictions" which gives the features of each instance as a vector. + */ +@Experimental +@Since("2.2.0") +private class BinaryLogisticRegressionSummaryImpl( + @Since("2.2.0") @transient override val predictions: DataFrame, + @Since("2.2.0") override val probabilityCol: String, + @Since("2.2.0") override val predictionCol: String, + @Since("2.2.0") override val labelCol: String, + @Since("2.2.0") override val featuresCol: String) + extends LogisticRegressionSummaryImpl( + predictions, probabilityCol, predictionCol, labelCol, featuresCol) + with BinaryLogisticRegressionSummary diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index 0570499e7451..cc701dfdeb98 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -2263,51 +2263,98 @@ class LogisticRegressionSuite } test("evaluate on test set") { - // TODO: add for multiclass when model summary becomes available // Evaluate on test set should be same as that of the transformed training data. 
val lr = new LogisticRegression() .setMaxIter(10) .setRegParam(1.0) .setThreshold(0.6) - val model = lr.fit(smallBinaryDataset) - val summary = model.summary.asInstanceOf[BinaryLogisticRegressionSummary] - - val sameSummary = - model.evaluate(smallBinaryDataset).asInstanceOf[BinaryLogisticRegressionSummary] - assert(summary.areaUnderROC === sameSummary.areaUnderROC) - assert(summary.roc.collect() === sameSummary.roc.collect()) - assert(summary.pr.collect === sameSummary.pr.collect()) + .setFamily("binomial") + val blorModel = lr.fit(smallBinaryDataset) + val blorSummary = blorModel.binarySummary + + val sameBlorSummary = + blorModel.evaluate(smallBinaryDataset).asInstanceOf[BinaryLogisticRegressionSummary] + assert(blorSummary.areaUnderROC === sameBlorSummary.areaUnderROC) + assert(blorSummary.roc.collect() === sameBlorSummary.roc.collect()) + assert(blorSummary.pr.collect === sameBlorSummary.pr.collect()) assert( - summary.fMeasureByThreshold.collect() === sameSummary.fMeasureByThreshold.collect()) - assert(summary.recallByThreshold.collect() === sameSummary.recallByThreshold.collect()) + blorSummary.fMeasureByThreshold.collect() === sameBlorSummary.fMeasureByThreshold.collect()) assert( - summary.precisionByThreshold.collect() === sameSummary.precisionByThreshold.collect()) + blorSummary.recallByThreshold.collect() === sameBlorSummary.recallByThreshold.collect()) + assert( + blorSummary.precisionByThreshold.collect() === sameBlorSummary.precisionByThreshold.collect()) + + lr.setFamily("multinomial") + val mlorModel = lr.fit(smallMultinomialDataset) + val mlorSummary = mlorModel.summary + + val mlorSameSummary = mlorModel.evaluate(smallMultinomialDataset) + + assert(mlorSummary.truePositiveRateByLabel === mlorSameSummary.truePositiveRateByLabel) + assert(mlorSummary.falsePositiveRateByLabel === mlorSameSummary.falsePositiveRateByLabel) + assert(mlorSummary.precisionByLabel === mlorSameSummary.precisionByLabel) + assert(mlorSummary.recallByLabel === mlorSameSummary.recallByLabel) + assert(mlorSummary.fMeasureByLabel === mlorSameSummary.fMeasureByLabel) + assert(mlorSummary.accuracy === mlorSameSummary.accuracy) + assert(mlorSummary.weightedTruePositiveRate === mlorSameSummary.weightedTruePositiveRate) + assert(mlorSummary.weightedFalsePositiveRate === mlorSameSummary.weightedFalsePositiveRate) + assert(mlorSummary.weightedPrecision === mlorSameSummary.weightedPrecision) + assert(mlorSummary.weightedRecall === mlorSameSummary.weightedRecall) + assert(mlorSummary.weightedFMeasure === mlorSameSummary.weightedFMeasure) } test("evaluate with labels that are not doubles") { // Evaluate a test set with Label that is a numeric type other than Double - val lr = new LogisticRegression() + val blor = new LogisticRegression() .setMaxIter(1) .setRegParam(1.0) - val model = lr.fit(smallBinaryDataset) - val summary = model.evaluate(smallBinaryDataset).asInstanceOf[BinaryLogisticRegressionSummary] + .setFamily("binomial") + val blorModel = blor.fit(smallBinaryDataset) + val blorSummary = blorModel.evaluate(smallBinaryDataset) + .asInstanceOf[BinaryLogisticRegressionSummary] + + val blorLongLabelData = smallBinaryDataset.select(col(blorModel.getLabelCol).cast(LongType), + col(blorModel.getFeaturesCol)) + val blorLongSummary = blorModel.evaluate(blorLongLabelData) + .asInstanceOf[BinaryLogisticRegressionSummary] + + assert(blorSummary.areaUnderROC === blorLongSummary.areaUnderROC) + + val mlor = new LogisticRegression() + .setMaxIter(1) + .setRegParam(1.0) + .setFamily("multinomial") + val mlorModel = 
mlor.fit(smallMultinomialDataset) + val mlorSummary = mlorModel.evaluate(smallMultinomialDataset) - val longLabelData = smallBinaryDataset.select(col(model.getLabelCol).cast(LongType), - col(model.getFeaturesCol)) - val longSummary = model.evaluate(longLabelData).asInstanceOf[BinaryLogisticRegressionSummary] + val mlorLongLabelData = smallMultinomialDataset.select( + col(mlorModel.getLabelCol).cast(LongType), + col(mlorModel.getFeaturesCol)) + val mlorLongSummary = mlorModel.evaluate(mlorLongLabelData) - assert(summary.areaUnderROC === longSummary.areaUnderROC) + assert(mlorSummary.accuracy === mlorLongSummary.accuracy) } test("statistics on training data") { // Test that loss is monotonically decreasing. - val lr = new LogisticRegression() + val blor = new LogisticRegression() .setMaxIter(10) .setRegParam(1.0) - .setThreshold(0.6) - val model = lr.fit(smallBinaryDataset) + .setFamily("binomial") + val blorModel = blor.fit(smallBinaryDataset) + assert( + blorModel.summary + .objectiveHistory + .sliding(2) + .forall(x => x(0) >= x(1))) + + val mlor = new LogisticRegression() + .setMaxIter(10) + .setRegParam(1.0) + .setFamily("multinomial") + val mlorModel = mlor.fit(smallMultinomialDataset) assert( - model.summary + mlorModel.summary .objectiveHistory .sliding(2) .forall(x => x(0) >= x(1))) @@ -2392,7 +2439,7 @@ class LogisticRegressionSuite predictions3.zip(predictions4).foreach { case (Row(p1: Double), Row(p2: Double)) => assert(p1 === p2) } - // TODO: check that it converges in a single iteration when model summary is available + assert(model4.summary.totalIterations === 1) } test("binary logistic regression with all labels the same") { @@ -2453,6 +2500,7 @@ class LogisticRegressionSuite assert(prob === Vectors.dense(Array(0.0, 0.0, 0.0, 0.0, 1.0))) assert(pred === 4.0) } + assert(model.summary.totalIterations === 0) // force the model to be trained with only one class val constantZeroData = Seq( @@ -2466,6 +2514,7 @@ class LogisticRegressionSuite assert(prob === Vectors.dense(Array(1.0))) assert(pred === 0.0) } + assert(modelZeroLabel.summary.totalIterations > 0) // ensure that the correct value is predicted when numClasses passed through metadata val labelMeta = NominalAttribute.defaultAttr.withName("label").withNumValues(6).toMetadata() @@ -2479,7 +2528,7 @@ class LogisticRegressionSuite assert(prob === Vectors.dense(Array(0.0, 0.0, 0.0, 0.0, 1.0, 0.0))) assert(pred === 4.0) } - // TODO: check num iters is zero when it become available in the model + require(modelWithMetadata.summary.totalIterations === 0) } test("compressed storage for constant label") { diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala index e7bd4eb9e0ad..f470dca7dbd0 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala @@ -715,7 +715,7 @@ class LinearRegressionSuite assert(modelNoPredictionColFieldNames.exists(s => s.startsWith("prediction_"))) // Residuals in [[LinearRegressionResults]] should equal those manually computed - val expectedResiduals = datasetWithDenseFeature.select("features", "label") + datasetWithDenseFeature.select("features", "label") .rdd .map { case Row(features: DenseVector, label: Double) => val prediction = diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 9bda917377c2..10a958b7464b 100644 --- 
a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -115,7 +115,25 @@ object MimaExcludes { ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.linalg.Matrix.toDenseColMajor"), ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.linalg.Matrix.toDenseMatrix"), ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.linalg.Matrix.toSparseMatrix"), - ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.linalg.Matrix.getSizeInBytes") + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.linalg.Matrix.getSizeInBytes"), + + // [SPARK-17139] Add model summary for MultinomialLogisticRegression + ProblemFilters.exclude[IncompatibleTemplateDefProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionTrainingSummary"), + ProblemFilters.exclude[IncompatibleTemplateDefProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.predictionCol"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.truePositiveRateByLabel"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.falsePositiveRateByLabel"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.precisionByLabel"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.recallByLabel"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.fMeasureByLabel"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.accuracy"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedTruePositiveRate"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedFalsePositiveRate"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedRecall"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedPrecision"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedFMeasure"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.org$apache$spark$ml$classification$LogisticRegressionSummary$$multiclassMetrics"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.org$apache$spark$ml$classification$LogisticRegressionSummary$_setter_$org$apache$spark$ml$classification$LogisticRegressionSummary$$multiclassMetrics_=") ) ++ Seq( // [SPARK-17019] Expose on-heap and off-heap memory usage in various places ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.scheduler.SparkListenerBlockManagerAdded.copy"), From 17272033e30a9c8d754d51442b9d206396d60d15 Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Mon, 24 Apr 2017 16:55:24 -0400 Subject: [PATCH 02/11] fix nits --- .../classification/LogisticRegression.scala | 6 +- 
.../LogisticRegressionSuite.scala | 60 ++++++++++++++++--- 2 files changed, 53 insertions(+), 13 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 1d09b16a035e..2f0118a28596 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -1576,13 +1576,11 @@ private class LogisticRegressionTrainingSummaryImpl( @Since("1.5.0") val objectiveHistory: Array[Double]) extends LogisticRegressionSummaryImpl( predictions, probabilityCol, predictionCol, labelCol, featuresCol) - with LogisticRegressionTrainingSummary { - -} + with LogisticRegressionTrainingSummary /** * :: Experimental :: - * Multiclass Logistic regression results for a given model. + * Multiclass logistic regression results for a given model. * * @param predictions dataframe output by the model's `transform` method. * @param probabilityCol field in "predictions" which gives the probability of diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index cc701dfdeb98..a1e32f33ba88 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -199,15 +199,57 @@ class LogisticRegressionSuite } } - test("empty probabilityCol") { - val lr = new LogisticRegression().setProbabilityCol("") - val model = lr.fit(smallBinaryDataset) - assert(model.hasSummary) - // Validate that we re-insert a probability column for evaluation - val fieldNames = model.summary.predictions.schema.fieldNames - assert(smallBinaryDataset.schema.fieldNames.toSet.subsetOf( - fieldNames.toSet)) - assert(fieldNames.exists(s => s.startsWith("probability_"))) + test("empty probabilityCol or predictionCol") { + val lr = new LogisticRegression().setMaxIter(1) + val datasetFieldNames = smallBinaryDataset.schema.fieldNames.toSet + def checkSummarySchema(model: LogisticRegressionModel, columns: Seq[String]): Unit = { + val fieldNames = model.summary.predictions.schema.fieldNames + assert(model.hasSummary) + assert(datasetFieldNames.subsetOf(fieldNames.toSet)) + columns.foreach { c => assert(fieldNames.exists(_.startsWith(c))) } + } + // check that the summary model adds the appropriate columns + Seq(("binomial", smallBinaryDataset), ("multinomial", smallMultinomialDataset)).foreach { + case (family, dataset) => + lr.setFamily(family) + lr.setProbabilityCol("").setPredictionCol("prediction") + val modelNoProb = lr.fit(smallBinaryDataset) + checkSummarySchema(modelNoProb, Seq("probability_")) + + lr.setProbabilityCol("probability").setPredictionCol("") + val modelNoPred = lr.fit(smallBinaryDataset) + checkSummarySchema(modelNoPred, Seq("prediction_")) + + lr.setProbabilityCol("").setPredictionCol("") + val modelNoPredNoProb = lr.fit(smallBinaryDataset) + checkSummarySchema(modelNoPredNoProb, Seq("prediction_", "probability_")) + } + } + + test("check summary types for binary and multiclass") { + val lr = new LogisticRegression() + .setFamily("binomial") + + val blorModel = lr.fit(smallBinaryDataset) + assert(blorModel.summary.isInstanceOf[BinaryLogisticRegressionTrainingSummaryImpl]) + 
assert(blorModel.binarySummary.isInstanceOf[BinaryLogisticRegressionTrainingSummaryImpl]) + + val mlorModel = lr.setFamily("multinomial").fit(smallMultinomialDataset) + assert(mlorModel.summary.isInstanceOf[LogisticRegressionTrainingSummaryImpl]) + withClue("cannot get binary summary for multiclass model") { + intercept[RuntimeException] { + mlorModel.binarySummary + } + } + + val mlorBinaryModel = lr.setFamily("multinomial").fit(smallBinaryDataset) + assert(mlorBinaryModel.summary.isInstanceOf[BinaryLogisticRegressionTrainingSummaryImpl]) + assert(mlorBinaryModel.binarySummary.isInstanceOf[BinaryLogisticRegressionTrainingSummaryImpl]) + + val blorSummary = blorModel.evaluate(smallBinaryDataset) + val mlorSummary = mlorModel.evaluate(smallMultinomialDataset) + assert(blorSummary.isInstanceOf[BinaryLogisticRegressionSummaryImpl]) + assert(mlorSummary.isInstanceOf[LogisticRegressionSummaryImpl]) } test("setThreshold, getThreshold") { From a96dc54a5743876939e2773d2ad3fe8ccc9b8cd1 Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Mon, 8 May 2017 15:29:29 -0400 Subject: [PATCH 03/11] add since tags2 --- .../classification/LogisticRegression.scala | 34 ++++++++----------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 2f0118a28596..65f49c22d221 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -1456,9 +1456,11 @@ sealed trait LogisticRegressionSummary extends Serializable { */ sealed trait LogisticRegressionTrainingSummary extends LogisticRegressionSummary { + @Since("1.5.0") /** objective function (scaled loss + regularization) at each iteration. */ def objectiveHistory: Array[Double] + @Since("1.5.0") /** Number of training iterations. */ def totalIterations: Int = objectiveHistory.length @@ -1565,15 +1567,13 @@ sealed trait BinaryLogisticRegressionTrainingSummary extends BinaryLogisticRegre * @param featuresCol field in "predictions" which gives the features of each instance as a vector. * @param objectiveHistory objective function (scaled loss + regularization) at each iteration. */ -@Experimental -@Since("2.2.0") private class LogisticRegressionTrainingSummaryImpl( override val predictions: DataFrame, override val probabilityCol: String, override val predictionCol: String, override val labelCol: String, override val featuresCol: String, - @Since("1.5.0") val objectiveHistory: Array[Double]) + val objectiveHistory: Array[Double]) extends LogisticRegressionSummaryImpl( predictions, probabilityCol, predictionCol, labelCol, featuresCol) with LogisticRegressionTrainingSummary @@ -1590,14 +1590,12 @@ private class LogisticRegressionTrainingSummaryImpl( * @param labelCol field in "predictions" which gives the true label of each instance. * @param featuresCol field in "predictions" which gives the features of each instance as a vector. 
*/ -@Experimental -@Since("2.2.0") private class LogisticRegressionSummaryImpl( - @Since("2.2.0") @transient override val predictions: DataFrame, - @Since("2.2.0") override val probabilityCol: String, - @Since("2.2.0") override val predictionCol: String, - @Since("2.2.0") override val labelCol: String, - @Since("2.2.0") override val featuresCol: String) + @transient override val predictions: DataFrame, + override val probabilityCol: String, + override val predictionCol: String, + override val labelCol: String, + override val featuresCol: String) extends LogisticRegressionSummary /** @@ -1613,15 +1611,13 @@ private class LogisticRegressionSummaryImpl( * @param featuresCol field in "predictions" which gives the features of each instance as a vector. * @param objectiveHistory objective function (scaled loss + regularization) at each iteration. */ -@Experimental -@Since("2.2.0") private class BinaryLogisticRegressionTrainingSummaryImpl( override val predictions: DataFrame, override val probabilityCol: String, override val predictionCol: String, override val labelCol: String, override val featuresCol: String, - @Since("1.5.0") override val objectiveHistory: Array[Double]) + override val objectiveHistory: Array[Double]) extends BinaryLogisticRegressionSummaryImpl( predictions, probabilityCol, predictionCol, labelCol, featuresCol) with BinaryLogisticRegressionTrainingSummary @@ -1638,14 +1634,12 @@ private class BinaryLogisticRegressionTrainingSummaryImpl( * @param labelCol field in "predictions" which gives the true label of each instance. * @param featuresCol field in "predictions" which gives the features of each instance as a vector. */ -@Experimental -@Since("2.2.0") private class BinaryLogisticRegressionSummaryImpl( - @Since("2.2.0") @transient override val predictions: DataFrame, - @Since("2.2.0") override val probabilityCol: String, - @Since("2.2.0") override val predictionCol: String, - @Since("2.2.0") override val labelCol: String, - @Since("2.2.0") override val featuresCol: String) + @transient override val predictions: DataFrame, + override val probabilityCol: String, + override val predictionCol: String, + override val labelCol: String, + override val featuresCol: String) extends LogisticRegressionSummaryImpl( predictions, probabilityCol, predictionCol, labelCol, featuresCol) with BinaryLogisticRegressionSummary From 3c4b995a325d277fa46fefec0db74cbcb75e3b36 Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Mon, 15 May 2017 13:24:17 -0400 Subject: [PATCH 04/11] update since tag to 2.3 --- .../classification/LogisticRegression.scala | 36 ++++++++--------- project/MimaExcludes.scala | 40 +++++++++---------- 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 65f49c22d221..f5dbf4e0e74a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -1357,23 +1357,23 @@ sealed trait LogisticRegressionSummary extends Serializable { /** * Dataframe output by the model's `transform` method. */ - @Since("2.2.0") + @Since("2.3.0") def predictions: DataFrame /** Field in "predictions" which gives the probability of each class as a vector. */ - @Since("2.2.0") + @Since("2.3.0") def probabilityCol: String /** Field in "predictions" which gives the prediction of each class. 
*/ - @Since("2.2.0") + @Since("2.3.0") def predictionCol: String /** Field in "predictions" which gives the true label of each instance (if available). */ - @Since("2.2.0") + @Since("2.3.0") def labelCol: String /** Field in "predictions" which gives the features of each instance as a vector. */ - @Since("2.2.0") + @Since("2.3.0") def featuresCol: String @transient private val multiclassMetrics = { @@ -1385,23 +1385,23 @@ sealed trait LogisticRegressionSummary extends Serializable { } /** Returns true positive rate for each label. */ - @Since("2.2.0") + @Since("2.3.0") def truePositiveRateByLabel: Array[Double] = recallByLabel /** Returns false positive rate for each label. */ - @Since("2.2.0") + @Since("2.3.0") def falsePositiveRateByLabel: Array[Double] = { multiclassMetrics.labels.map(label => multiclassMetrics.falsePositiveRate(label)) } /** Returns precision for each label. */ - @Since("2.2.0") + @Since("2.3.0") def precisionByLabel: Array[Double] = { multiclassMetrics.labels.map(label => multiclassMetrics.precision(label)) } /** Returns recall for each label. */ - @Since("2.2.0") + @Since("2.3.0") def recallByLabel: Array[Double] = { multiclassMetrics.labels.map(label => multiclassMetrics.recall(label)) } @@ -1409,43 +1409,43 @@ sealed trait LogisticRegressionSummary extends Serializable { /** * Returns f-measure for each label. */ - @Since("2.2.0") + @Since("2.3.0") def fMeasureByLabel(beta: Double): Array[Double] = { multiclassMetrics.labels.map(label => multiclassMetrics.fMeasure(label, beta)) } /** Returns f1-measure for each label. */ - @Since("2.2.0") + @Since("2.3.0") def fMeasureByLabel: Array[Double] = fMeasureByLabel(1.0) /** Returns accuracy. */ - @Since("2.2.0") + @Since("2.3.0") def accuracy: Double = multiclassMetrics.accuracy /** Returns weighted true positive rate. */ - @Since("2.2.0") + @Since("2.3.0") def weightedTruePositiveRate: Double = weightedRecall /** Returns weighted false positive rate. */ - @Since("2.2.0") + @Since("2.3.0") def weightedFalsePositiveRate: Double = multiclassMetrics.weightedFalsePositiveRate /** Returns weighted averaged recall. */ - @Since("2.2.0") + @Since("2.3.0") def weightedRecall: Double = multiclassMetrics.weightedRecall /** Returns weighted averaged precision. */ - @Since("2.2.0") + @Since("2.3.0") def weightedPrecision: Double = multiclassMetrics.weightedPrecision /** * Returns weighted averaged f-measure. */ - @Since("2.2.0") + @Since("2.3.0") def weightedFMeasure(beta: Double): Double = multiclassMetrics.weightedFMeasure(beta) /** Returns weighted averaged f1-measure. 
*/ - @Since("2.2.0") + @Since("2.3.0") def weightedFMeasure: Double = multiclassMetrics.weightedFMeasure(1.0) } diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 10a958b7464b..92dc36e7fee1 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -44,7 +44,25 @@ object MimaExcludes { ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.status.api.v1.ShuffleReadMetricDistributions.this"), // [SPARK-21276] Update lz4-java to the latest (v1.4.0) - ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.io.LZ4BlockInputStream") + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.io.LZ4BlockInputStream"), + + // [SPARK-17139] Add model summary for MultinomialLogisticRegression + ProblemFilters.exclude[IncompatibleTemplateDefProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionTrainingSummary"), + ProblemFilters.exclude[IncompatibleTemplateDefProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.predictionCol"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.truePositiveRateByLabel"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.falsePositiveRateByLabel"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.precisionByLabel"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.recallByLabel"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.fMeasureByLabel"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.accuracy"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedTruePositiveRate"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedFalsePositiveRate"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedRecall"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedPrecision"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedFMeasure"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.org$apache$spark$ml$classification$LogisticRegressionSummary$$multiclassMetrics"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.org$apache$spark$ml$classification$LogisticRegressionSummary$_setter_$org$apache$spark$ml$classification$LogisticRegressionSummary$$multiclassMetrics_=") ) // Exclude rules for 2.2.x @@ -115,25 +133,7 @@ object MimaExcludes { ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.linalg.Matrix.toDenseColMajor"), ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.linalg.Matrix.toDenseMatrix"), ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.linalg.Matrix.toSparseMatrix"), - 
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.linalg.Matrix.getSizeInBytes"), - - // [SPARK-17139] Add model summary for MultinomialLogisticRegression - ProblemFilters.exclude[IncompatibleTemplateDefProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionTrainingSummary"), - ProblemFilters.exclude[IncompatibleTemplateDefProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary"), - ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.predictionCol"), - ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.truePositiveRateByLabel"), - ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.falsePositiveRateByLabel"), - ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.precisionByLabel"), - ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.recallByLabel"), - ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.fMeasureByLabel"), - ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.accuracy"), - ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedTruePositiveRate"), - ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedFalsePositiveRate"), - ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedRecall"), - ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedPrecision"), - ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedFMeasure"), - ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.org$apache$spark$ml$classification$LogisticRegressionSummary$$multiclassMetrics"), - ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.org$apache$spark$ml$classification$LogisticRegressionSummary$_setter_$org$apache$spark$ml$classification$LogisticRegressionSummary$$multiclassMetrics_=") + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.linalg.Matrix.getSizeInBytes") ) ++ Seq( // [SPARK-17019] Expose on-heap and off-heap memory usage in various places ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.scheduler.SparkListenerBlockManagerAdded.copy"), From deddb00671af3e878b4bb390dd305d6a53f5da68 Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Thu, 17 Aug 2017 10:56:17 +0800 Subject: [PATCH 05/11] update --- .../classification/LogisticRegression.scala | 40 +++++++++++-------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index f5dbf4e0e74a..4aca3ca86f78 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala 
@@ -884,7 +884,7 @@ class LogisticRegression @Since("1.2.0") ( numClasses, isMultinomial)) val (summaryModel, probabilityColName, predictionColName) = model.findSummaryModel() - val logRegSummary = if (numClasses <= 2) { + val logRegSummary = if (!isMultinomial) { new BinaryLogisticRegressionTrainingSummaryImpl( summaryModel.transform(dataset), probabilityColName, @@ -1384,45 +1384,50 @@ sealed trait LogisticRegressionSummary extends Serializable { .rdd.map { case Row(prediction: Double, label: Double) => (prediction, label) }) } - /** Returns true positive rate for each label. */ + /** Returns true positive rate for each label (category). */ @Since("2.3.0") def truePositiveRateByLabel: Array[Double] = recallByLabel - /** Returns false positive rate for each label. */ + /** Returns false positive rate for each label (category). */ @Since("2.3.0") def falsePositiveRateByLabel: Array[Double] = { multiclassMetrics.labels.map(label => multiclassMetrics.falsePositiveRate(label)) } - /** Returns precision for each label. */ + /** Returns precision for each label (category). */ @Since("2.3.0") def precisionByLabel: Array[Double] = { multiclassMetrics.labels.map(label => multiclassMetrics.precision(label)) } - /** Returns recall for each label. */ + /** Returns recall for each label (category). */ @Since("2.3.0") def recallByLabel: Array[Double] = { multiclassMetrics.labels.map(label => multiclassMetrics.recall(label)) } - /** - * Returns f-measure for each label. - */ + /** Returns f-measure for each label (category). */ @Since("2.3.0") def fMeasureByLabel(beta: Double): Array[Double] = { multiclassMetrics.labels.map(label => multiclassMetrics.fMeasure(label, beta)) } - /** Returns f1-measure for each label. */ + /** Returns f1-measure for each label (category). */ @Since("2.3.0") def fMeasureByLabel: Array[Double] = fMeasureByLabel(1.0) - /** Returns accuracy. */ + /** + * Returns accuracy. + * (equals to the total number of correctly classified instances + * out of the total number of instances.) + */ @Since("2.3.0") def accuracy: Double = multiclassMetrics.accuracy - /** Returns weighted true positive rate. */ + /** + * Returns weighted true positive rate. + * (equals to precision, recall and f-measure) + */ @Since("2.3.0") def weightedTruePositiveRate: Double = weightedRecall @@ -1430,7 +1435,10 @@ sealed trait LogisticRegressionSummary extends Serializable { @Since("2.3.0") def weightedFalsePositiveRate: Double = multiclassMetrics.weightedFalsePositiveRate - /** Returns weighted averaged recall. */ + /** + * Returns weighted averaged recall. + * (equals to precision, recall and f-measure) + */ @Since("2.3.0") def weightedRecall: Double = multiclassMetrics.weightedRecall @@ -1438,9 +1446,7 @@ sealed trait LogisticRegressionSummary extends Serializable { @Since("2.3.0") def weightedPrecision: Double = multiclassMetrics.weightedPrecision - /** - * Returns weighted averaged f-measure. - */ + /** Returns weighted averaged f-measure. */ @Since("2.3.0") def weightedFMeasure(beta: Double): Double = multiclassMetrics.weightedFMeasure(beta) @@ -1456,12 +1462,12 @@ sealed trait LogisticRegressionSummary extends Serializable { */ sealed trait LogisticRegressionTrainingSummary extends LogisticRegressionSummary { - @Since("1.5.0") /** objective function (scaled loss + regularization) at each iteration. */ + @Since("1.5.0") def objectiveHistory: Array[Double] - @Since("1.5.0") /** Number of training iterations. 
*/ + @Since("1.5.0") def totalIterations: Int = objectiveHistory.length } From 2bce87b10905aa61cc7e85084c519cea28ed8793 Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Thu, 17 Aug 2017 14:58:16 +0800 Subject: [PATCH 06/11] update --- .../org/apache/spark/ml/classification/LogisticRegression.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 4aca3ca86f78..c26b93697022 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -884,7 +884,7 @@ class LogisticRegression @Since("1.2.0") ( numClasses, isMultinomial)) val (summaryModel, probabilityColName, predictionColName) = model.findSummaryModel() - val logRegSummary = if (!isMultinomial) { + val logRegSummary = if (!isMultinomial || (isMultinomial && numClasses <= 2)) { new BinaryLogisticRegressionTrainingSummaryImpl( summaryModel.transform(dataset), probabilityColName, From ce95023b44db2d86360144cc69814435a3b2373c Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Thu, 17 Aug 2017 20:04:06 +0800 Subject: [PATCH 07/11] update --- .../org/apache/spark/ml/classification/LogisticRegression.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index c26b93697022..5f4009c86e01 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -884,7 +884,7 @@ class LogisticRegression @Since("1.2.0") ( numClasses, isMultinomial)) val (summaryModel, probabilityColName, predictionColName) = model.findSummaryModel() - val logRegSummary = if (!isMultinomial || (isMultinomial && numClasses <= 2)) { + val logRegSummary = if (numClasses <=2) { new BinaryLogisticRegressionTrainingSummaryImpl( summaryModel.transform(dataset), probabilityColName, From b6cde56f18caa85f79b9cd0dc604ae1a46fd4948 Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Fri, 18 Aug 2017 15:23:41 +0800 Subject: [PATCH 08/11] update --- .../classification/LogisticRegression.scala | 67 +++++++++++-------- .../LogisticRegressionSuite.scala | 21 +++--- 2 files changed, 50 insertions(+), 38 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 5f4009c86e01..56144453e185 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -884,7 +884,7 @@ class LogisticRegression @Since("1.2.0") ( numClasses, isMultinomial)) val (summaryModel, probabilityColName, predictionColName) = model.findSummaryModel() - val logRegSummary = if (numClasses <=2) { + val logRegSummary = if (numClasses <= 2) { new BinaryLogisticRegressionTrainingSummaryImpl( summaryModel.transform(dataset), probabilityColName, @@ -1017,15 +1017,19 @@ class LogisticRegressionModel private[spark] ( private var trainingSummary: Option[LogisticRegressionTrainingSummary] = None /** - * Gets summary of model on training set. An exception is - * thrown if `trainingSummary == None`. 
+ * Gets summary of model on training set. An exception is thrown + * if `trainingSummary == None`. */ @Since("1.5.0") def summary: LogisticRegressionTrainingSummary = trainingSummary.getOrElse { throw new SparkException("No training summary available for this LogisticRegressionModel") } - @Since("2.2.0") + /** + * Gets summary of model on training set. An exception is thrown + * if `trainingSummary == None` or it is a multiclass model. + */ + @Since("2.3.0") def binarySummary: BinaryLogisticRegressionTrainingSummary = summary match { case b: BinaryLogisticRegressionTrainingSummary => b case _ => @@ -1357,11 +1361,11 @@ sealed trait LogisticRegressionSummary extends Serializable { /** * Dataframe output by the model's `transform` method. */ - @Since("2.3.0") + @Since("1.5.0") def predictions: DataFrame /** Field in "predictions" which gives the probability of each class as a vector. */ - @Since("2.3.0") + @Since("1.5.0") def probabilityCol: String /** Field in "predictions" which gives the prediction of each class. */ @@ -1369,11 +1373,11 @@ sealed trait LogisticRegressionSummary extends Serializable { def predictionCol: String /** Field in "predictions" which gives the true label of each instance (if available). */ - @Since("2.3.0") + @Since("1.5.0") def labelCol: String /** Field in "predictions" which gives the features of each instance as a vector. */ - @Since("2.3.0") + @Since("1.6.0") def featuresCol: String @transient private val multiclassMetrics = { @@ -1384,6 +1388,17 @@ sealed trait LogisticRegressionSummary extends Serializable { .rdd.map { case Row(prediction: Double, label: Double) => (prediction, label) }) } + /** + * Returns the sequence of labels in ascending order + * + * Note: In most cases, it will be values {0.0, 1.0, ..., numClasses-1}, However, if the + * training set is missing a label, then all of the arrays over labels + * (e.g., from truePositiveRateByLabel) will be of length numClasses-1 instead of the + * expected numClasses. + */ + @Since("2.3.0") + def labels: Array[Double] = multiclassMetrics.labels + /** Returns true positive rate for each label (category). */ @Since("2.3.0") def truePositiveRateByLabel: Array[Double] = recallByLabel @@ -1561,7 +1576,6 @@ sealed trait BinaryLogisticRegressionTrainingSummary extends BinaryLogisticRegre with LogisticRegressionTrainingSummary /** - * :: Experimental :: * Multiclass logistic regression training results. * * @param predictions dataframe output by the model's `transform` method. @@ -1574,18 +1588,17 @@ sealed trait BinaryLogisticRegressionTrainingSummary extends BinaryLogisticRegre * @param objectiveHistory objective function (scaled loss + regularization) at each iteration. */ private class LogisticRegressionTrainingSummaryImpl( - override val predictions: DataFrame, - override val probabilityCol: String, - override val predictionCol: String, - override val labelCol: String, - override val featuresCol: String, - val objectiveHistory: Array[Double]) + predictions: DataFrame, + probabilityCol: String, + predictionCol: String, + labelCol: String, + featuresCol: String, + override val objectiveHistory: Array[Double]) extends LogisticRegressionSummaryImpl( predictions, probabilityCol, predictionCol, labelCol, featuresCol) with LogisticRegressionTrainingSummary /** - * :: Experimental :: * Multiclass logistic regression results for a given model. * * @param predictions dataframe output by the model's `transform` method. 
@@ -1605,7 +1618,6 @@ private class LogisticRegressionSummaryImpl( extends LogisticRegressionSummary /** - * :: Experimental :: * Binary logistic regression training results. * * @param predictions dataframe output by the model's `transform` method. @@ -1618,18 +1630,17 @@ private class LogisticRegressionSummaryImpl( * @param objectiveHistory objective function (scaled loss + regularization) at each iteration. */ private class BinaryLogisticRegressionTrainingSummaryImpl( - override val predictions: DataFrame, - override val probabilityCol: String, - override val predictionCol: String, - override val labelCol: String, - override val featuresCol: String, + predictions: DataFrame, + probabilityCol: String, + predictionCol: String, + labelCol: String, + featuresCol: String, override val objectiveHistory: Array[Double]) extends BinaryLogisticRegressionSummaryImpl( predictions, probabilityCol, predictionCol, labelCol, featuresCol) with BinaryLogisticRegressionTrainingSummary /** - * :: Experimental :: * Binary logistic regression results for a given model. * * @param predictions dataframe output by the model's `transform` method. @@ -1641,11 +1652,11 @@ private class BinaryLogisticRegressionTrainingSummaryImpl( * @param featuresCol field in "predictions" which gives the features of each instance as a vector. */ private class BinaryLogisticRegressionSummaryImpl( - @transient override val predictions: DataFrame, - override val probabilityCol: String, - override val predictionCol: String, - override val labelCol: String, - override val featuresCol: String) + predictions: DataFrame, + probabilityCol: String, + predictionCol: String, + labelCol: String, + featuresCol: String) extends LogisticRegressionSummaryImpl( predictions, probabilityCol, predictionCol, labelCol, featuresCol) with BinaryLogisticRegressionSummary diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index a1e32f33ba88..b4cce849e4fa 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -213,15 +213,15 @@ class LogisticRegressionSuite case (family, dataset) => lr.setFamily(family) lr.setProbabilityCol("").setPredictionCol("prediction") - val modelNoProb = lr.fit(smallBinaryDataset) + val modelNoProb = lr.fit(dataset) checkSummarySchema(modelNoProb, Seq("probability_")) lr.setProbabilityCol("probability").setPredictionCol("") - val modelNoPred = lr.fit(smallBinaryDataset) + val modelNoPred = lr.fit(dataset) checkSummarySchema(modelNoPred, Seq("prediction_")) lr.setProbabilityCol("").setPredictionCol("") - val modelNoPredNoProb = lr.fit(smallBinaryDataset) + val modelNoPredNoProb = lr.fit(dataset) checkSummarySchema(modelNoPredNoProb, Seq("prediction_", "probability_")) } } @@ -229,13 +229,14 @@ class LogisticRegressionSuite test("check summary types for binary and multiclass") { val lr = new LogisticRegression() .setFamily("binomial") + .setMaxIter(1) val blorModel = lr.fit(smallBinaryDataset) - assert(blorModel.summary.isInstanceOf[BinaryLogisticRegressionTrainingSummaryImpl]) - assert(blorModel.binarySummary.isInstanceOf[BinaryLogisticRegressionTrainingSummaryImpl]) + assert(blorModel.summary.isInstanceOf[BinaryLogisticRegressionTrainingSummary]) + assert(blorModel.binarySummary.isInstanceOf[BinaryLogisticRegressionTrainingSummary]) val mlorModel = 
lr.setFamily("multinomial").fit(smallMultinomialDataset) - assert(mlorModel.summary.isInstanceOf[LogisticRegressionTrainingSummaryImpl]) + assert(mlorModel.summary.isInstanceOf[LogisticRegressionTrainingSummary]) withClue("cannot get binary summary for multiclass model") { intercept[RuntimeException] { mlorModel.binarySummary @@ -243,13 +244,13 @@ class LogisticRegressionSuite } val mlorBinaryModel = lr.setFamily("multinomial").fit(smallBinaryDataset) - assert(mlorBinaryModel.summary.isInstanceOf[BinaryLogisticRegressionTrainingSummaryImpl]) - assert(mlorBinaryModel.binarySummary.isInstanceOf[BinaryLogisticRegressionTrainingSummaryImpl]) + assert(mlorBinaryModel.summary.isInstanceOf[BinaryLogisticRegressionTrainingSummary]) + assert(mlorBinaryModel.binarySummary.isInstanceOf[BinaryLogisticRegressionTrainingSummary]) val blorSummary = blorModel.evaluate(smallBinaryDataset) val mlorSummary = mlorModel.evaluate(smallMultinomialDataset) - assert(blorSummary.isInstanceOf[BinaryLogisticRegressionSummaryImpl]) - assert(mlorSummary.isInstanceOf[LogisticRegressionSummaryImpl]) + assert(blorSummary.isInstanceOf[BinaryLogisticRegressionSummary]) + assert(mlorSummary.isInstanceOf[LogisticRegressionSummary]) } test("setThreshold, getThreshold") { From 67c57e547b654ec2816fe4f33e067072a05c4d5e Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Tue, 22 Aug 2017 07:38:18 +0800 Subject: [PATCH 09/11] update mima --- project/MimaExcludes.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 92dc36e7fee1..eecda26abb7e 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -50,6 +50,7 @@ object MimaExcludes { ProblemFilters.exclude[IncompatibleTemplateDefProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionTrainingSummary"), ProblemFilters.exclude[IncompatibleTemplateDefProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary"), ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.predictionCol"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.labels"), ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.truePositiveRateByLabel"), ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.falsePositiveRateByLabel"), ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.precisionByLabel"), From 0ebc943ea12e88e85f2a331e3f6c729ff4ff9aa7 Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Wed, 23 Aug 2017 15:05:24 +0800 Subject: [PATCH 10/11] tiny update comment --- .../apache/spark/ml/classification/LogisticRegression.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 56144453e185..86526d6f41ba 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -1389,7 +1389,8 @@ sealed trait LogisticRegressionSummary extends Serializable { } /** - * Returns the sequence of labels in ascending order + * Returns the sequence of labels in ascending order. 
This order matches the order used + * in metrics which are specified as arrays over labels, e.g., truePositiveRateByLabel. * * Note: In most cases, it will be values {0.0, 1.0, ..., numClasses-1}, However, if the * training set is missing a label, then all of the arrays over labels From 1395de2c5ab85ea76c690b786c55d459180f1b44 Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Fri, 25 Aug 2017 09:15:06 +0800 Subject: [PATCH 11/11] add experimental tag --- .../ml/classification/LogisticRegression.scala | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 86526d6f41ba..ffe4b52300c7 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -22,7 +22,7 @@ import java.util.Locale import scala.collection.mutable import breeze.linalg.{DenseVector => BDV} -import breeze.optimize.{CachedDiffFunction, DiffFunction, LBFGS => BreezeLBFGS, LBFGSB => BreezeLBFGSB, OWLQN => BreezeOWLQN} +import breeze.optimize.{CachedDiffFunction, LBFGS => BreezeLBFGS, LBFGSB => BreezeLBFGSB, OWLQN => BreezeOWLQN} import org.apache.hadoop.fs.Path import org.apache.spark.SparkException @@ -1354,8 +1354,10 @@ private[ml] class MultiClassSummarizer extends Serializable { } /** + * :: Experimental :: * Abstraction for logistic regression results for a given model. */ +@Experimental sealed trait LogisticRegressionSummary extends Serializable { /** @@ -1472,10 +1474,12 @@ sealed trait LogisticRegressionSummary extends Serializable { } /** + * :: Experimental :: * Abstraction for multiclass logistic regression training results. * Currently, the training summary ignores the training weights except * for the objective trace. */ +@Experimental sealed trait LogisticRegressionTrainingSummary extends LogisticRegressionSummary { /** objective function (scaled loss + regularization) at each iteration. */ @@ -1489,8 +1493,10 @@ sealed trait LogisticRegressionTrainingSummary extends LogisticRegressionSummary } /** + * :: Experimental :: * Abstraction for binary logistic regression results for a given model. */ +@Experimental sealed trait BinaryLogisticRegressionSummary extends LogisticRegressionSummary { private val sparkSession = predictions.sparkSession @@ -1573,6 +1579,13 @@ sealed trait BinaryLogisticRegressionSummary extends LogisticRegressionSummary { } } +/** + * :: Experimental :: + * Abstraction for binary logistic regression training results. + * Currently, the training summary ignores the training weights except + * for the objective trace. + */ +@Experimental sealed trait BinaryLogisticRegressionTrainingSummary extends BinaryLogisticRegressionSummary with LogisticRegressionTrainingSummary
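Because the summary hierarchy now ends in sealed `@Experimental` traits rather than concrete classes, downstream code can also branch on the trait type instead of calling `binarySummary` and handling its exception. A minimal sketch, assuming only the members shown in this series plus the pre-existing `areaUnderROC` on the binary summary:

import org.apache.spark.ml.classification.{
  BinaryLogisticRegressionTrainingSummary, LogisticRegressionTrainingSummary}

// Hypothetical helper: describe a training summary without assuming it is binary.
def describe(summary: LogisticRegressionTrainingSummary): String = summary match {
  case b: BinaryLogisticRegressionTrainingSummary =>
    s"binary model, areaUnderROC = ${b.areaUnderROC}"
  case m =>
    s"multiclass model over labels ${m.labels.mkString(", ")}"
}

This mirrors what `binarySummary` does internally, but returns a value for the multiclass case instead of throwing.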