From fbc3419f098e3fd838d36e2a10bcd869ec373098 Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Wed, 14 Sep 2016 21:50:59 +0900
Subject: [PATCH 1/6] Fix more same instances

---
 .../spark/mllib/linalg/distributed/RowMatrix.scala | 8 ++++----
 .../apache/spark/sql/catalyst/analysis/Analyzer.scala | 3 ++-
 .../catalyst/expressions/conditionalExpressions.scala | 3 ++-
 .../spark/sql/catalyst/expressions/ordering.scala | 3 ++-
 .../spark/sql/catalyst/util/QuantileSummaries.scala | 10 +++++-----
 .../org/apache/spark/sql/hive/HiveInspectors.scala | 6 ++++--
 .../scala/org/apache/spark/sql/hive/TableReader.scala | 3 ++-
 .../scala/org/apache/spark/sql/hive/hiveUDFs.scala | 3 ++-
 .../org/apache/spark/sql/hive/orc/OrcFileFormat.scala | 6 ++++--
 9 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
index ec32e37afb79..06b281d1fe49 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
@@ -586,7 +586,7 @@ class RowMatrix @Since("1.0.0") (
       colMags: Array[Double],
       gamma: Double): CoordinateMatrix = {
     require(gamma > 1.0, s"Oversampling should be greater than 1: $gamma")
-    require(colMags.size == this.numCols(), "Number of magnitudes didn't match column dimension")
+    require(colMags.length == this.numCols(), "Number of magnitudes didn't match column dimension")
     val sg = math.sqrt(gamma) // sqrt(gamma) used many times

     // Don't divide by zero for those columns with zero magnitude
@@ -601,11 +601,11 @@ class RowMatrix @Since("1.0.0") (
       val q = qBV.value

       val rand = new XORShiftRandom(indx)
-      val scaled = new Array[Double](p.size)
+      val scaled = new Array[Double](p.length)
       iter.flatMap { row =>
         row match {
           case SparseVector(size, indices, values) =>
-            val nnz = indices.size
+            val nnz = indices.length
             var k = 0
             while (k < nnz) {
               scaled(k) = values(k) / q(indices(k))
@@ -630,7 +630,7 @@ class RowMatrix @Since("1.0.0") (
               buf
             }.flatten
           case DenseVector(values) =>
-            val n = values.size
+            val n = values.length
             var i = 0
             while (i < n) {
               scaled(i) = values(i) / q(i)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 18f814d6cdfd..933bef349518 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1659,7 +1659,8 @@ class Analyzer(
       // child of it.
       var currentChild = child
       var i = 0
-      while (i < groupedWindowExpressions.size) {
+      val size = groupedWindowExpressions.size
+      while (i < size) {
         val ((partitionSpec, orderSpec), windowExpressions) = groupedWindowExpressions(i)
         // Set currentChild to the newly created Window operator.
         currentChild =
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala
index 1dd70bcfcfe8..71d4e9a3c947 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala
@@ -125,7 +125,8 @@ abstract class CaseWhenBase(

   override def eval(input: InternalRow): Any = {
     var i = 0
-    while (i < branches.size) {
+    val size = branches.size
+    while (i < size) {
       if (java.lang.Boolean.TRUE.equals(branches(i)._1.eval(input))) {
         return branches(i)._2.eval(input)
       }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ordering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ordering.scala
index 6112259fed61..9a892905f518 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ordering.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ordering.scala
@@ -31,7 +31,8 @@ class InterpretedOrdering(ordering: Seq[SortOrder]) extends Ordering[InternalRow

   def compare(a: InternalRow, b: InternalRow): Int = {
     var i = 0
-    while (i < ordering.size) {
+    val size = ordering.size
+    while (i < size) {
       val order = ordering(i)
       val left = order.child.eval(a)
       val right = order.child.eval(b)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/QuantileSummaries.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/QuantileSummaries.scala
index fd62bd511fac..27928c493d5f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/QuantileSummaries.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/QuantileSummaries.scala
@@ -91,10 +91,10 @@ class QuantileSummaries(
     var sampleIdx = 0
     // The index of the sample currently being inserted.
     var opsIdx: Int = 0
-    while(opsIdx < sorted.length) {
+    while (opsIdx < sorted.length) {
       val currentSample = sorted(opsIdx)
       // Add all the samples before the next observation.
-      while(sampleIdx < sampled.size && sampled(sampleIdx).value <= currentSample) {
+      while (sampleIdx < sampled.length && sampled(sampleIdx).value <= currentSample) {
         newSamples += sampled(sampleIdx)
         sampleIdx += 1
       }
@@ -102,7 +102,7 @@ class QuantileSummaries(
       // If it is the first one to insert, of if it is the last one
       currentCount += 1
       val delta =
-        if (newSamples.isEmpty || (sampleIdx == sampled.size && opsIdx == sorted.length - 1)) {
+        if (newSamples.isEmpty || (sampleIdx == sampled.length && opsIdx == sorted.length - 1)) {
           0
         } else {
           math.floor(2 * relativeError * currentCount).toInt
@@ -114,7 +114,7 @@ class QuantileSummaries(
     }

     // Add all the remaining existing samples
-    while(sampleIdx < sampled.size) {
+    while (sampleIdx < sampled.length) {
       newSamples += sampled(sampleIdx)
       sampleIdx += 1
     }
@@ -195,7 +195,7 @@ class QuantileSummaries(
     // Minimum rank at current sample
     var minRank = 0
     var i = 1
-    while (i < sampled.size - 1) {
+    while (i < sampled.length - 1) {
       val curSample = sampled(i)
       minRank += curSample.g
       val maxRank = minRank + curSample.delta
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
index 4e74452f6cd1..e4b963efeaf1 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
@@ -703,7 +703,8 @@ private[hive] trait HiveInspectors {
       // 1. create the pojo (most likely) object
       val result = x.create()
       var i = 0
-      while (i < fieldRefs.size) {
+      val size = fieldRefs.size
+      while (i < size) {
         // 2. set the property for the pojo
         val tpe = structType(i).dataType
         x.setStructFieldData(
@@ -720,7 +721,8 @@ private[hive] trait HiveInspectors {
       val row = a.asInstanceOf[InternalRow]
       val result = new java.util.ArrayList[AnyRef](fieldRefs.size)
       var i = 0
-      while (i < fieldRefs.size) {
+      val size = fieldRefs.size
+      while (i < size) {
         val tpe = structType(i).dataType
         result.add(wrap(row.get(i, tpe), fieldRefs.get(i).getFieldObjectInspector, tpe))
         i += 1
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
index b4808fdbed9c..ec7e53efc87f 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
@@ -427,7 +427,8 @@ private[hive] object HadoopTableReader extends HiveInspectors with Logging {
     iterator.map { value =>
       val raw = converter.convert(rawDeser.deserialize(value))
       var i = 0
-      while (i < fieldRefs.length) {
+      val length = fieldRefs.length
+      while (i < length) {
        val fieldValue = soi.getStructFieldData(raw, fieldRefs(i))
        if (fieldValue == null) {
          mutableRow.setNullAt(fieldOrdinals(i))
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala
index 9347aeb8e09a..962dd5a52ebc 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala
@@ -153,7 +153,8 @@ private[hive] case class HiveGenericUDF(
     returnInspector // Make sure initialized.

     var i = 0
-    while (i < children.length) {
+    val length = children.length
+    while (i < length) {
       val idx = i
       deferredObjects(i).asInstanceOf[DeferredObjectAdapter]
         .set(() => children(idx).eval(input))
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
index 286197b50e22..03b508e11aa7 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
@@ -190,7 +190,8 @@ private[orc] class OrcSerializer(dataSchema: StructType, conf: Configuration)
       row: InternalRow): Unit = {
     val fieldRefs = oi.getAllStructFieldRefs
     var i = 0
-    while (i < fieldRefs.size) {
+    val size = fieldRefs.size
+    while (i < size) {

       oi.setStructFieldData(
         struct,
@@ -289,7 +290,8 @@ private[orc] object OrcRelation extends HiveInspectors {
       iterator.map { value =>
         val raw = deserializer.deserialize(value)
         var i = 0
-        while (i < fieldRefs.length) {
+        val length = fieldRefs.length
+        while (i < length) {
          val fieldValue = oi.getStructFieldData(raw, fieldRefs(i))
          if (fieldValue == null) {
            mutableRow.setNullAt(fieldOrdinals(i))

From ecd3ddeae5ae85b8378c0d361565a788315bbcbd Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Wed, 14 Sep 2016 22:03:47 +0900
Subject: [PATCH 2/6] Last instance

---
 .../apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala
index 132472ad0ce8..6115552cd978 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala
@@ -369,7 +369,7 @@ object JdbcUtils extends Logging {
         val bytes = rs.getBytes(pos + 1)
         var ans = 0L
         var j = 0
-        while (j < bytes.size) {
+        while (j < bytes.length) {
           ans = 256 * ans + (255 & bytes(j))
           j = j + 1
         }

From 996e2922aae07675524afd2e3d6f02fc2310dedd Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Wed, 14 Sep 2016 22:38:46 +0900
Subject: [PATCH 3/6] Revert unrelated changes and use funtional transformation for a case

---
 .../apache/spark/mllib/linalg/distributed/RowMatrix.scala | 8 ++++----
 .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 8 +-------
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
index 06b281d1fe49..ec32e37afb79 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
@@ -586,7 +586,7 @@ class RowMatrix @Since("1.0.0") (
       colMags: Array[Double],
       gamma: Double): CoordinateMatrix = {
     require(gamma > 1.0, s"Oversampling should be greater than 1: $gamma")
-    require(colMags.length == this.numCols(), "Number of magnitudes didn't match column dimension")
+    require(colMags.size == this.numCols(), "Number of magnitudes didn't match column dimension")
     val sg = math.sqrt(gamma) // sqrt(gamma) used many times

     // Don't divide by zero for those columns with zero magnitude
@@ -601,11 +601,11 @@ class RowMatrix @Since("1.0.0") (
       val q = qBV.value

       val rand = new XORShiftRandom(indx)
-      val scaled = new Array[Double](p.length)
+      val scaled = new Array[Double](p.size)
       iter.flatMap { row =>
         row match {
           case SparseVector(size, indices, values) =>
-            val nnz = indices.length
+            val nnz = indices.size
             var k = 0
             while (k < nnz) {
               scaled(k) = values(k) / q(indices(k))
@@ -630,7 +630,7 @@ class RowMatrix @Since("1.0.0") (
               buf
             }.flatten
           case DenseVector(values) =>
-            val n = values.length
+            val n = values.size
             var i = 0
             while (i < n) {
               scaled(i) = values(i) / q(i)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 933bef349518..979adbd7de28 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1658,10 +1658,7 @@ class Analyzer(
       // Third, for every Window Spec, we add a Window operator and set currentChild as the
       // child of it.
       var currentChild = child
-      var i = 0
-      val size = groupedWindowExpressions.size
-      while (i < size) {
-        val ((partitionSpec, orderSpec), windowExpressions) = groupedWindowExpressions(i)
+      groupedWindowExpressions.foreach { case ((partitionSpec, orderSpec), windowExpressions) =>
         // Set currentChild to the newly created Window operator.
         currentChild =
           Window(
             windowExpressions,
             partitionSpec,
             orderSpec,
             currentChild)
-
-        // Move to next Window Spec.
-        i += 1
       }

       // Finally, we create a Project to output currentChild's output

From cb536a411784496a20ea71c0637639119d51ca95 Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Thu, 15 Sep 2016 21:54:04 +0900
Subject: [PATCH 4/6] Address comment

---
 .../spark/sql/catalyst/analysis/Analyzer.scala | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 979adbd7de28..0569848566a4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1657,16 +1657,12 @@ class Analyzer(

       // Third, for every Window Spec, we add a Window operator and set currentChild as the
       // child of it.
-      var currentChild = child
-      groupedWindowExpressions.foreach { case ((partitionSpec, orderSpec), windowExpressions) =>
-        // Set currentChild to the newly created Window operator.
-        currentChild =
-          Window(
-            windowExpressions,
-            partitionSpec,
-            orderSpec,
-            currentChild)
-      }
+      val currentChild =
+        groupedWindowExpressions.foldLeft(child) {
+          case (last, ((partitionSpec, orderSpec), windowExpressions)) =>
+            // Set currentChild to the newly created Window operator.
+            Window(windowExpressions, partitionSpec, orderSpec, last)
+        }

       // Finally, we create a Project to output currentChild's output
       // newExpressionsWithWindowFunctions.

From f5e81315c088a33ea55cbcf7f363253c51545581 Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Sat, 17 Sep 2016 22:31:34 +0900
Subject: [PATCH 5/6] Fix comments

---
 .../apache/spark/sql/catalyst/analysis/Analyzer.scala | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 0569848566a4..bd0ea93bb349 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1655,18 +1655,17 @@ class Analyzer(
         }
       }.toSeq

-      // Third, for every Window Spec, we add a Window operator and set currentChild as the
-      // child of it.
-      val currentChild =
+      // Third, we aggregate them by adding each Window operator for each Window Spec and then
+      // setting the next to the child of the Window operator.
+      val windowOps =
         groupedWindowExpressions.foldLeft(child) {
           case (last, ((partitionSpec, orderSpec), windowExpressions)) =>
-            // Set currentChild to the newly created Window operator.
             Window(windowExpressions, partitionSpec, orderSpec, last)
         }

-      // Finally, we create a Project to output currentChild's output
+      // Finally, we create a Project to output windowOps's output
       // newExpressionsWithWindowFunctions.
-      Project(currentChild.output ++ newExpressionsWithWindowFunctions, currentChild)
+      Project(windowOps.output ++ newExpressionsWithWindowFunctions, windowOps)
     } // end of addWindow

     // We have to use transformDown at here to make sure the rule of

From 8a3d293302ba87629a7a7247a7c3912e294e3752 Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Sat, 17 Sep 2016 22:34:33 +0900
Subject: [PATCH 6/6] Correct the comments more

---
 .../scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index bd0ea93bb349..dad163817cd1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1656,7 +1656,7 @@ class Analyzer(
       }.toSeq

       // Third, we aggregate them by adding each Window operator for each Window Spec and then
-      // setting the next to the child of the Window operator.
+      // setting this to the child of the next Window operator.
       val windowOps =
         groupedWindowExpressions.foldLeft(child) {
           case (last, ((partitionSpec, orderSpec), windowExpressions)) =>