
Commit d52e763

Remove no-longer-needed slice code and handle review comments
1 parent ea5a25a commit d52e763

4 files changed (+4, -67 lines)

mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala

Lines changed: 1 addition & 1 deletion
@@ -32,7 +32,7 @@ import org.apache.spark.rdd.RDD
  * :: DeveloperApi ::
  * The Java stubs necessary for the Python mllib bindings.
  *
- * See mllib/python/pyspark._common.py for the mutually agreed upon data format.
+ * See python/pyspark/mllib/_common.py for the mutually agreed upon data format.
  */
 @DeveloperApi
 class PythonMLLibAPI extends Serializable {

mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala

Lines changed: 0 additions & 41 deletions
@@ -60,8 +60,6 @@ trait Vector extends Serializable {
    * @param i index
    */
   private[mllib] def apply(i: Int): Double = toBreeze(i)
-
-  private[mllib] def slice(start: Int, end: Int): Vector
 }
 
 /**
@@ -159,10 +157,6 @@ class DenseVector(val values: Array[Double]) extends Vector {
   private[mllib] override def toBreeze: BV[Double] = new BDV[Double](values)
 
   override def apply(i: Int) = values(i)
-
-  private[mllib] override def slice(start: Int, end: Int): Vector = {
-    new DenseVector(values.slice(start, end))
-  }
 }
 
 /**
@@ -193,39 +187,4 @@ class SparseVector(
   }
 
   private[mllib] override def toBreeze: BV[Double] = new BSV[Double](indices, values, size)
-
-  override def apply(pos: Int): Double = {
-    // A more efficient apply() than creating a new Breeze vector
-    var i = 0
-    while (i < indices.length) {
-      if (indices(i) == pos) {
-        return values(i)
-      } else if (indices(i) > pos) {
-        return 0.0
-      }
-      i += 1
-    }
-    0.0
-  }
-
-  private[mllib] override def slice(start: Int, end: Int): Vector = {
-    require(start <= end, s"invalid range: ${start} to ${end}")
-    require(start >= 0, s"invalid range: ${start} to ${end}")
-    require(end <= size, s"invalid range: ${start} to ${end}")
-    // Figure out the range of indices that fall within the given bounds
-    var i = 0
-    var indexRangeStart = 0
-    var indexRangeEnd = 0
-    while (i < indices.length && indices(i) < start) {
-      i += 1
-    }
-    indexRangeStart = i
-    while (i < indices.length && indices(i) < end) {
-      i += 1
-    }
-    indexRangeEnd = i
-    val newIndices = indices.slice(indexRangeStart, indexRangeEnd).map(_ - start)
-    val newValues = values.slice(indexRangeStart, indexRangeEnd)
-    new SparseVector(end - start, newIndices, newValues)
-  }
 }
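For reference, the slicing semantics removed here are small enough to sketch by hand. The helper below is purely illustrative (slice_sparse is not a Spark API, on either the Scala or Python side): keep the entries whose indices fall in [start, end) and rebase them to the new origin.

# Illustrative sketch only: mirrors the deleted Scala SparseVector.slice.
from pyspark.mllib.linalg import SparseVector

def slice_sparse(vec, start, end):
    """Return the [start, end) window of vec, reindexed from zero."""
    assert 0 <= start <= end <= vec.size, "invalid range: %d to %d" % (start, end)
    # Keep entries inside the window, shifting indices to the new origin.
    kept = [(i - start, v) for i, v in zip(vec.indices, vec.values)
            if start <= i < end]
    return SparseVector(end - start, [i for i, _ in kept], [v for _, v in kept])

On the fixture from the tests deleted below, slice_sparse(SparseVector(7, [0, 2, 4, 6], [1.0, 2.0, 3.0, 4.0]), 1, 5) yields SparseVector(4, [1, 3], [2.0, 3.0]).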

mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala

Lines changed: 0 additions & 20 deletions
@@ -100,24 +100,4 @@ class VectorsSuite extends FunSuite {
     assert(vec2(6) === 4.0)
     assert(vec2(7) === 0.0)
   }
-
-  test("slicing dense vectors") {
-    val vec = Vectors.dense(1.0, 2.0, 3.0, 4.0)
-    val slice = vec.slice(1, 3)
-    assert(slice === Vectors.dense(2.0, 3.0))
-    assert(slice.isInstanceOf[DenseVector], "slice was not DenseVector")
-  }
-
-  test("slicing sparse vectors") {
-    val vec = Vectors.sparse(7, Array(0, 2, 4, 6), Array(1.0, 2.0, 3.0, 4.0))
-    val slice = vec.slice(1, 5)
-    assert(slice === Vectors.sparse(4, Array(1,3), Array(2.0, 3.0)))
-    assert(slice.isInstanceOf[SparseVector], "slice was not SparseVector")
-    val slice2 = vec.slice(1, 2)
-    assert(slice2 === Vectors.sparse(1, Array(), Array()))
-    assert(slice2.isInstanceOf[SparseVector], "slice was not SparseVector")
-    val slice3 = vec.slice(6, 7)
-    assert(slice3 === Vectors.sparse(1, Array(0), Array(4.0)))
-    assert(slice3.isInstanceOf[SparseVector], "slice was not SparseVector")
-  }
 }

python/pyspark/mllib/_common.py

Lines changed: 3 additions & 5 deletions
@@ -17,7 +17,7 @@
 
 import struct
 import numpy
-from numpy import ndarray, float64, int64, int32, ones, array_equal, array, dot, shape, complex, issubdtype
+from numpy import ndarray, float64, int64, int32, array_equal, array
 from pyspark import SparkContext, RDD
 from pyspark.mllib.linalg import SparseVector
 from pyspark.serializers import Serializer
@@ -243,8 +243,6 @@ def _deserialize_double_matrix(ba):
 
 def _serialize_labeled_point(p):
     """Serialize a LabeledPoint with a features vector of any type."""
-    #from pyspark.mllib.regression import LabeledPoint
-    #assert type(p) == LabeledPoint, "Expected a LabeledPoint object"
     from pyspark.mllib.regression import LabeledPoint
     serialized_features = _serialize_double_vector(p.features)
     header = bytearray(9)
@@ -318,9 +316,9 @@ def _get_initial_weights(initial_weights, data):
             if initial_weights.ndim != 1:
                 raise TypeError("At least one data element has "
                                 + initial_weights.ndim + " dimensions, which is not 1")
-            initial_weights = numpy.ones([initial_weights.shape[0]])
+            initial_weights = numpy.zeros([initial_weights.shape[0]])
         elif type(initial_weights) == SparseVector:
-            initial_weights = numpy.ones([initial_weights.size])
+            initial_weights = numpy.zeros([initial_weights.size])
     return initial_weights
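The last hunk replaces numpy.ones with numpy.zeros when no initial weights are supplied, so the default is now a zero vector sized to the first data point's features. A minimal sketch of that default path (default_weights is a hypothetical name, not the real _get_initial_weights helper):

# Hypothetical distillation of the default path after this change:
# weights start at zero, sized to the feature dimension.
import numpy
from pyspark.mllib.linalg import SparseVector

def default_weights(features):
    if isinstance(features, SparseVector):
        return numpy.zeros([features.size])
    return numpy.zeros([numpy.asarray(features, dtype=numpy.float64).shape[0]])

For a linear model, zero initial weights make the first predictions all zero, so the first optimization step is driven by the data rather than by an arbitrary all-ones starting point.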