Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 99 additions & 0 deletions python/pyspark/mllib/linalg/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import numpy as np

from pyspark import since
from pyspark.ml import linalg as newlinalg
from pyspark.sql.types import UserDefinedType, StructField, StructType, ArrayType, DoubleType, \
IntegerType, ByteType, BooleanType

Expand Down Expand Up @@ -247,6 +248,15 @@ def toArray(self):
"""
raise NotImplementedError

def asML(self):
    """
    Convert this vector to the new mllib-local representation.
    This does NOT copy the data; it copies references.

    This base implementation is only a placeholder and always raises;
    concrete vector classes provide the actual conversion.

    :return: :py:class:`pyspark.ml.linalg.Vector`
    :raises NotImplementedError: always, in this base implementation

    .. versionadded:: 2.0.0
    """
    raise NotImplementedError


class DenseVector(Vector):
"""
Expand Down Expand Up @@ -408,6 +418,17 @@ def toArray(self):
"""
return self.array

def asML(self):
    """
    Return this dense vector in the new mllib-local representation.
    This does NOT copy the data; it copies references.

    :return: :py:class:`pyspark.ml.linalg.DenseVector`

    .. versionadded:: 2.0.0
    """
    # Hand the existing backing array straight to the ml-side constructor.
    backing_array = self.array
    return newlinalg.DenseVector(backing_array)

@property
def values(self):
"""
Expand Down Expand Up @@ -737,6 +758,17 @@ def toArray(self):
arr[self.indices] = self.values
return arr

def asML(self):
    """
    Return this sparse vector in the new mllib-local representation.
    This does NOT copy the data; it copies references.

    :return: :py:class:`pyspark.ml.linalg.SparseVector`

    .. versionadded:: 2.0.0
    """
    # The ml-side vector is described by the same (size, indices, values) triple.
    size, indices, values = self.size, self.indices, self.values
    return newlinalg.SparseVector(size, indices, values)

def __len__(self):
return self.size

Expand Down Expand Up @@ -845,6 +877,24 @@ def dense(*elements):
elements = elements[0]
return DenseVector(elements)

@staticmethod
def fromML(vec):
    """
    Convert a vector from the new mllib-local representation.
    This does NOT copy the data; it copies references.

    :param vec: a :py:class:`pyspark.ml.linalg.Vector`
    :return: a :py:class:`pyspark.mllib.linalg.Vector`
    :raises TypeError: if ``vec`` is neither a
        :py:class:`pyspark.ml.linalg.DenseVector` nor a
        :py:class:`pyspark.ml.linalg.SparseVector`

    .. versionadded:: 2.0.0
    """
    if isinstance(vec, newlinalg.DenseVector):
        return DenseVector(vec.array)
    elif isinstance(vec, newlinalg.SparseVector):
        return SparseVector(vec.size, vec.indices, vec.values)
    else:
        # Anything else has no mllib counterpart here; fail loudly.
        raise TypeError("Unsupported vector type %s" % type(vec))

@staticmethod
def stringify(vector):
"""
Expand Down Expand Up @@ -945,6 +995,13 @@ def toArray(self):
"""
raise NotImplementedError

def asML(self):
    """
    Convert this matrix to the new mllib-local representation.
    This does NOT copy the data; it copies references.

    This base implementation is only a placeholder and always raises;
    concrete matrix classes provide the actual conversion.

    :return: :py:class:`pyspark.ml.linalg.Matrix`
    :raises NotImplementedError: always, in this base implementation

    .. versionadded:: 2.0.0
    """
    raise NotImplementedError

@staticmethod
def _convert_to_array(array_like, dtype):
"""
Expand Down Expand Up @@ -1044,6 +1101,17 @@ def toSparse(self):

return SparseMatrix(self.numRows, self.numCols, colPtrs, rowIndices, values)

def asML(self):
    """
    Return this dense matrix in the new mllib-local representation.
    This does NOT copy the data; it copies references.

    :return: :py:class:`pyspark.ml.linalg.DenseMatrix`

    .. versionadded:: 2.0.0
    """
    # Shape, values and the transposition flag are all forwarded unchanged.
    return newlinalg.DenseMatrix(
        self.numRows,
        self.numCols,
        self.values,
        self.isTransposed,
    )
Copy link
Contributor

@MechCoder MechCoder Jun 30, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"> 79 ;)


def __getitem__(self, indices):
i, j = indices
if i < 0 or i >= self.numRows:
Expand Down Expand Up @@ -1216,6 +1284,18 @@ def toDense(self):
densevals = np.ravel(self.toArray(), order='F')
return DenseMatrix(self.numRows, self.numCols, densevals)

def asML(self):
    """
    Return this sparse matrix in the new mllib-local representation.
    This does NOT copy the data; it copies references.

    :return: :py:class:`pyspark.ml.linalg.SparseMatrix`

    .. versionadded:: 2.0.0
    """
    # Shape, the three sparse-storage arrays and the transposition flag are
    # all forwarded unchanged.
    return newlinalg.SparseMatrix(
        self.numRows,
        self.numCols,
        self.colPtrs,
        self.rowIndices,
        self.values,
        self.isTransposed,
    )

# TODO: More efficient implementation:
def __eq__(self, other):
return np.all(self.toArray() == other.toArray())
Expand All @@ -1236,6 +1316,25 @@ def sparse(numRows, numCols, colPtrs, rowIndices, values):
"""
return SparseMatrix(numRows, numCols, colPtrs, rowIndices, values)

@staticmethod
def fromML(mat):
    """
    Convert a matrix from the new mllib-local representation.
    This does NOT copy the data; it copies references.

    :param mat: a :py:class:`pyspark.ml.linalg.Matrix`
    :return: a :py:class:`pyspark.mllib.linalg.Matrix`
    :raises TypeError: if ``mat`` is neither a
        :py:class:`pyspark.ml.linalg.DenseMatrix` nor a
        :py:class:`pyspark.ml.linalg.SparseMatrix`

    .. versionadded:: 2.0.0
    """
    if isinstance(mat, newlinalg.DenseMatrix):
        return DenseMatrix(mat.numRows, mat.numCols, mat.values, mat.isTransposed)
    elif isinstance(mat, newlinalg.SparseMatrix):
        return SparseMatrix(mat.numRows, mat.numCols, mat.colPtrs, mat.rowIndices,
                            mat.values, mat.isTransposed)
    else:
        # Anything else has no mllib counterpart here; fail loudly.
        raise TypeError("Unsupported matrix type %s" % type(mat))


class QRDecomposition(object):
"""
Expand Down
69 changes: 69 additions & 0 deletions python/pyspark/mllib/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
import unittest

from pyspark import SparkContext
import pyspark.ml.linalg as newlinalg
from pyspark.mllib.common import _to_java_object_rdd
from pyspark.mllib.clustering import StreamingKMeans, StreamingKMeansModel
from pyspark.mllib.linalg import Vector, SparseVector, DenseVector, VectorUDT, _convert_to_vector,\
Expand Down Expand Up @@ -423,6 +424,74 @@ def test_norms(self):
tmp = SparseVector(4, [0, 2], [3, 0])
self.assertEqual(tmp.numNonzeros(), 1)

def test_ml_mllib_vector_conversion(self):
    """Check asML() / Vectors.fromML() for dense and sparse vectors."""
    # --- mllib -> ml -------------------------------------------------
    # dense
    src_dense = Vectors.dense([1, 2, 3])
    want_ml_dense = newlinalg.Vectors.dense([1, 2, 3])
    got_ml_dense = src_dense.asML()
    self.assertEqual(got_ml_dense, want_ml_dense)

    # sparse
    src_sparse = Vectors.sparse(4, {1: 1.0, 3: 5.5})
    want_ml_sparse = newlinalg.Vectors.sparse(4, {1: 1.0, 3: 5.5})
    got_ml_sparse = src_sparse.asML()
    self.assertEqual(got_ml_sparse, want_ml_sparse)

    # --- ml -> mllib -------------------------------------------------
    # dense
    want_mllib_dense = Vectors.dense([1, 2, 3])
    ml_dense = newlinalg.Vectors.dense([1, 2, 3])
    got_mllib_dense = Vectors.fromML(ml_dense)
    self.assertEqual(want_mllib_dense, got_mllib_dense)

    # sparse
    want_mllib_sparse = Vectors.sparse(4, {1: 1.0, 3: 5.5})
    ml_sparse = newlinalg.Vectors.sparse(4, {1: 1.0, 3: 5.5})
    got_mllib_sparse = Vectors.fromML(ml_sparse)
    self.assertEqual(want_mllib_sparse, got_mllib_sparse)

def test_ml_mllib_matrix_conversion(self):
    """Check asML() / Matrices.fromML() for dense and sparse matrices,
    in both plain and transposed layouts."""
    # --- mllib -> ml -------------------------------------------------
    # dense
    src_dense = Matrices.dense(2, 2, [0, 1, 2, 3])
    want_ml_dense = newlinalg.Matrices.dense(2, 2, [0, 1, 2, 3])
    got_ml_dense = src_dense.asML()
    self.assertEqual(got_ml_dense, want_ml_dense)

    # dense, transposed
    src_dense_t = DenseMatrix(2, 2, [0, 1, 2, 3], True)
    want_ml_dense_t = newlinalg.DenseMatrix(2, 2, [0, 1, 2, 3], True)
    got_ml_dense_t = src_dense_t.asML()
    self.assertEqual(got_ml_dense_t, want_ml_dense_t)

    # sparse
    src_sparse = Matrices.sparse(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4])
    want_ml_sparse = newlinalg.Matrices.sparse(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4])
    got_ml_sparse = src_sparse.asML()
    self.assertEqual(got_ml_sparse, want_ml_sparse)

    # sparse, transposed
    src_sparse_t = SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4], True)
    want_ml_sparse_t = newlinalg.SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4], True)
    got_ml_sparse_t = src_sparse_t.asML()
    self.assertEqual(got_ml_sparse_t, want_ml_sparse_t)

    # --- ml -> mllib -------------------------------------------------
    # dense
    want_mllib_dense = Matrices.dense(2, 2, [1, 2, 3, 4])
    ml_dense = newlinalg.Matrices.dense(2, 2, [1, 2, 3, 4])
    got_mllib_dense = Matrices.fromML(ml_dense)
    self.assertEqual(want_mllib_dense, got_mllib_dense)

    # dense, transposed
    want_mllib_dense_t = DenseMatrix(2, 2, [1, 2, 3, 4], True)
    ml_dense_t = newlinalg.DenseMatrix(2, 2, [1, 2, 3, 4], True)
    got_mllib_dense_t = Matrices.fromML(ml_dense_t)
    self.assertEqual(want_mllib_dense_t, got_mllib_dense_t)

    # sparse
    want_mllib_sparse = Matrices.sparse(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4])
    ml_sparse = newlinalg.Matrices.sparse(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4])
    got_mllib_sparse = Matrices.fromML(ml_sparse)
    self.assertEqual(want_mllib_sparse, got_mllib_sparse)

    # sparse, transposed
    want_mllib_sparse_t = SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4], True)
    ml_sparse_t = newlinalg.SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4], True)
    got_mllib_sparse_t = Matrices.fromML(ml_sparse_t)
    self.assertEqual(want_mllib_sparse_t, got_mllib_sparse_t)


class ListTests(MLlibTestCase):

Expand Down