diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py index 71ab46b61d7fa..7f4a4737c7837 100644 --- a/python/pyspark/mllib/classification.py +++ b/python/pyspark/mllib/classification.py @@ -242,7 +242,7 @@ def train(cls, data, lambda_=1.0): @param lambda_: The smoothing parameter """ sc = data.context - dataBytes = _get_unmangled_labeled_point_rdd(data) + dataBytes = _get_unmangled_labeled_point_rdd(data, cache=False) ans = sc._jvm.PythonMLLibAPI().trainNaiveBayes(dataBytes._jrdd, lambda_) return NaiveBayesModel( _deserialize_double_vector(ans[0]), diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py index f3e952a1d842a..46b41b9199ce1 100644 --- a/python/pyspark/mllib/clustering.py +++ b/python/pyspark/mllib/clustering.py @@ -22,7 +22,7 @@ _get_unmangled_rdd, _get_unmangled_double_vector_rdd, _squared_distance, \ _serialize_double_matrix, _deserialize_double_matrix, \ _serialize_double_vector, _deserialize_double_vector, \ - _get_initial_weights, _serialize_rating, _regression_train_wrapper + _get_initial_weights, _serialize_rating from pyspark.mllib.linalg import SparseVector __all__ = ['KMeansModel', 'KMeans'] diff --git a/python/pyspark/mllib/recommendation.py b/python/pyspark/mllib/recommendation.py index 2df23394da6f8..6c07adc98a949 100644 --- a/python/pyspark/mllib/recommendation.py +++ b/python/pyspark/mllib/recommendation.py @@ -20,8 +20,8 @@ _get_unmangled_rdd, _get_unmangled_double_vector_rdd, \ _serialize_double_matrix, _deserialize_double_matrix, \ _serialize_double_vector, _deserialize_double_vector, \ - _get_initial_weights, _serialize_rating, _regression_train_wrapper, \ - _serialize_tuple, RatingDeserializer + _get_initial_weights, _serialize_rating, _serialize_tuple, \ + RatingDeserializer from pyspark.rdd import RDD __all__ = ['MatrixFactorizationModel', 'ALS'] @@ -65,7 +65,7 @@ class ALS(object): @classmethod def train(cls, ratings, rank, iterations=5, lambda_=0.01, blocks=-1): sc = ratings.context - ratingBytes = _get_unmangled_rdd(ratings, _serialize_rating) + ratingBytes = _get_unmangled_rdd(ratings, _serialize_rating, cache=False) mod = sc._jvm.PythonMLLibAPI().trainALSModel( ratingBytes._jrdd, rank, iterations, lambda_, blocks) return MatrixFactorizationModel(sc, mod) @@ -73,7 +73,7 @@ def train(cls, ratings, rank, iterations=5, lambda_=0.01, blocks=-1): @classmethod def trainImplicit(cls, ratings, rank, iterations=5, lambda_=0.01, blocks=-1, alpha=0.01): sc = ratings.context - ratingBytes = _get_unmangled_rdd(ratings, _serialize_rating) + ratingBytes = _get_unmangled_rdd(ratings, _serialize_rating, cache=False) mod = sc._jvm.PythonMLLibAPI().trainImplicitALSModel( ratingBytes._jrdd, rank, iterations, lambda_, blocks, alpha) return MatrixFactorizationModel(sc, mod) diff --git a/python/pyspark/mllib/tree.py b/python/pyspark/mllib/tree.py index 5b13ab682bbfc..7f5d0b14c281a 100644 --- a/python/pyspark/mllib/tree.py +++ b/python/pyspark/mllib/tree.py @@ -161,7 +161,7 @@ def trainClassifier(data, numClasses, categoricalFeaturesInfo, :return: DecisionTreeModel """ sc = data.context - dataBytes = _get_unmangled_labeled_point_rdd(data) + dataBytes = _get_unmangled_labeled_point_rdd(data, cache=False) categoricalFeaturesInfoJMap = \ MapConverter().convert(categoricalFeaturesInfo, sc._gateway._gateway_client) @@ -169,7 +169,6 @@ def trainClassifier(data, numClasses, categoricalFeaturesInfo, dataBytes._jrdd, "classification", numClasses, categoricalFeaturesInfoJMap, impurity, maxDepth, maxBins, minInstancesPerNode, minInfoGain) - dataBytes.unpersist() return DecisionTreeModel(sc, model) @staticmethod @@ -196,7 +195,7 @@ def trainRegressor(data, categoricalFeaturesInfo, :return: DecisionTreeModel """ sc = data.context - dataBytes = _get_unmangled_labeled_point_rdd(data) + dataBytes = _get_unmangled_labeled_point_rdd(data, cache=False) categoricalFeaturesInfoJMap = \ MapConverter().convert(categoricalFeaturesInfo, sc._gateway._gateway_client) @@ -204,7 +203,6 @@ def trainRegressor(data, categoricalFeaturesInfo, dataBytes._jrdd, "regression", 0, categoricalFeaturesInfoJMap, impurity, maxDepth, maxBins, minInstancesPerNode, minInfoGain) - dataBytes.unpersist() return DecisionTreeModel(sc, model)