diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py index c58555fc9d2c..312c75d112cb 100644 --- a/python/pyspark/__init__.py +++ b/python/pyspark/__init__.py @@ -49,16 +49,6 @@ Main entry point for accessing data stored in Apache Hive.. """ -# The following block allows us to import python's random instead of mllib.random for scripts in -# mllib that depend on top level pyspark packages, which transitively depend on python's random. -# Since Python's import logic looks for modules in the current package first, we eliminate -# mllib.random as a candidate for C{import random} by removing the first search path, the script's -# location, in order to force the loader to look in Python's top-level modules for C{random}. -import sys -s = sys.path.pop(0) -import random -sys.path.insert(0, s) - from pyspark.conf import SparkConf from pyspark.context import SparkContext from pyspark.sql import SQLContext diff --git a/python/pyspark/mllib/__init__.py b/python/pyspark/mllib/__init__.py index 4149f54931d1..0c7f4910747d 100644 --- a/python/pyspark/mllib/__init__.py +++ b/python/pyspark/mllib/__init__.py @@ -24,3 +24,12 @@ import numpy if numpy.version.version < '1.4': raise Exception("MLlib requires NumPy 1.4+") + +__all__ = ['classification', 'clustering', 'linalg', 'random', + 'recommendation', 'regression', 'stat', 'tree', 'util'] + +import sys +import rand as random +random.__name__ = 'random' +random.RandomRDDs.__module__ = __name__ + '.random' +sys.modules[__name__ + '.random'] = random diff --git a/python/pyspark/mllib/linalg.py b/python/pyspark/mllib/linalg.py index f485a69db1fa..21d46249a73b 100644 --- a/python/pyspark/mllib/linalg.py +++ b/python/pyspark/mllib/linalg.py @@ -267,8 +267,4 @@ def _test(): exit(-1) if __name__ == "__main__": - # remove current path from list of search paths to avoid importing mllib.random - # for C{import random}, which is done in an external dependency of pyspark during doctests. - import sys - sys.path.pop(0) _test() diff --git a/python/pyspark/mllib/random.py b/python/pyspark/mllib/rand.py similarity index 100% rename from python/pyspark/mllib/random.py rename to python/pyspark/mllib/rand.py diff --git a/python/run-tests b/python/run-tests index d671da40031c..51df52dbde0f 100755 --- a/python/run-tests +++ b/python/run-tests @@ -73,7 +73,7 @@ run_test "pyspark/mllib/_common.py" run_test "pyspark/mllib/classification.py" run_test "pyspark/mllib/clustering.py" run_test "pyspark/mllib/linalg.py" -run_test "pyspark/mllib/random.py" +run_test "pyspark/mllib/rand.py" run_test "pyspark/mllib/recommendation.py" run_test "pyspark/mllib/regression.py" run_test "pyspark/mllib/stat.py"