From a2238ec31ef0ff8e3d5588f2e83d48399a1f75b5 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 8 Apr 2020 16:56:51 +0900 Subject: [PATCH 1/2] Show a better error message for different python and pip installation mistake --- python/pyspark/find_spark_home.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/python/pyspark/find_spark_home.py b/python/pyspark/find_spark_home.py index 9c4ed4659863..042a04e3209c 100755 --- a/python/pyspark/find_spark_home.py +++ b/python/pyspark/find_spark_home.py @@ -40,6 +40,7 @@ def is_spark_home(path): paths = ["../", os.path.dirname(os.path.realpath(__file__))] # Add the path of the PySpark module if it exists + import_error_raised = False if sys.version < "3": import imp try: @@ -49,7 +50,7 @@ def is_spark_home(path): paths.append(os.path.join(module_home, "../../")) except ImportError: # Not pip installed no worries - pass + import_error_raised = True else: from importlib.util import find_spec try: @@ -59,7 +60,7 @@ def is_spark_home(path): paths.append(os.path.join(module_home, "../../")) except ImportError: # Not pip installed no worries - pass + import_error_raised = True # Normalize the paths paths = [os.path.abspath(p) for p in paths] @@ -68,6 +69,18 @@ def is_spark_home(path): return next(path for path in paths if is_spark_home(path)) except StopIteration: print("Could not find valid SPARK_HOME while searching {0}".format(paths), file=sys.stderr) + if import_error_raised: + print( + "\nDid you install PySpark via a package manager such as PIP or Conda? If so,\n" + "PySpark was not found in your Python executable. It is possible your\n" + "Python executable does not properly bind with your package manager.\n" + "\nPlease check your default 'python' and if you set PYSPARK_PYTHON and/or\n" + "PYSPARK_DRIVER_PYTHON environment variables, and see if you can import PySpark.\n" + "\nIf you cannot import, you can install by using Python executable directly,\n" + "for example, 'python -m pip install pyspark'. Otherwise, you can also\n" + "explicitly set your Python executable, that has PySpark installed, to\n" + "PYSPARK_PYTHON or PYSPARK_DRIVER_PYTHON environment variables, for example,\n" + "'PYSPARK_PYTHON=python3 pyspark'.\n", file=sys.stderr) sys.exit(-1) if __name__ == "__main__": From 099b13708a9801f6f4073163bc1a384778b1a42a Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Thu, 9 Apr 2020 10:15:39 +0900 Subject: [PATCH 2/2] Address comments --- python/pyspark/find_spark_home.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/python/pyspark/find_spark_home.py b/python/pyspark/find_spark_home.py index 042a04e3209c..52f6ea9a3710 100755 --- a/python/pyspark/find_spark_home.py +++ b/python/pyspark/find_spark_home.py @@ -71,14 +71,15 @@ def is_spark_home(path): print("Could not find valid SPARK_HOME while searching {0}".format(paths), file=sys.stderr) if import_error_raised: print( - "\nDid you install PySpark via a package manager such as PIP or Conda? If so,\n" - "PySpark was not found in your Python executable. It is possible your\n" - "Python executable does not properly bind with your package manager.\n" + "\nDid you install PySpark via a package manager such as pip or Conda? If so,\n" + "PySpark was not found in your Python environment. It is possible your\n" + "Python environment does not properly bind with your package manager.\n" "\nPlease check your default 'python' and if you set PYSPARK_PYTHON and/or\n" - "PYSPARK_DRIVER_PYTHON environment variables, and see if you can import PySpark.\n" - "\nIf you cannot import, you can install by using Python executable directly,\n" - "for example, 'python -m pip install pyspark'. Otherwise, you can also\n" - "explicitly set your Python executable, that has PySpark installed, to\n" + "PYSPARK_DRIVER_PYTHON environment variables, and see if you can import\n" + "PySpark, for example, 'python -c 'import pyspark'.\n" + "\nIf you cannot import, you can install by using the Python executable directly,\n" + "for example, 'python -m pip install pyspark [--user]'. Otherwise, you can also\n" + "explicitly set the Python executable, that has PySpark installed, to\n" "PYSPARK_PYTHON or PYSPARK_DRIVER_PYTHON environment variables, for example,\n" "'PYSPARK_PYTHON=python3 pyspark'.\n", file=sys.stderr) sys.exit(-1)