47 changes: 47 additions & 0 deletions python/pyspark/__init__.py
@@ -36,6 +36,53 @@
Finer-grained cache persistence levels.

"""
import os
import re
import sys

from os.path import isfile, join

import xml.etree.ElementTree as ET

if os.environ.get("SPARK_HOME") is None:
    raise ImportError("Environment variable SPARK_HOME is undefined.")

spark_home = os.environ['SPARK_HOME']
pom_xml_file_path = join(spark_home, 'pom.xml')
snapshot_version = None

if isfile(pom_xml_file_path):
    try:
        tree = ET.parse(pom_xml_file_path)
        root = tree.getroot()
        # The <version> element is expected to be the fifth child of <project>;
        # keep only the numeric prefix, e.g. "1.5.0" from "1.5.0-SNAPSHOT".
        version_tag = root[4].text
        snapshot_version = version_tag[:5]
    except Exception:
        raise ImportError("Could not read the spark version, because pom.xml file" +
                          " could not be read.")
else:
    try:
        lib_file_path = join(spark_home, "lib")
Contributor: Since we will always need this branch, can we remove the other one (i.e. always find the version from the assembly jar)?

Contributor: @alope107 Can we go ahead and find the version only from the spark-assembly jar?

Reviewer: @alope107, would you mind updating this PR to remove the pom_xml_file_path branch? Thanks!

        jars = [f for f in os.listdir(lib_file_path) if isfile(join(lib_file_path, f))]

        # Extract the version from jar names of the form spark-assembly-<version>*.jar.
        for jar in jars:
            m = re.match(r"^spark-assembly-([0-9\.]+).*\.jar$", jar)
            if m is not None and len(m.groups()) > 0:
                snapshot_version = m.group(1)

        if snapshot_version is None:
            raise ImportError("Could not read the spark version, because pom.xml or spark" +
                              " assembly jar could not be found.")
    except OSError:
        raise ImportError("Could not read the spark version, because pom.xml or lib directory" +
                          " could not be found in SPARK_HOME")


from pyspark.pyspark_version import __version__
if snapshot_version != __version__:
    raise ImportError("Incompatible version of Spark (%s) and PySpark (%s)." %
                      (snapshot_version, __version__))


from pyspark.conf import SparkConf
from pyspark.context import SparkContext
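For reference, a minimal sketch of the jar-only approach suggested in the review comments above (drop the pom.xml branch and always derive the version from the spark-assembly jar). It reuses the PR's own regex but is only an illustration, not code from this PR:

```python
# Hypothetical simplification (not part of this PR): always read the Spark
# version from the spark-assembly jar under $SPARK_HOME/lib.
import os
import re
from os.path import isdir, join


def _version_from_assembly(spark_home):
    lib_dir = join(spark_home, "lib")
    if not isdir(lib_dir):
        raise ImportError("lib directory could not be found in SPARK_HOME")
    for jar in os.listdir(lib_dir):
        m = re.match(r"^spark-assembly-([0-9\.]+).*\.jar$", jar)
        if m:
            return m.group(1)
    raise ImportError("spark-assembly jar could not be found in SPARK_HOME/lib")
```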
17 changes: 17 additions & 0 deletions python/pyspark/pyspark_version.py
@@ -0,0 +1,17 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
__version__ = '1.5.0'
Contributor: Is there a way to source this from some existing place? That way we don't have to update the version string in multiple places. I forget where, but there should already be a central place where the version is set.

Author: I'm not seeing any version that's specific to PySpark, only a version for Spark as a whole. I agree that we don't want to set a version in multiple places, but I think the one I introduced is the only version unique to PySpark.

Reviewer: An alternative, but trickier, idea would be to make mvn's pom.xml version the authoritative one and have the build process add or modify this file to match it (maybe using mvn resource filtering?). This would break being able to just "pip install -e python" in development mode, since people would have to remember to run the mvn command to sync the file over, but at least there would be no risk of the versions going out of sync in the build.

Author: I'm not sure I entirely follow. Are you suggesting that when Spark is built, Maven creates this pyspark_version file as part of the build process? If so, how does this affect a user who installs from PyPI?

Reviewer: We still need to build an sdist and wheel, so we can just make sure that whatever process we use adds that file in. I'm not sure it's really worth the complexity at this moment, but my team does something internally such that our Python and Java code both get semantic versions based off the latest tag and the git hash.

Contributor: I think it's error-prone to have multiple copies of the version in different places; if someone forgets to update one of them, PySpark will break (even within the repo).

I'd vote for generating the version while generating the PyPI package. If PySpark comes along with Spark, we don't need this check (at least it shouldn't fail or slow things down).

Author: So we remove the version checks entirely in the bundled version, and include them only for the package uploaded to PyPI? I agree that this reduces the chance of maintainer error, but I'm worried about users upgrading versions of Spark. A user could install a bundled version of PySpark and then later point their SPARK_HOME at a newer version of Spark. There would then be a version mismatch that wouldn't be detected.

Maybe a middle ground could be to include the version checks in both bundled and pip installations, but to add a check during PyPI package generation that the version has been properly set.

Reviewer: How is the version number specified for the Scala side now?

Author: I'm not sure. Could someone with more experience with that side of the project chime in?

Reviewer: I am in favor of PySpark packaging the corresponding version of Spark. As a user experience, this is cleaner, requires fewer steps, and is more natural and in line with other pip-installable libraries. I have experience packaging jars with Python libraries in platform-independent ways and would be happy to help if wanted.
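To make the "generate the version during packaging" idea above concrete, here is a rough sketch of a helper that a release or sdist-building script could run. The element lookup and file paths are assumptions for illustration; nothing like this is implemented in the PR:

```python
# Hypothetical helper (not part of this PR): derive pyspark/pyspark_version.py
# from the top-level pom.xml so the version string is maintained in one place.
import xml.etree.ElementTree as ET

POM_NS = "{http://maven.apache.org/POM/4.0.0}"


def write_pyspark_version(pom_path="../pom.xml",
                          out_path="pyspark/pyspark_version.py"):
    root = ET.parse(pom_path).getroot()
    # e.g. "1.5.0-SNAPSHOT" -> "1.5.0"
    version = root.find(POM_NS + "version").text.split("-")[0]
    with open(out_path, "w") as f:
        f.write("__version__ = '%s'\n" % version)


if __name__ == "__main__":
    write_pyspark_version()
```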

22 changes: 22 additions & 0 deletions python/setup.py
@@ -0,0 +1,22 @@
#!/usr/bin/env python

from setuptools import setup

# Read __version__ from pyspark/pyspark_version.py without importing pyspark
# itself (which would require SPARK_HOME to be set).
exec(compile(open("pyspark/pyspark_version.py").read(),
             "pyspark/pyspark_version.py", 'exec'))
VERSION = __version__

setup(name='pyspark',
      version=VERSION,
      description='Apache Spark Python API',
      author='Spark Developers',
      author_email='dev@spark.apache.org',
      url='https://github.com/apache/spark/tree/master/python',
      packages=['pyspark', 'pyspark.mllib', 'pyspark.ml', 'pyspark.sql', 'pyspark.streaming'],
      install_requires=['py4j==0.9'],
      extras_require={
          'ml': ['numpy>=1.7'],
          'sql': ['pandas']
      },
      license='http://www.apache.org/licenses/LICENSE-2.0')
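As a usage note for the extras declared above: from the python/ directory, `pip install -e .` installs PySpark with only py4j as a required dependency, while `pip install -e ".[ml]"` additionally pulls in numpy and `pip install -e ".[sql]"` pulls in pandas.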
Contributor: This is maybe asking for too much, but in Sparkling Pandas we install our own assembly jar*; would it maybe make sense to do that as part of this process?

(*and getting it working has been painful, but doable)

Author: I'm not familiar with assembly jars, so please correct me if I'm wrong, but I think we shouldn't need one for PySpark, as it is entirely Python code. Wouldn't we only need an assembly jar if we were also looking to package Scala or Java code?

Contributor: By assembly JAR in this case I'm referring to the Spark assembly jar (which we would want to package as an artifact, along with the submit scripts, if we wanted to put this on PyPI, but that might not be an immediate goal).

Author: So if SPARK_HOME were set, it would use that Spark installation, and default to the packaged JAR otherwise? Depending on the size of the assembly JAR, I would be in favor of this, as it makes installation very easy for those who only want to interact with Spark through PySpark, but the discussion on the mailing list seemed to intentionally shy away from too large a PyPI package. I'll bring up your suggestion to see if there's wider support, and I encourage you to join the discussion here: http://apache-spark-developers-list.1001551.n3.nabble.com/PySpark-on-PyPi-td12626.html

Author: As was discussed on the list, I think it makes sense to hold off on the jar at first. It's definitely worth revisiting down the line, though.
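For illustration of the fallback described above (use SPARK_HOME when set, otherwise a jar bundled with the pip package), here is a rough sketch. The bundled-jar location is a placeholder, since no jar is actually packaged by this PR:

```python
# Hypothetical sketch (not in this PR): prefer an explicit SPARK_HOME and fall
# back to an assembly jar bundled inside the pyspark package. The bundled path
# ("jars/spark-assembly.jar") is a placeholder, not an agreed-upon layout.
import os
from os.path import dirname, exists, isdir, join


def find_spark_assembly():
    spark_home = os.environ.get("SPARK_HOME")
    if spark_home:
        lib_dir = join(spark_home, "lib")
        if isdir(lib_dir):
            for name in sorted(os.listdir(lib_dir)):
                if name.startswith("spark-assembly-") and name.endswith(".jar"):
                    return join(lib_dir, name)
    bundled = join(dirname(__file__), "jars", "spark-assembly.jar")
    return bundled if exists(bundled) else None
```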