diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py
index 5f70ac6ed8fe..f4201ba28c91 100644
--- a/python/pyspark/__init__.py
+++ b/python/pyspark/__init__.py
@@ -36,6 +36,59 @@
 
       Finer-grained cache persistence levels.
 
 """
+import os
+import re
+import sys
+
+from os.path import isfile, join
+
+import xml.etree.ElementTree as ET
+
+if os.environ.get("SPARK_HOME") is None:
+    raise ImportError("Environment variable SPARK_HOME is undefined.")
+
+spark_home = os.environ['SPARK_HOME']
+pom_xml_file_path = join(spark_home, 'pom.xml')
+snapshot_version = None
+
+if isfile(pom_xml_file_path):
+    # Source checkout: read the Spark version from the Maven pom.xml.
+    try:
+        tree = ET.parse(pom_xml_file_path)
+        root = tree.getroot()
+        # NOTE(review): positional access assumes <version> is the fifth
+        # child of <project>; a namespace-aware find() would be sturdier -- confirm.
+        version_tag = root[4].text
+        # Strip a "-SNAPSHOT" style suffix instead of truncating to five
+        # characters, so versions such as "1.10.0" survive intact.
+        snapshot_version = version_tag.split("-")[0]
+    except Exception:
+        raise ImportError("Could not read the spark version, because pom.xml file"
+                          " could not be read.")
+else:
+    # Binary distribution: recover the version from the assembly jar name.
+    try:
+        lib_file_path = join(spark_home, "lib")
+        jars = [f for f in os.listdir(lib_file_path) if isfile(join(lib_file_path, f))]
+
+        for jar in jars:
+            m = re.match(r"^spark-assembly-([0-9\.]+).*\.jar$", jar)
+            if m is not None:
+                snapshot_version = m.group(1)
+
+        if snapshot_version is None:
+            raise ImportError("Could not read the spark version, because pom.xml or spark"
+                              " assembly jar could not be found.")
+    except OSError:
+        raise ImportError("Could not read the spark version, because pom.xml or lib directory"
+                          " could not be found in SPARK_HOME")
+
+
+from pyspark.pyspark_version import __version__
+if snapshot_version != __version__:
+    raise ImportError("Incompatible version of Spark(%s) and PySpark(%s)." %
+                      (snapshot_version, __version__))
+
 from pyspark.conf import SparkConf
 from pyspark.context import SparkContext
diff --git a/python/pyspark/pyspark_version.py b/python/pyspark/pyspark_version.py
new file mode 100644
index 000000000000..dd34f30853ac
--- /dev/null
+++ b/python/pyspark/pyspark_version.py
@@ -0,0 +1,17 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+__version__ = '1.5.0'
diff --git a/python/setup.py b/python/setup.py
new file mode 100644
index 000000000000..42aaf1b57e32
--- /dev/null
+++ b/python/setup.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python
+
+from setuptools import setup
+
+# Read __version__ from pyspark/pyspark_version.py without importing the
+# pyspark package (importing pyspark requires SPARK_HOME to be set).
+with open("pyspark/pyspark_version.py") as version_file:
+    exec(compile(version_file.read(), "pyspark/pyspark_version.py", 'exec'))
+VERSION = __version__
+
+setup(name='pyspark',
+      version=VERSION,
+      description='Apache Spark Python API',
+      author='Spark Developers',
+      author_email='dev@spark.apache.org',
+      url='https://github.com/apache/spark/tree/master/python',
+      packages=['pyspark', 'pyspark.mllib', 'pyspark.ml', 'pyspark.sql', 'pyspark.streaming'],
+      install_requires=['py4j==0.9'],
+      extras_require={
+          'ml': ['numpy>=1.7'],
+          'sql': ['pandas']
+      },
+      license='http://www.apache.org/licenses/LICENSE-2.0',
+      )