From 3039465373176d8e764878271abb26bcd1f28007 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Wed, 11 Dec 2019 16:46:19 +0800 Subject: [PATCH 1/6] Use python3 in Docker release image --- dev/create-release/spark-rm/Dockerfile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile index cc7da152c7b2..eabf3a3b6c81 100644 --- a/dev/create-release/spark-rm/Dockerfile +++ b/dev/create-release/spark-rm/Dockerfile @@ -62,14 +62,14 @@ RUN apt-get clean && apt-get update && $APT_INSTALL gnupg ca-certificates && \ curl -sL https://deb.nodesource.com/setup_11.x | bash && \ $APT_INSTALL nodejs && \ # Install needed python packages. Use pip for installing packages (for consistency). - $APT_INSTALL libpython2.7-dev libpython3-dev python-pip python3-pip && \ - pip install $BASE_PIP_PKGS && \ - pip install $PIP_PKGS && \ + $APT_INSTALL libpython3-dev python3-pip && \ + pip3 install $BASE_PIP_PKGS && \ + pip3 install $PIP_PKGS && \ cd && \ virtualenv -p python3 /opt/p35 && \ . /opt/p35/bin/activate && \ - pip install $BASE_PIP_PKGS && \ - pip install $PIP_PKGS && \ + pip3 install $BASE_PIP_PKGS && \ + pip3 install $PIP_PKGS && \ # Install R packages and dependencies used when building. # R depends on pandoc*, libssl (which are installed above). $APT_INSTALL r-base r-base-dev && \ From 9832f05f4b324ca15dc5ecb7da05133217ea428d Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Wed, 11 Dec 2019 17:31:33 +0800 Subject: [PATCH 2/6] Change default python version to python3. --- dev/create-release/spark-rm/Dockerfile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile index eabf3a3b6c81..6d1d96c8c46a 100644 --- a/dev/create-release/spark-rm/Dockerfile +++ b/dev/create-release/spark-rm/Dockerfile @@ -70,6 +70,10 @@ RUN apt-get clean && apt-get update && $APT_INSTALL gnupg ca-certificates && \ . /opt/p35/bin/activate && \ pip3 install $BASE_PIP_PKGS && \ pip3 install $PIP_PKGS && \ + # Change default python version to python3. + update-alternatives --install /usr/bin/python python /usr/bin/python2.7 1 && \ + update-alternatives --install /usr/bin/python python /usr/bin/python3.6 2 && \ + update-alternatives --set python /usr/bin/python3.6 && \ # Install R packages and dependencies used when building. # R depends on pandoc*, libssl (which are installed above). $APT_INSTALL r-base r-base-dev && \ From 98cd80aac0708094a1a66a4e9c629f24f0fd762a Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 12 Dec 2019 09:43:33 +0800 Subject: [PATCH 3/6] Verify remove virtualenv --- dev/create-release/spark-rm/Dockerfile | 12 ++++++------ dev/make-distribution.sh | 2 +- docs/building-spark.md | 2 +- python/setup.py | 1 + 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile index 6d1d96c8c46a..5814598ce669 100644 --- a/dev/create-release/spark-rm/Dockerfile +++ b/dev/create-release/spark-rm/Dockerfile @@ -33,7 +33,7 @@ ENV DEBCONF_NONINTERACTIVE_SEEN true # These arguments are just for reuse and not really meant to be customized. ARG APT_INSTALL="apt-get install --no-install-recommends -y" -ARG BASE_PIP_PKGS="setuptools wheel virtualenv" +ARG BASE_PIP_PKGS="setuptools wheel" ARG PIP_PKGS="pyopenssl pypandoc numpy sphinx" # Install extra needed repos and refresh. @@ -56,6 +56,10 @@ RUN apt-get clean && apt-get update && $APT_INSTALL gnupg ca-certificates && \ # Install openjdk 8. $APT_INSTALL openjdk-8-jdk && \ update-alternatives --set java /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java && \ + # Change default python version to python3. + update-alternatives --install /usr/bin/python python /usr/bin/python2.7 1 && \ + update-alternatives --install /usr/bin/python python /usr/bin/python3.6 2 && \ + update-alternatives --set python /usr/bin/python3.6 && \ # Install build / source control tools $APT_INSTALL curl wget git maven ivy subversion make gcc lsof libffi-dev \ pandoc pandoc-citeproc libssl-dev libcurl4-openssl-dev libxml2-dev && \ @@ -66,14 +70,10 @@ RUN apt-get clean && apt-get update && $APT_INSTALL gnupg ca-certificates && \ pip3 install $BASE_PIP_PKGS && \ pip3 install $PIP_PKGS && \ cd && \ - virtualenv -p python3 /opt/p35 && \ + python /opt/p35 && \ . /opt/p35/bin/activate && \ pip3 install $BASE_PIP_PKGS && \ pip3 install $PIP_PKGS && \ - # Change default python version to python3. - update-alternatives --install /usr/bin/python python /usr/bin/python2.7 1 && \ - update-alternatives --install /usr/bin/python python /usr/bin/python3.6 2 && \ - update-alternatives --set python /usr/bin/python3.6 && \ # Install R packages and dependencies used when building. # R depends on pandoc*, libssl (which are installed above). $APT_INSTALL r-base r-base-dev && \ diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh index 8ba93aa56f00..16fbb71be429 100755 --- a/dev/make-distribution.sh +++ b/dev/make-distribution.sh @@ -233,7 +233,7 @@ if [ "$MAKE_PIP" == "true" ]; then pushd "$SPARK_HOME/python" > /dev/null # Delete the egg info file if it exists, this can cache older setup files. rm -rf pyspark.egg-info || echo "No existing egg info file, skipping deletion" - python3 setup.py sdist + python setup.py sdist popd > /dev/null else echo "Skipping building python distribution package" diff --git a/docs/building-spark.md b/docs/building-spark.md index 891d6225bbdd..898f904461a6 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -66,7 +66,7 @@ with Maven profile settings and so on like the direct Maven build. Example: ./dev/make-distribution.sh --name custom-spark --pip --r --tgz -Psparkr -Phive -Phive-thriftserver -Pmesos -Pyarn -Pkubernetes -This will build Spark distribution along with Python pip and R packages. (Note that build with Python pip package requires Python 3.6). For more information on usage, run `./dev/make-distribution.sh --help` +This will build Spark distribution along with Python pip and R packages. For more information on usage, run `./dev/make-distribution.sh --help` ## Specifying the Hadoop Version and Enabling YARN diff --git a/python/setup.py b/python/setup.py index 138161ff13b4..28ce9ff90dfb 100755 --- a/python/setup.py +++ b/python/setup.py @@ -16,6 +16,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import print_function import glob import os import sys From 02896e38fcf42da5cd05c350cb2622ccd80fc061 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 12 Dec 2019 10:06:44 +0800 Subject: [PATCH 4/6] Fix: update-alternatives: error: alternative path /usr/bin/python2.7 doesn't exist --- dev/create-release/spark-rm/Dockerfile | 8 ++++---- dev/make-distribution.sh | 2 +- python/setup.py | 1 - 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile index 5814598ce669..0d1a9fc7cd0f 100644 --- a/dev/create-release/spark-rm/Dockerfile +++ b/dev/create-release/spark-rm/Dockerfile @@ -56,10 +56,6 @@ RUN apt-get clean && apt-get update && $APT_INSTALL gnupg ca-certificates && \ # Install openjdk 8. $APT_INSTALL openjdk-8-jdk && \ update-alternatives --set java /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java && \ - # Change default python version to python3. - update-alternatives --install /usr/bin/python python /usr/bin/python2.7 1 && \ - update-alternatives --install /usr/bin/python python /usr/bin/python3.6 2 && \ - update-alternatives --set python /usr/bin/python3.6 && \ # Install build / source control tools $APT_INSTALL curl wget git maven ivy subversion make gcc lsof libffi-dev \ pandoc pandoc-citeproc libssl-dev libcurl4-openssl-dev libxml2-dev && \ @@ -67,6 +63,10 @@ RUN apt-get clean && apt-get update && $APT_INSTALL gnupg ca-certificates && \ $APT_INSTALL nodejs && \ # Install needed python packages. Use pip for installing packages (for consistency). $APT_INSTALL libpython3-dev python3-pip && \ + # Change default python version to python3. + update-alternatives --install /usr/bin/python python /usr/bin/python2.7 1 && \ + update-alternatives --install /usr/bin/python python /usr/bin/python3.6 2 && \ + update-alternatives --set python /usr/bin/python3.6 && \ pip3 install $BASE_PIP_PKGS && \ pip3 install $PIP_PKGS && \ cd && \ diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh index 16fbb71be429..8ba93aa56f00 100755 --- a/dev/make-distribution.sh +++ b/dev/make-distribution.sh @@ -233,7 +233,7 @@ if [ "$MAKE_PIP" == "true" ]; then pushd "$SPARK_HOME/python" > /dev/null # Delete the egg info file if it exists, this can cache older setup files. rm -rf pyspark.egg-info || echo "No existing egg info file, skipping deletion" - python setup.py sdist + python3 setup.py sdist popd > /dev/null else echo "Skipping building python distribution package" diff --git a/python/setup.py b/python/setup.py index 28ce9ff90dfb..138161ff13b4 100755 --- a/python/setup.py +++ b/python/setup.py @@ -16,7 +16,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function import glob import os import sys From 1f2ede8d7de242ed38007e302dcca3ed97593160 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 12 Dec 2019 11:12:34 +0800 Subject: [PATCH 5/6] Address comment --- dev/create-release/spark-rm/Dockerfile | 5 ----- 1 file changed, 5 deletions(-) diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile index 0d1a9fc7cd0f..12195f556107 100644 --- a/dev/create-release/spark-rm/Dockerfile +++ b/dev/create-release/spark-rm/Dockerfile @@ -69,11 +69,6 @@ RUN apt-get clean && apt-get update && $APT_INSTALL gnupg ca-certificates && \ update-alternatives --set python /usr/bin/python3.6 && \ pip3 install $BASE_PIP_PKGS && \ pip3 install $PIP_PKGS && \ - cd && \ - python /opt/p35 && \ - . /opt/p35/bin/activate && \ - pip3 install $BASE_PIP_PKGS && \ - pip3 install $PIP_PKGS && \ # Install R packages and dependencies used when building. # R depends on pandoc*, libssl (which are installed above). $APT_INSTALL r-base r-base-dev && \ From 71b642d1698971696669cb89ac864236e91e129d Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 12 Dec 2019 13:45:02 +0800 Subject: [PATCH 6/6] This reverts commit 1f94bf49 and d6be46e --- dev/create-release/do-release-docker.sh | 3 -- python/docs/Makefile | 37 ++++--------------------- 2 files changed, 6 insertions(+), 34 deletions(-) diff --git a/dev/create-release/do-release-docker.sh b/dev/create-release/do-release-docker.sh index f643c060eb32..694a87bf7808 100755 --- a/dev/create-release/do-release-docker.sh +++ b/dev/create-release/do-release-docker.sh @@ -136,9 +136,6 @@ if [ -n "$JAVA" ]; then JAVA_VOL="--volume $JAVA:/opt/spark-java" fi -# SPARK-24530: Sphinx must work with python 3 to generate doc correctly. -echo "SPHINXPYTHON=/opt/p35/bin/python" >> $ENVFILE - echo "Building $RELEASE_TAG; output will be at $WORKDIR/output" docker run -ti \ --env-file "$ENVFILE" \ diff --git a/python/docs/Makefile b/python/docs/Makefile index 4767fd9f1c03..66d3fc425daa 100644 --- a/python/docs/Makefile +++ b/python/docs/Makefile @@ -1,44 +1,19 @@ # Makefile for Sphinx documentation # -ifndef SPHINXBUILD -ifndef SPHINXPYTHON -SPHINXBUILD = sphinx-build -endif -endif - -ifdef SPHINXBUILD -# User-friendly check for sphinx-build. -ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) -$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) -endif -else -# Note that there is an issue with Python version and Sphinx in PySpark documentation generation. -# Please remove this check below when this issue is fixed. See SPARK-24530 for more details. -PYTHON_VERSION_CHECK = $(shell $(SPHINXPYTHON) -c 'import sys; print(sys.version_info < (3, 0, 0))') -ifeq ($(PYTHON_VERSION_CHECK), True) -$(error Note that Python 3 is required to generate PySpark documentation correctly for now. Current Python executable was less than Python 3. See SPARK-24530. To force Sphinx to use a specific Python executable, please set SPHINXPYTHON to point to the Python 3 executable.) -endif -# Check if Sphinx is installed. -ifeq ($(shell $(SPHINXPYTHON) -c 'import sphinx' >/dev/null 2>&1; echo $$?), 1) -$(error Python executable '$(SPHINXPYTHON)' did not have Sphinx installed. Make sure you have Sphinx installed, then set the SPHINXPYTHON environment variable to point to the Python executable having Sphinx installed. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) -endif -# Use 'SPHINXPYTHON -msphinx' instead of 'sphinx-build'. See https://github.com/sphinx-doc/sphinx/pull/3523 for more details. -SPHINXBUILD = $(SPHINXPYTHON) -msphinx -endif - # You can set these variables from the command line. SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build PAPER ?= BUILDDIR ?= _build -# You can set SPHINXBUILD to specify Sphinx build executable or SPHINXPYTHON to specify the Python executable used in Sphinx. -# They follow: -# 1. if SPHINXPYTHON is set, use Python. If SPHINXBUILD is set, use sphinx-build. -# 2. If both are set, SPHINXBUILD has a higher priority over SPHINXPYTHON -# 3. By default, SPHINXBUILD is used as 'sphinx-build'. export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.8.1-src.zip) +# User-friendly check for sphinx-build +ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) +$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) +endif + # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter