diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile index 12195f556107..3ba8e9792961 100644 --- a/dev/create-release/spark-rm/Dockerfile +++ b/dev/create-release/spark-rm/Dockerfile @@ -20,7 +20,7 @@ # Includes: # * Java 8 # * Ivy -# * Python/PyPandoc (2.7.15/3.6.7) +# * Python (2.7.15/3.6.7) # * R-base/R-base-dev (3.6.1) # * Ruby 2.3 build utilities @@ -34,7 +34,7 @@ ENV DEBCONF_NONINTERACTIVE_SEEN true ARG APT_INSTALL="apt-get install --no-install-recommends -y" ARG BASE_PIP_PKGS="setuptools wheel" -ARG PIP_PKGS="pyopenssl pypandoc numpy sphinx" +ARG PIP_PKGS="pyopenssl numpy sphinx" # Install extra needed repos and refresh. # - CRAN repo diff --git a/dev/requirements.txt b/dev/requirements.txt index 3fdd3425ffcc..baea9213dbc9 100644 --- a/dev/requirements.txt +++ b/dev/requirements.txt @@ -2,5 +2,4 @@ flake8==3.5.0 jira==1.0.3 PyGithub==1.26.0 Unidecode==0.04.19 -pypandoc==1.3.3 sphinx diff --git a/dev/run-pip-tests b/dev/run-pip-tests index 1294a9096fb9..470f21e69d46 100755 --- a/dev/run-pip-tests +++ b/dev/run-pip-tests @@ -85,7 +85,7 @@ for python in "${PYTHON_EXECS[@]}"; do fi # Upgrade pip & friends if using virtual env if [ ! -n "$USE_CONDA" ]; then - pip install --upgrade pip pypandoc wheel numpy + pip install --upgrade pip wheel numpy fi echo "Creating pip installable source dist" diff --git a/docs/README.md b/docs/README.md index 5197fc55f6b5..22039871cf63 100644 --- a/docs/README.md +++ b/docs/README.md @@ -37,16 +37,30 @@ installed. Also install the following libraries: ```sh $ sudo gem install jekyll jekyll-redirect-from rouge -# Following is needed only for generating API docs -$ sudo pip install sphinx pypandoc mkdocs numpy -$ sudo Rscript -e 'install.packages(c("knitr", "devtools", "testthat", "rmarkdown"), repos="https://cloud.r-project.org/")' -$ sudo Rscript -e 'devtools::install_version("roxygen2", version = "5.0.1", repos="https://cloud.r-project.org/")' ``` Note: If you are on a system with both Ruby 1.9 and Ruby 2.0 you may need to replace gem with gem2.0. +### R Documentation + +If you'd like to generate R documentation, you'll need to [install Pandoc](https://pandoc.org/installing.html) +and install these libraries: + +```sh +$ sudo Rscript -e 'install.packages(c("knitr", "devtools", "testthat", "rmarkdown"), repos="https://cloud.r-project.org/")' +$ sudo Rscript -e 'devtools::install_version("roxygen2", version = "5.0.1", repos="https://cloud.r-project.org/")' +``` + Note: Other versions of roxygen2 might work in SparkR documentation generation but `RoxygenNote` field in `$SPARK_HOME/R/pkg/DESCRIPTION` is 5.0.1, which is updated if the version is mismatched. +### API Documentation + +To generate API docs for any language, you'll need to install these libraries: + +```sh +$ sudo pip install sphinx mkdocs numpy +``` + ## Generating the Documentation HTML We include the Spark documentation as part of the source (as opposed to using a hosted wiki, such as diff --git a/python/setup.py b/python/setup.py index 965927a5694b..40b49aaeeb27 100755 --- a/python/setup.py +++ b/python/setup.py @@ -153,21 +153,15 @@ def _supports_symlinks(): # will search for SPARK_HOME with Python. scripts.append("pyspark/find_spark_home.py") - # Parse the README markdown file into rst for PyPI - long_description = "!!!!! missing pandoc do not upload to PyPI !!!!" - try: - import pypandoc - long_description = pypandoc.convert('README.md', 'rst') - except ImportError: - print("Could not import pypandoc - required to package PySpark", file=sys.stderr) - except OSError: - print("Could not convert - pandoc is not installed", file=sys.stderr) + with open('README.md') as f: + long_description = f.read() setup( name='pyspark', version=VERSION, description='Apache Spark Python API', long_description=long_description, + long_description_content_type="text/markdown", author='Spark Developers', author_email='dev@spark.apache.org', url='https://github.com/apache/spark/tree/master/python', @@ -213,7 +207,6 @@ def _supports_symlinks(): scripts=scripts, license='http://www.apache.org/licenses/LICENSE-2.0', install_requires=['py4j==0.10.8.1'], - setup_requires=['pypandoc'], extras_require={ 'ml': ['numpy>=1.7'], 'mllib': ['numpy>=1.7'],