diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index ccc437269bfa7..68fd48cc65328 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -689,17 +689,13 @@ jobs: # Should delete this section after SPARK 3.5 EOL. python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==22.6.0' python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.59.3' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' - - name: Install Python linter dependencies - if: inputs.branch != 'branch-3.3' && inputs.branch != 'branch-3.4' && inputs.branch != 'branch-3.5' - run: | - # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes. - # See also https://github.com/sphinx-doc/sphinx/issues/7551. - # Jinja2 3.0.0+ causes error when building with Sphinx. - # See also https://issues.apache.org/jira/browse/SPARK-35375. - python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==23.9.1' - python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.59.3' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' - - name: Python linter + python3.9 -m pip install 'protobuf==4.25.1' 'mypy-protobuf==3.3.0' + - name: Python linter for branch-3.3, branch-3.4, branch-3.5 + if: inputs.branch == 'branch-3.3' || inputs.branch == 'branch-3.4' || inputs.branch == 'branch-3.5' run: PYTHON_EXECUTABLE=python3.9 ./dev/lint-python + - name: Python linter + if: inputs.branch != 'branch-3.3' && inputs.branch != 'branch-3.4' && inputs.branch != 'branch-3.5' + run: conda run -n doc ./dev/lint-python - name: Install dependencies for Python code generation check if: inputs.branch != 'branch-3.3' && inputs.branch != 'branch-3.4' run: | @@ -708,11 +704,13 @@ jobs: mkdir -p $HOME/buf tar -xvzf buf-Linux-x86_64.tar.gz -C $HOME/buf --strip-components 1 rm buf-Linux-x86_64.tar.gz - python3.9 -m pip install 'protobuf==4.25.1' 'mypy-protobuf==3.3.0' + - name: Python code generation check for branch-3.5 + if: inputs.branch == 'branch-3.5' + run: PATH=$PATH:$HOME/buf/bin PYTHON_EXECUTABLE=python3.9 ./dev/connect-check-protos.py - name: Python code generation check - if: inputs.branch != 'branch-3.3' && inputs.branch != 'branch-3.4' - run: if test -f ./dev/connect-check-protos.py; then PATH=$PATH:$HOME/buf/bin PYTHON_EXECUTABLE=python3.9 ./dev/connect-check-protos.py; fi - # Should delete this section after SPARK 3.5 EOL. + if: inputs.branch != 'branch-3.3' && inputs.branch != 'branch-3.4' && inputs.branch != 'branch-3.5' + run: PATH=$PATH:$HOME/buf/bin conda run -n doc ./dev/connect-check-protos.py + # Should delete this section after SPARK 3.5 EOL. - name: Install JavaScript linter dependencies for branch-3.3, branch-3.4, branch-3.5 if: inputs.branch == 'branch-3.3' || inputs.branch == 'branch-3.4' || inputs.branch == 'branch-3.5' run: | @@ -743,8 +741,6 @@ jobs: Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'markdown', 'e1071', 'roxygen2', 'ggplot2', 'mvtnorm', 'statmod'), repos='https://cloud.r-project.org/')" Rscript -e "devtools::install_version('pkgdown', version='2.0.1', repos='https://cloud.r-project.org')" Rscript -e "devtools::install_version('preferably', version='0.4', repos='https://cloud.r-project.org')" - - name: Install dependencies for documentation generation - run: | # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes. # See also https://github.com/sphinx-doc/sphinx/issues/7551. # Jinja2 3.0.0+ causes error when building with Sphinx. @@ -755,13 +751,30 @@ jobs: python3.9 -m pip install ipython_genutils # See SPARK-38517 python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' python3.9 -m pip install 'docutils<0.18.0' # See SPARK-39421 + - name: Install dependencies for documentation generation + run: | gem install bundler cd docs bundle install - name: R linter run: ./dev/lint-r + - name: Run documentation build for branch-3.3, branch-3.4, branch-3.5 + if: inputs.branch == 'branch-3.3' || inputs.branch == 'branch-3.4' || inputs.branch == 'branch-3.5' + run: | + if [ -f "./dev/is-changed.py" ]; then + # Skip PySpark and SparkR docs while keeping Scala/Java/SQL docs + pyspark_modules=`cd dev && python3.9 -c "import sparktestsupport.modules as m; print(','.join(m.name for m in m.all_modules if m.name.startswith('pyspark')))"` + if [ `./dev/is-changed.py -m $pyspark_modules` = false ]; then export SKIP_PYTHONDOC=1; fi + if [ `./dev/is-changed.py -m sparkr` = false ]; then export SKIP_RDOC=1; fi + fi + cd docs + bundle exec jekyll build - name: Run documentation build + if: inputs.branch != 'branch-3.3' && inputs.branch != 'branch-3.4' && inputs.branch != 'branch-3.5' + shell: 'script -q -e -c "bash {0}"' run: | + # See also https://github.com/conda/conda/issues/7980 + source $(conda info --base)/etc/profile.d/conda.sh && conda activate doc if [ -f "./dev/is-changed.py" ]; then # Skip PySpark and SparkR docs while keeping Scala/Java/SQL docs pyspark_modules=`cd dev && python3.9 -c "import sparktestsupport.modules as m; print(','.join(m.name for m in m.all_modules if m.name.startswith('pyspark')))"` diff --git a/dev/infra/Dockerfile b/dev/infra/Dockerfile index 10ae49b71665f..e353e62e04cca 100644 --- a/dev/infra/Dockerfile +++ b/dev/infra/Dockerfile @@ -139,3 +139,60 @@ RUN python3.12 -m pip install 'grpcio==1.59.3' 'grpcio-status==1.59.3' 'protobuf # TODO(SPARK-46078) Use official one instead of nightly build when it's ready RUN python3.12 -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu RUN python3.12 -m pip install torcheval + + +# Refer to https://github.com/ContinuumIO/docker-images/blob/main/miniconda3/debian/Dockerfile +RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh -q && \ + bash miniconda.sh -b -p /opt/miniconda3 && \ + rm miniconda.sh && \ + ln -s /opt/miniconda3/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \ + ln -s /opt/miniconda3/bin/conda /usr/local/bin/conda && \ + find /opt/miniconda3/ -follow -type f -name '*.a' -delete && \ + find /opt/miniconda3/ -follow -type f -name '*.js.map' -delete && \ + conda clean -afy + +# Additional Python deps for linter and documentation, delete this section if another Python version is used +# Since there maybe conflicts between envs, here uses conda to manage it. +# TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes. +# See also https://github.com/sphinx-doc/sphinx/issues/7551. +# Jinja2 3.0.0+ causes error when building with Sphinx. +# See also https://issues.apache.org/jira/browse/SPARK-35375. +RUN conda create -n doc python=3.9 + +RUN conda run -n doc pip install \ + 'black==23.9.1' \ + 'docutils<0.18.0' \ + 'flake8==3.9.0' \ + 'googleapis-common-protos-stubs==2.2.0' \ + 'grpc-stubs==1.24.11' \ + 'grpcio==1.59.3' \ + 'ipython' \ + 'ipython_genutils' \ + 'jinja2<3.0.0' \ + 'markupsafe==2.0.1' \ + 'matplotlib' \ + 'mkdocs' \ + 'mypy-protobuf==3.3.0' \ + 'mypy==0.982' \ + 'nbsphinx' \ + 'numpy>=1.20.0' \ + 'numpydoc' \ + 'pandas' \ + 'pandas-stubs==1.2.0.53' \ + 'plotly>=4.8' \ + 'protobuf==4.25.1' \ + 'pyarrow' \ + 'pydata_sphinx_theme>=0.13' \ + 'pytest-mypy-plugins==1.9.3' \ + 'pytest==7.1.3' \ + 'pyzmq<24.0.0' \ + 'sphinx-copybutton' \ + 'sphinx==4.2.0' \ + 'sphinx_plotly_directive' +RUN conda run -n doc pip install \ + 'torch<=2.0.1' \ + 'torchvision' --index-url https://download.pytorch.org/whl/cpu +RUN conda run -n doc pip install \ + 'torcheval' + +RUN conda clean -afy