From ef8b3ad9338f3c3faa770aef279c8d2d48ee11d6 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Wed, 22 Nov 2023 14:58:05 +0800 Subject: [PATCH 1/5] init fix doc --- .github/workflows/build_and_test.yml | 13 ++----------- dev/infra/Dockerfile | 10 ++++++++++ 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index ccc437269bfa..1f63278e609f 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -689,15 +689,6 @@ jobs: # Should delete this section after SPARK 3.5 EOL. python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==22.6.0' python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.59.3' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' - - name: Install Python linter dependencies - if: inputs.branch != 'branch-3.3' && inputs.branch != 'branch-3.4' && inputs.branch != 'branch-3.5' - run: | - # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes. - # See also https://github.com/sphinx-doc/sphinx/issues/7551. - # Jinja2 3.0.0+ causes error when building with Sphinx. - # See also https://issues.apache.org/jira/browse/SPARK-35375. 
- python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==23.9.1' - python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.59.3' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' - name: Python linter run: PYTHON_EXECUTABLE=python3.9 ./dev/lint-python - name: Install dependencies for Python code generation check @@ -743,8 +734,6 @@ jobs: Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'markdown', 'e1071', 'roxygen2', 'ggplot2', 'mvtnorm', 'statmod'), repos='https://cloud.r-project.org/')" Rscript -e "devtools::install_version('pkgdown', version='2.0.1', repos='https://cloud.r-project.org')" Rscript -e "devtools::install_version('preferably', version='0.4', repos='https://cloud.r-project.org')" - - name: Install dependencies for documentation generation - run: | # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes. # See also https://github.com/sphinx-doc/sphinx/issues/7551. # Jinja2 3.0.0+ causes error when building with Sphinx. @@ -755,6 +744,8 @@ jobs: python3.9 -m pip install ipython_genutils # See SPARK-38517 python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' python3.9 -m pip install 'docutils<0.18.0' # See SPARK-39421 + - name: Install dependencies for documentation generation + run: | gem install bundler cd docs bundle install diff --git a/dev/infra/Dockerfile b/dev/infra/Dockerfile index 10ae49b71665..ccb95b44dab8 100644 --- a/dev/infra/Dockerfile +++ b/dev/infra/Dockerfile @@ -104,6 +104,16 @@ RUN python3.9 -m pip install torcheval # Add Deepspeed as a testing dependency for DeepspeedTorchDistributor RUN python3.9 -m pip install deepspeed +# Additional Python deps for linter and documentation, delete this section if another Python version is used +# TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes. 
+# See also https://github.com/sphinx-doc/sphinx/issues/7551. +# Jinja2 3.0.0+ causes error when building with Sphinx. +# See also https://issues.apache.org/jira/browse/SPARK-35375. +RUN python3.9 -m pip install 'flake8==3.9.0' 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'jinja2<3.0.0' 'black==23.9.1' +RUN python3.9 -m pip install 'pandas-stubs==1.2.0.53' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' +RUN python3.9 -m pip install 'sphinx<3.1.0' pydata_sphinx_theme mkdocs sphinx-copybutton nbsphinx numpydoc 'markupsafe==2.0.1' 'pyzmq<24.0.0' +RUN python3.9 -m pip install ipython ipython_genutils sphinx_plotly_directive 'docutils<0.18.0' + # Install Python 3.10 at the last stage to avoid breaking Python 3.9 RUN add-apt-repository ppa:deadsnakes/ppa RUN apt-get update && apt-get install -y \ From 0ef15c75b80ace58706e747f7ec0f9cdc70f1882 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Wed, 22 Nov 2023 17:06:53 +0800 Subject: [PATCH 2/5] exact copy --- dev/infra/Dockerfile | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/dev/infra/Dockerfile b/dev/infra/Dockerfile index ccb95b44dab8..9bbe66b8ee36 100644 --- a/dev/infra/Dockerfile +++ b/dev/infra/Dockerfile @@ -109,10 +109,12 @@ RUN python3.9 -m pip install deepspeed # See also https://github.com/sphinx-doc/sphinx/issues/7551. # Jinja2 3.0.0+ causes error when building with Sphinx. # See also https://issues.apache.org/jira/browse/SPARK-35375. 
-RUN python3.9 -m pip install 'flake8==3.9.0' 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'jinja2<3.0.0' 'black==23.9.1' -RUN python3.9 -m pip install 'pandas-stubs==1.2.0.53' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' -RUN python3.9 -m pip install 'sphinx<3.1.0' pydata_sphinx_theme mkdocs sphinx-copybutton nbsphinx numpydoc 'markupsafe==2.0.1' 'pyzmq<24.0.0' -RUN python3.9 -m pip install ipython ipython_genutils sphinx_plotly_directive 'docutils<0.18.0' +RUN python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==23.9.1' +RUN python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.59.3' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \ +RUN python3.9 -m pip install 'sphinx<3.1.0' mkdocs pydata_sphinx_theme sphinx-copybutton nbsphinx numpydoc 'jinja2<3.0.0' 'markupsafe==2.0.1' 'pyzmq<24.0.0' +RUN python3.9 -m pip install ipython_genutils +RUN python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' +RUN python3.9 -m pip install 'docutils<0.18.0' # Install Python 3.10 at the last stage to avoid breaking Python 3.9 RUN add-apt-repository ppa:deadsnakes/ppa From fcc19c09903f9ecdb44d5011fbc0148281c30c36 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Wed, 22 Nov 2023 17:46:30 +0800 Subject: [PATCH 3/5] fix --- dev/infra/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/infra/Dockerfile b/dev/infra/Dockerfile index 9bbe66b8ee36..25c46d1e081f 100644 --- a/dev/infra/Dockerfile +++ b/dev/infra/Dockerfile @@ -110,7 +110,7 @@ RUN python3.9 -m pip install deepspeed # Jinja2 3.0.0+ causes error when building with Sphinx. # See also https://issues.apache.org/jira/browse/SPARK-35375. 
RUN python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==23.9.1' -RUN python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.59.3' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \ +RUN python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.59.3' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' RUN python3.9 -m pip install 'sphinx<3.1.0' mkdocs pydata_sphinx_theme sphinx-copybutton nbsphinx numpydoc 'jinja2<3.0.0' 'markupsafe==2.0.1' 'pyzmq<24.0.0' RUN python3.9 -m pip install ipython_genutils RUN python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' From 5619cec8225374f8ca96eb3b04e50dda1b00caf3 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Mon, 27 Nov 2023 16:22:22 +0800 Subject: [PATCH 4/5] use conda fix fix fix --- .github/workflows/build_and_test.yml | 32 +++++++++++--- dev/infra/Dockerfile | 64 ++++++++++++++++++++++------ 2 files changed, 79 insertions(+), 17 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 1f63278e609f..68fd48cc6532 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -689,8 +689,13 @@ jobs: # Should delete this section after SPARK 3.5 EOL. 
python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==22.6.0' python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.59.3' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' - - name: Python linter + python3.9 -m pip install 'protobuf==4.25.1' 'mypy-protobuf==3.3.0' + - name: Python linter for branch-3.3, branch-3.4, branch-3.5 + if: inputs.branch == 'branch-3.3' || inputs.branch == 'branch-3.4' || inputs.branch == 'branch-3.5' run: PYTHON_EXECUTABLE=python3.9 ./dev/lint-python + - name: Python linter + if: inputs.branch != 'branch-3.3' && inputs.branch != 'branch-3.4' && inputs.branch != 'branch-3.5' + run: conda run -n doc ./dev/lint-python - name: Install dependencies for Python code generation check if: inputs.branch != 'branch-3.3' && inputs.branch != 'branch-3.4' run: | @@ -699,11 +704,13 @@ jobs: mkdir -p $HOME/buf tar -xvzf buf-Linux-x86_64.tar.gz -C $HOME/buf --strip-components 1 rm buf-Linux-x86_64.tar.gz - python3.9 -m pip install 'protobuf==4.25.1' 'mypy-protobuf==3.3.0' + - name: Python code generation check for branch-3.5 + if: inputs.branch == 'branch-3.5' + run: PATH=$PATH:$HOME/buf/bin PYTHON_EXECUTABLE=python3.9 ./dev/connect-check-protos.py - name: Python code generation check - if: inputs.branch != 'branch-3.3' && inputs.branch != 'branch-3.4' - run: if test -f ./dev/connect-check-protos.py; then PATH=$PATH:$HOME/buf/bin PYTHON_EXECUTABLE=python3.9 ./dev/connect-check-protos.py; fi - # Should delete this section after SPARK 3.5 EOL. + if: inputs.branch != 'branch-3.3' && inputs.branch != 'branch-3.4' && inputs.branch != 'branch-3.5' + run: PATH=$PATH:$HOME/buf/bin conda run -n doc ./dev/connect-check-protos.py + # Should delete this section after SPARK 3.5 EOL. 
- name: Install JavaScript linter dependencies for branch-3.3, branch-3.4, branch-3.5 if: inputs.branch == 'branch-3.3' || inputs.branch == 'branch-3.4' || inputs.branch == 'branch-3.5' run: | @@ -751,8 +758,23 @@ jobs: bundle install - name: R linter run: ./dev/lint-r + - name: Run documentation build for branch-3.3, branch-3.4, branch-3.5 + if: inputs.branch == 'branch-3.3' || inputs.branch == 'branch-3.4' || inputs.branch == 'branch-3.5' + run: | + if [ -f "./dev/is-changed.py" ]; then + # Skip PySpark and SparkR docs while keeping Scala/Java/SQL docs + pyspark_modules=`cd dev && python3.9 -c "import sparktestsupport.modules as m; print(','.join(m.name for m in m.all_modules if m.name.startswith('pyspark')))"` + if [ `./dev/is-changed.py -m $pyspark_modules` = false ]; then export SKIP_PYTHONDOC=1; fi + if [ `./dev/is-changed.py -m sparkr` = false ]; then export SKIP_RDOC=1; fi + fi + cd docs + bundle exec jekyll build - name: Run documentation build + if: inputs.branch != 'branch-3.3' && inputs.branch != 'branch-3.4' && inputs.branch != 'branch-3.5' + shell: 'script -q -e -c "bash {0}"' run: | + # See also https://github.com/conda/conda/issues/7980 + source $(conda info --base)/etc/profile.d/conda.sh && conda activate doc if [ -f "./dev/is-changed.py" ]; then # Skip PySpark and SparkR docs while keeping Scala/Java/SQL docs pyspark_modules=`cd dev && python3.9 -c "import sparktestsupport.modules as m; print(','.join(m.name for m in m.all_modules if m.name.startswith('pyspark')))"` diff --git a/dev/infra/Dockerfile b/dev/infra/Dockerfile index 25c46d1e081f..d1bfa56794ea 100644 --- a/dev/infra/Dockerfile +++ b/dev/infra/Dockerfile @@ -104,18 +104,6 @@ RUN python3.9 -m pip install torcheval # Add Deepspeed as a testing dependency for DeepspeedTorchDistributor RUN python3.9 -m pip install deepspeed -# Additional Python deps for linter and documentation, delete this section if another Python version is used -# TODO(SPARK-32407): Sphinx 3.1+ does not correctly index 
nested classes. -# See also https://github.com/sphinx-doc/sphinx/issues/7551. -# Jinja2 3.0.0+ causes error when building with Sphinx. -# See also https://issues.apache.org/jira/browse/SPARK-35375. -RUN python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==23.9.1' -RUN python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.59.3' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' -RUN python3.9 -m pip install 'sphinx<3.1.0' mkdocs pydata_sphinx_theme sphinx-copybutton nbsphinx numpydoc 'jinja2<3.0.0' 'markupsafe==2.0.1' 'pyzmq<24.0.0' -RUN python3.9 -m pip install ipython_genutils -RUN python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' -RUN python3.9 -m pip install 'docutils<0.18.0' - # Install Python 3.10 at the last stage to avoid breaking Python 3.9 RUN add-apt-repository ppa:deadsnakes/ppa RUN apt-get update && apt-get install -y \ @@ -151,3 +139,55 @@ RUN python3.12 -m pip install 'grpcio==1.59.3' 'grpcio-status==1.59.3' 'protobuf # TODO(SPARK-46078) Use official one instead of nightly build when it's ready RUN python3.12 -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu RUN python3.12 -m pip install torcheval + + +RUN curl -s https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh > miniconda.sh && \ + bash miniconda.sh -b -p /opt/miniconda3 && \ + rm miniconda.sh && \ + ln -sf /opt/miniconda3/bin/conda /usr/local/bin/conda + +# Additional Python deps for linter and documentation, delete this section if another Python version is used +# Since there may be conflicts between envs, conda is used to manage them. +# TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes. +# See also https://github.com/sphinx-doc/sphinx/issues/7551. +# Jinja2 3.0.0+ causes error when building with Sphinx. 
+# See also https://issues.apache.org/jira/browse/SPARK-35375. +RUN conda create -n doc python=3.9 + +RUN conda run -n doc pip install \ + 'black==23.9.1' \ + 'docutils<0.18.0' \ + 'flake8==3.9.0' \ + 'googleapis-common-protos-stubs==2.2.0' \ + 'grpc-stubs==1.24.11' \ + 'grpcio==1.59.3' \ + 'ipython' \ + 'ipython_genutils' \ + 'jinja2<3.0.0' \ + 'markupsafe==2.0.1' \ + 'matplotlib' \ + 'mkdocs' \ + 'mypy-protobuf==3.3.0' \ + 'mypy==0.982' \ + 'nbsphinx' \ + 'numpy>=1.20.0' \ + 'numpydoc' \ + 'pandas' \ + 'pandas-stubs==1.2.0.53' \ + 'plotly>=4.8' \ + 'protobuf==4.25.1' \ + 'pyarrow' \ + 'pydata_sphinx_theme' \ + 'pytest-mypy-plugins==1.9.3' \ + 'pytest==7.1.3' \ + 'pyzmq<24.0.0' \ + 'sphinx-copybutton' \ + 'sphinx<3.1.0' \ + 'sphinx_plotly_directive' +RUN conda run -n doc pip install \ + 'torch<=2.0.1' \ + 'torchvision' --index-url https://download.pytorch.org/whl/cpu +RUN conda run -n doc pip install \ + 'torcheval' + +RUN conda clean --all From 4b30f7dbc285700eb7713fcd45595ae540c0fe72 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Mon, 27 Nov 2023 17:20:20 +0800 Subject: [PATCH 5/5] refer to miniconda dockerfile refer to miniconda dockerfile fix version --- dev/infra/Dockerfile | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/dev/infra/Dockerfile b/dev/infra/Dockerfile index d1bfa56794ea..e353e62e04cc 100644 --- a/dev/infra/Dockerfile +++ b/dev/infra/Dockerfile @@ -141,10 +141,15 @@ RUN python3.12 -m pip install --pre torch --index-url https://download.pytorch.o RUN python3.12 -m pip install torcheval -RUN curl -s https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh > miniconda.sh && \ +# Refer to https://github.com/ContinuumIO/docker-images/blob/main/miniconda3/debian/Dockerfile +RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh -q && \ bash miniconda.sh -b -p /opt/miniconda3 && \ rm miniconda.sh && \ - ln -sf /opt/miniconda3/bin/conda /usr/local/bin/conda + ln -s 
/opt/miniconda3/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \ + ln -s /opt/miniconda3/bin/conda /usr/local/bin/conda && \ + find /opt/miniconda3/ -follow -type f -name '*.a' -delete && \ + find /opt/miniconda3/ -follow -type f -name '*.js.map' -delete && \ + conda clean -afy # Additional Python deps for linter and documentation, delete this section if another Python version is used # Since there may be conflicts between envs, conda is used to manage them. @@ -177,12 +182,12 @@ RUN conda run -n doc pip install \ 'plotly>=4.8' \ 'protobuf==4.25.1' \ 'pyarrow' \ - 'pydata_sphinx_theme' \ + 'pydata_sphinx_theme>=0.13' \ 'pytest-mypy-plugins==1.9.3' \ 'pytest==7.1.3' \ 'pyzmq<24.0.0' \ 'sphinx-copybutton' \ - 'sphinx<3.1.0' \ + 'sphinx==4.2.0' \ 'sphinx_plotly_directive' RUN conda run -n doc pip install \ 'torch<=2.0.1' \ @@ -190,4 +195,4 @@ RUN conda run -n doc pip install \ RUN conda run -n doc pip install \ 'torcheval' -RUN conda clean --all +RUN conda clean -afy