From edb6ccc6470bcdd6cf8a8136faa487ed9a0f9d2a Mon Sep 17 00:00:00 2001 From: Domenic Denicola Date: Tue, 16 Jun 2020 18:49:42 -0400 Subject: [PATCH] Improvements to the CI Docker build This is a series of improvements that will help pave the way for a better CI build/deploy architecture in the future, potentially based on GitHub actions. In particular: * Stop grabbing pdfsizeopt using git from outside the container. Instead, get it as part of the Dockerfile along with other dependencies. * Avoid the multi-step PDF generation process, where we deploy to whatwg.org, point Prince at that, and then deploy the generated print.pdf to whatwg.org. Instead, we serve the built output inside the Docker container, use that to generate print.pdf, and deploy everything to whatwg.org in a single step. * Update Prince from v11.3 to v13.5. * Download only the .jar file for the validator, instead of the full package; we can let Docker handle installing Java. --- ci-deploy/Dockerfile | 53 ++++++++-------- ci-deploy/inside-container.sh | 41 ++++++------ ci-deploy/outside-container.sh | 110 ++++++++++++++++++--------------- 3 files changed, 106 insertions(+), 98 deletions(-) diff --git a/ci-deploy/Dockerfile b/ci-deploy/Dockerfile index 16bd093..a68ef15 100644 --- a/ci-deploy/Dockerfile +++ b/ci-deploy/Dockerfile @@ -3,47 +3,44 @@ FROM debian:stable RUN apt-get update && \ - apt-get install -y ca-certificates curl rsync git unzip \ - default-jre \ - libfontconfig1 libgomp1 libxml2 \ - python3 python3-pip \ + apt-get install --yes --no-install-recommends \ + ca-certificates curl rsync git \ + default-jre \ + python3 python3-pip \ fonts-dejavu fonts-droid-fallback fonts-liberation fonts-symbola fonts-unfonts-core # Dependency lines above are: # - General # - validator -# - Prince # - Highlighter -# - fonts +# - fonts, for when Prince renders to PDF +COPY --from=whatwg/wattsi:latest /whatwg/wattsi/bin/wattsi /bin/wattsi +COPY --from=ptspts/pdfsizeopt:latest /bin /bin/pdfsizeopt RUN pip3 
install bs-highlighter -# Dependecies of prince_11.3-1_debian8.0_amd64.deb (not used) are libc6 libcurl3 libfontconfig1 -# libfreetype6 libgif4 libgomp1 libjpeg62-turbo libpng12-0 libssl1.0.0 libtiff5 libxml2 zlib1g. -# Above we install only the subset that's needed to make Prince work. - -ADD https://www.princexml.com/download/prince-11.3-linux-generic-x86_64.tar.gz /whatwg/prince.tar.gz +# The DockerHub container for pdfsizeopt has some useful auxiliary binaries it depends on, but the +# main binary is outdated and needs updating directly from GitHub: +# TODO: consolidate these two lines when https://github.com/moby/buildkit/pull/1492 gets released +# (see https://github.com/moby/moby/issues/34819). +ADD https://github.com/pts/pdfsizeopt/blob/master/pdfsizeopt.single?raw=true /bin/pdfsizeopt/pdfsizeopt +RUN chmod a+rwx /bin/pdfsizeopt/pdfsizeopt -RUN cd /whatwg && \ - tar xzf prince.tar.gz && \ - echo /whatwg/prince | /whatwg/prince-11.3-linux-generic-x86_64/install.sh && \ - echo '@font-face { font-family: serif; src: local("Symbola") }' >> /whatwg/prince/lib/prince/style/fonts.css && \ - rm -rf /whatwg/prince.tar.gz /whatwg/prince-11.3-linux-generic-x86_64 -ENV PATH="/whatwg/prince/bin:${PATH}" +# The DockerHub container for the validator only contains the server version, so we get the .jar +# from GitHub: +ADD https://github.com/validator/validator/releases/download/jar/vnu.jar /whatwg/ -ADD pdfsizeopt/pdfsizeopt.single /whatwg/pdfsizeopt/bin/pdfsizeopt -ADD https://github.com/pts/pdfsizeopt/releases/download/2017-01-24/pdfsizeopt_libexec_linux-v3.tar.gz /whatwg/pdfsizeopt_libexec.tar.gz -RUN cd /whatwg && \ - tar xzf pdfsizeopt_libexec.tar.gz && \ - mv pdfsizeopt_libexec/* pdfsizeopt/bin/ && \ - rm -rf pdfsizeopt_libexec.tar.gz pdfsizeopt_libexec -ENV PATH="/whatwg/pdfsizeopt/bin:${PATH}" - -COPY --from=whatwg/wattsi:latest /whatwg/wattsi/bin/wattsi /bin/wattsi +# Trying to copy Prince from its DockerHub container like the others does not work; it has too 
many +# shared library dependencies. So we get the .deb file from the source: +ADD https://www.princexml.com/download/prince_13.5-1_debian10_amd64.deb . +RUN apt-get install --yes --no-install-recommends ./prince_13.5-1_debian10_amd64.deb && \ + rm prince_13.5-1_debian10_amd64.deb && \ + echo '@font-face { font-family: serif; src: local("Symbola") }' >> /usr/lib/prince/style/fonts.css -ADD html-build /whatwg/html-build +ARG html_build_dir +ADD . /whatwg/html-build -# Note: we do not ADD /whatwg/html, but instead mount it using --volume in .travis.yml, since it +# Note: we do not ADD /whatwg/html, but instead mount it in outside-container.sh, since it # contains the deploy_key, and thus should not be part of the image. The image is cached, publicly, # on Docker Hub. ENV HTML_SOURCE /whatwg/html diff --git a/ci-deploy/inside-container.sh b/ci-deploy/inside-container.sh index 15f3e44..4bf2845 100644 --- a/ci-deploy/inside-container.sh +++ b/ci-deploy/inside-container.sh @@ -4,7 +4,7 @@ set -o nounset set -o pipefail cd "$(dirname "$0")/../.." -PDF_SOURCE_URL="https://html.spec.whatwg.org/" +PDF_SERVE_PORT=8080 WEB_ROOT="html.spec.whatwg.org" COMMITS_DIR="commit-snapshots" REVIEW_DIR="review-drafts" @@ -26,13 +26,28 @@ IS_TEST_OF_HTML_BUILD_ITSELF=${IS_TEST_OF_HTML_BUILD_ITSELF:-false} # Conformance-check the result echo "" -echo "Downloading and running conformance checker..." -curl --retry 2 --remote-name --fail --location https://github.com/validator/validator/releases/download/linux/vnu.linux.zip -unzip vnu.linux.zip +echo "Running conformance checker..." 
# the -Xmx1g argument sets the size of the Java heap space to 1 gigabyte -./vnu-runtime-image/bin/java -Xmx1g -m vnu/nu.validator.client.SimpleCommandLineValidator --skip-non-html "$HTML_OUTPUT" +java -Xmx1g -jar ./vnu.jar --skip-non-html "$HTML_OUTPUT" echo "" +# Serve the built output so that Prince can snapshot it +# The nohup/sleep incantations are necessary because normal & does not work inside Docker: +# https://stackoverflow.com/q/50211207/3191 +( + cd "$HTML_OUTPUT" + nohup bash -c "python3 -m http.server $PDF_SERVE_PORT &" && sleep 4 +) + +echo "" +echo "Building PDF..." +PDF_TMP="$(mktemp --suffix=.pdf)" +prince --verbose --output "$PDF_TMP" "http://0.0.0.0:$PDF_SERVE_PORT/" + +echo "" +echo "Optimizing PDF..." +PATH=/bin/pdfsizeopt:$PATH pdfsizeopt --v=30 "$PDF_TMP" "$HTML_OUTPUT/print.pdf" + if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then echo "Skipping deploy for non-master" exit 0 @@ -54,7 +69,6 @@ echo "Deploying build output..." rsync --rsh="ssh -o UserKnownHostsFile=known_hosts" \ --archive --chmod=D755,F644 --compress --verbose \ --delete --exclude="$COMMITS_DIR" --exclude="$REVIEW_DIR" \ - --exclude=print.pdf \ "$HTML_OUTPUT/" "deploy@$SERVER:/var/www/$WEB_ROOT" # Now sync a commit snapshot and a review draft, if any @@ -66,20 +80,5 @@ rsync --rsh="ssh -o UserKnownHostsFile=known_hosts" \ --archive --chmod=D755,F644 --compress --verbose \ "$HTML_OUTPUT/$COMMITS_DIR" "$HTML_OUTPUT/$REVIEW_DIR" "deploy@$SERVER:/var/www/$WEB_ROOT" -echo "" -echo "Building PDF..." -PDF_TMP="$(mktemp --suffix=.pdf)" -prince --verbose --output "$PDF_TMP" "$PDF_SOURCE_URL" - -echo "" -echo "Optimizing PDF..." -pdfsizeopt --v=40 "$PDF_TMP" "$HTML_OUTPUT/print.pdf" - -echo "" -echo "Deploying PDF..." -rsync --rsh="ssh -o UserKnownHostsFile=known_hosts" \ - --archive --compress --verbose \ - "$HTML_OUTPUT/print.pdf" "deploy@$SERVER:/var/www/$WEB_ROOT/print.pdf" - echo "" echo "All done!" 
diff --git a/ci-deploy/outside-container.sh b/ci-deploy/outside-container.sh
index 9eeaea2..0fe1776 100644
--- a/ci-deploy/outside-container.sh
+++ b/ci-deploy/outside-container.sh
@@ -2,54 +2,78 @@
 set -o errexit
 set -o nounset
 set -o pipefail
+shopt -s extglob
 
-HERE=$(dirname "$0")
-cd "$HERE/../.."
-
-DOCKER_USERNAME="domenicdenicola"
-DOCKER_HUB_REPO="whatwg/html-deploy"
-
-# Set from the outside:
-TRAVIS_PULL_REQUEST=${TRAVIS_PULL_REQUEST:-false}
-IS_TEST_OF_HTML_BUILD_ITSELF=${IS_TEST_OF_HTML_BUILD_ITSELF:-false}
-
-# When not running pull request builds:
-# - DOCKER_PASSWORD is set from the outside
-# - ENCRYPTION_LABEL is set from the outside
-
-git clone --depth 1 https://github.com/pts/pdfsizeopt.git pdfsizeopt
-
-# Copy the Docker-related stuff into the working (grandparent) directory.
-cp "$HERE"/{.dockerignore,Dockerfile} .
-
-# Build the Docker image, using Docker Hub as a cache. (This will be fast if nothing has changed
-# in wattsi or html-build).
-docker pull "$DOCKER_HUB_REPO:latest"
-docker build --cache-from "$DOCKER_HUB_REPO:latest" \
-             --tag "$DOCKER_HUB_REPO:latest" \
-             --build-arg "travis_pull_request=$TRAVIS_PULL_REQUEST" \
-             --build-arg "is_test_of_html_build_itself=$IS_TEST_OF_HTML_BUILD_ITSELF" \
-             .
-if [[ "$TRAVIS_PULL_REQUEST" == "false" && "$IS_TEST_OF_HTML_BUILD_ITSELF" == "false" ]]; then
-  # Decrypt the deploy key from this script's location into the html/ directory, since that's the
-  # directory that will be shared with the container (but not built into the image).
-  ENCRYPTED_KEY_VAR="encrypted_${ENCRYPTION_LABEL}_key"
-  ENCRYPTED_IV_VAR="encrypted_${ENCRYPTION_LABEL}_iv"
-  ENCRYPTED_KEY=${!ENCRYPTED_KEY_VAR}
-  ENCRYPTED_IV=${!ENCRYPTED_IV_VAR}
-  openssl aes-256-cbc -K "$ENCRYPTED_KEY" -iv "$ENCRYPTED_IV" \
-      -in "$HERE/deploy-key.enc" -out html/deploy-key -d
-fi
-
-# Run the inside-container.sh script, with the html/ directory mounted inside the container.
-echo ""
-docker run --volume "$(pwd)/html":/whatwg/html "$DOCKER_HUB_REPO:latest"
-
-if [[ "$TRAVIS_PULL_REQUEST" == "false" && "$IS_TEST_OF_HTML_BUILD_ITSELF" == "false" ]]; then
-  # If the build succeeded and we got here, upload the Docker image to Docker Hub, so that future runs
-  # can use it as a cache.
+HERE="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+TMP_DIR=$(mktemp -d)
+
+# Stage html-build into a temporary Docker build context, build the image, run it against the
+# html/ directory, and (for non-PR, non-self-test builds) push the image to Docker Hub.
+function main {
+  # Register cleanup before anything can fail, so the temporary directory never leaks.
+  trap cleanTemp EXIT
+
+  cp "$HERE/Dockerfile" "$TMP_DIR"
+  cd "$HERE/.."
+  # extglob pattern: copy everything except dotfiles, html, and Dockerfile.
+  cp -r !(.*|html|Dockerfile) "$TMP_DIR"
+  cp .*.pl "$TMP_DIR"
+  cd "$TMP_DIR"
+
+  DOCKER_USERNAME="domenicdenicola"
+  DOCKER_HUB_REPO="whatwg/html-deploy"
+
+  # Set from the outside:
+  TRAVIS_PULL_REQUEST=${TRAVIS_PULL_REQUEST:-false}
+  IS_TEST_OF_HTML_BUILD_ITSELF=${IS_TEST_OF_HTML_BUILD_ITSELF:-false}
+
+  # When not running pull request builds:
+  # - DOCKER_PASSWORD is set from the outside
+  # - ENCRYPTION_LABEL is set from the outside
+
+  # Build the Docker image, using Docker Hub as a cache. (This will be fast if nothing has changed
+  # in wattsi or html-build).
+  docker build --cache-from "$DOCKER_HUB_REPO:latest" \
+               --tag "$DOCKER_HUB_REPO:latest" \
+               --build-arg "html_build_dir=$TMP_DIR" \
+               --build-arg "travis_pull_request=$TRAVIS_PULL_REQUEST" \
+               --build-arg "is_test_of_html_build_itself=$IS_TEST_OF_HTML_BUILD_ITSELF" \
+               .
+
+  # Leave the temporary build context: the relative html/ paths below must resolve against the
+  # directory whose html/ subdirectory is mounted into the container, not against $TMP_DIR.
+  cd "$HERE/../.."
+
+  if [[ "$TRAVIS_PULL_REQUEST" == "false" && "$IS_TEST_OF_HTML_BUILD_ITSELF" == "false" ]]; then
+    # Decrypt the deploy key from this script's location into the html/ directory, since that's the
+    # directory that will be shared with the container (but not built into the image).
+    ENCRYPTED_KEY_VAR="encrypted_${ENCRYPTION_LABEL}_key"
+    ENCRYPTED_IV_VAR="encrypted_${ENCRYPTION_LABEL}_iv"
+    ENCRYPTED_KEY=${!ENCRYPTED_KEY_VAR}
+    ENCRYPTED_IV=${!ENCRYPTED_IV_VAR}
+    openssl aes-256-cbc -K "$ENCRYPTED_KEY" -iv "$ENCRYPTED_IV" \
+        -in "$HERE/deploy-key.enc" -out html/deploy-key -d
+  fi
+
+  # Run the inside-container.sh script, with the html/ directory mounted inside the container.
   echo ""
-  docker tag "$DOCKER_HUB_REPO:latest" "$DOCKER_HUB_REPO:$TRAVIS_BUILD_NUMBER" &&
-  docker login -u "$DOCKER_USERNAME" -p "$DOCKER_PASSWORD"
-  docker push "$DOCKER_HUB_REPO"
-fi
+  docker run --mount "type=bind,source=$(pwd)/html,destination=/whatwg/html,readonly=1" "$DOCKER_HUB_REPO:latest"
+
+  if [[ "$TRAVIS_PULL_REQUEST" == "false" && "$IS_TEST_OF_HTML_BUILD_ITSELF" == "false" ]]; then
+    # If the build succeeded and we got here, upload the Docker image to Docker Hub, so that future runs
+    # can use it as a cache.
+    echo ""
+    # Separate statements (no trailing &&) so that errexit aborts if tagging fails.
+    docker tag "$DOCKER_HUB_REPO:latest" "$DOCKER_HUB_REPO:$TRAVIS_BUILD_NUMBER"
+    # Pass the password on stdin so it does not show up in the process list.
+    printf '%s' "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin
+    docker push "$DOCKER_HUB_REPO"
+  fi
+}
+
+# Remove the temporary Docker build context; installed as the EXIT trap by main.
+function cleanTemp {
+  rm -rf -- "${TMP_DIR:?}"
+}
+
+main "$@"