diff --git a/docker/datahub-ingestion/Dockerfile b/docker/datahub-ingestion/Dockerfile
index d7084986b44263..09797f741a8255 100644
--- a/docker/datahub-ingestion/Dockerfile
+++ b/docker/datahub-ingestion/Dockerfile
@@ -5,7 +5,20 @@ FROM acryldata/datahub-ingestion-base as base
 
 FROM openjdk:11 as prod-build
 COPY . /datahub-src
-RUN cd /datahub-src && ./gradlew :wrapper && ./gradlew :metadata-events:mxe-schemas:build
+# We noticed that the gradle wrapper download failed frequently in CI on arm64 machines.
+# I suspect this was due to the QEMU emulation slowdown, combined with the arm64
+# build being starved for CPU by the x86_64 build's codegen step.
+#
+# The middle step will attempt to download gradle wrapper 5 times with exponential backoff.
+# The ./gradlew --version will force the download of the gradle wrapper but is otherwise a no-op.
+# Note that the retry logic will always return success, so we should always attempt to run codegen.
+# The backoff uses 2<<attempt (4s, 8s, ... 64s) rather than 2*2**attempt: RUN executes under /bin/sh, and POSIX sh (e.g. dash) has no ** operator.
+# Inspired by https://github.com/gradle/gradle/issues/18124#issuecomment-958182335
+# and https://unix.stackexchange.com/a/82610/378179.
+# This is a workaround for https://github.com/gradle/gradle/issues/18124.
+RUN cd /datahub-src && \
+    (for attempt in 1 2 3 4 5; do ./gradlew --version && break ; echo "Failed to download gradle wrapper (attempt $attempt)" && sleep $((2<<$attempt)) ; done ) && \
+    ./gradlew :metadata-events:mxe-schemas:build
 
 FROM base as prod-codegen
 COPY --from=prod-build /datahub-src /datahub-src