Skip to content

Commit

Permalink
Stop using docker manifest to check for image presence (#17883)
Browse files Browse the repository at this point in the history
We need to set the "experimental" flag in CI in order to use
`docker manifest` command to check for presence of the images
in ghcr.io. In order to use them we need to enable experimental
features via ~/.docker/config.json.

Sometimes, very rarely, we had the case that the config file
got broken and the problem turned out to be that we tried
to do this experimental replacement in parallel by several
running "wait image" commands (:facepalm: here for myself)
that were apparenlty overriding the same config.json
at the same time in non-atomic way, which (very rarely)
led to corrupted file.

However for quite some time we pulled the image immediately
after it was available, in order to verify the image,
so rather than checking if the image
is there via manifest, we can simply pull the image
and effect will be the same - if it fails, the image is not
there, if it has been pulled - we can immediately verify
it. We do not need experimental flag at all for that
so no messing around with .docker/config.json is needed
at all.
  • Loading branch information
potiuk authored Sep 21, 2021
1 parent ab7acfd commit a8184e4
Show file tree
Hide file tree
Showing 12 changed files with 67 additions and 123 deletions.
3 changes: 2 additions & 1 deletion scripts/ci/images/ci_prepare_ci_image_on_ci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ function build_ci_image_on_ci() {
# Pretend that the image was build. We already have image with the right sources baked in!
# so all the checksums are assumed to be correct
md5sum::calculate_md5sum_for_all_files
build_images::wait_for_image_tag "${AIRFLOW_CI_IMAGE}" ":${GITHUB_REGISTRY_PULL_IMAGE_TAG}"
local image_name_with_tag="${AIRFLOW_CI_IMAGE}:${GITHUB_REGISTRY_PULL_IMAGE_TAG}"
push_pull_remove_images::wait_for_image "${image_name_with_tag}"
md5sum::update_all_md5_with_group
else
build_images::rebuild_ci_image_if_needed
Expand Down
3 changes: 2 additions & 1 deletion scripts/ci/images/ci_prepare_prod_image_on_ci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ function build_prod_images_on_ci() {
build_images::prepare_prod_build

if [[ ${GITHUB_REGISTRY_WAIT_FOR_IMAGE} == "true" ]]; then
build_images::wait_for_image_tag "${AIRFLOW_PROD_IMAGE}" ":${GITHUB_REGISTRY_PULL_IMAGE_TAG}"
local image_name_with_tag="${AIRFLOW_PROD_IMAGE}:${GITHUB_REGISTRY_PULL_IMAGE_TAG}"
push_pull_remove_images::wait_for_image "${image_name_with_tag}"
else
build_images::build_prod_images_from_locally_built_airflow_packages
fi
Expand Down
2 changes: 2 additions & 0 deletions scripts/ci/images/ci_push_ci_images.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,6 @@

build_images::prepare_ci_build

build_images::login_to_docker_registry

push_pull_remove_images::push_ci_images_to_github
2 changes: 2 additions & 0 deletions scripts/ci/images/ci_push_production_images.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,6 @@

build_images::prepare_prod_build

build_images::login_to_docker_registry

push_pull_remove_images::push_prod_images_to_github
5 changes: 3 additions & 2 deletions scripts/ci/images/ci_wait_for_and_verify_all_ci_images.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ source "${LIBRARIES_DIR}/_all_libs.sh"

initialization::set_output_color_variables

PARALLEL_TAIL_LENGTH=5
export PARALLEL_TAIL_LENGTH=5

parallel::make_sure_gnu_parallel_is_installed

Expand All @@ -35,11 +35,12 @@ echo
echo "${COLOR_BLUE}Waiting for all CI images to appear${COLOR_RESET}"
echo


parallel::initialize_monitoring

parallel::monitor_progress

build_images::login_to_docker_registry

# shellcheck disable=SC2086
parallel --results "${PARALLEL_MONITORED_DIR}" \
"$( dirname "${BASH_SOURCE[0]}" )/ci_wait_for_and_verify_ci_image.sh" ::: \
Expand Down
4 changes: 3 additions & 1 deletion scripts/ci/images/ci_wait_for_and_verify_all_prod_images.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ source "${LIBRARIES_DIR}/_all_libs.sh"

initialization::set_output_color_variables

PARALLEL_TAIL_LENGTH=5
export PARALLEL_TAIL_LENGTH=5

parallel::make_sure_gnu_parallel_is_installed

Expand All @@ -39,6 +39,8 @@ parallel::initialize_monitoring

parallel::monitor_progress

build_images::login_to_docker_registry

# shellcheck disable=SC2086
parallel --results "${PARALLEL_MONITORED_DIR}" \
"$( dirname "${BASH_SOURCE[0]}" )/ci_wait_for_and_verify_prod_image.sh" ::: \
Expand Down
15 changes: 1 addition & 14 deletions scripts/ci/images/ci_wait_for_and_verify_ci_image.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,22 +30,9 @@ shift

image_name_with_tag="${AIRFLOW_CI_IMAGE}:${GITHUB_REGISTRY_PULL_IMAGE_TAG}"

function pull_ci_image() {
start_end::group_start "Pulling image: ${IMAGE_AVAILABLE}"
push_pull_remove_images::pull_image_if_not_present_or_forced "${IMAGE_AVAILABLE}"
start_end::group_end
}

start_end::group_start "Configure Docker Registry"
build_images::configure_docker_registry
start_end::group_end

start_end::group_start "Waiting for ${image_name_with_tag}"
push_pull_remove_images::wait_for_image "${image_name_with_tag}"
build_images::prepare_ci_build
start_end::group_end

pull_ci_image
push_pull_remove_images::wait_for_image "${image_name_with_tag}"

if [[ ${VERIFY_IMAGE=} != "false" ]]; then
verify_image::verify_ci_image "${image_name_with_tag}"
Expand Down
16 changes: 1 addition & 15 deletions scripts/ci/images/ci_wait_for_and_verify_prod_image.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,22 +30,8 @@ shift

image_name_with_tag="${AIRFLOW_PROD_IMAGE}:${GITHUB_REGISTRY_PULL_IMAGE_TAG}"

function pull_prod_image() {
start_end::group_start "Pulling image: ${IMAGE_AVAILABLE}"
push_pull_remove_images::pull_image_if_not_present_or_forced "${IMAGE_AVAILABLE}"
start_end::group_end
}

start_end::group_start "Configure Docker Registry"
build_images::configure_docker_registry
start_end::group_end

start_end::group_start "Waiting for ${image_name_with_tag}"
push_pull_remove_images::wait_for_image "${image_name_with_tag}"
build_images::prepare_prod_build
start_end::group_end

pull_prod_image
push_pull_remove_images::wait_for_image "${image_name_with_tag}"

if [[ ${VERIFY_IMAGE=} != "false" ]]; then
verify_image::verify_prod_image "${image_name_with_tag}"
Expand Down
95 changes: 27 additions & 68 deletions scripts/ci/libraries/_build_images.sh
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ function build_images::get_docker_cache_image_names() {
export PYTHON_BASE_IMAGE="python:${PYTHON_MAJOR_MINOR_VERSION}-slim-buster"

local image_name
image_name="${GITHUB_REGISTRY}/$(build_images::get_github_container_registry_image_prefix)"
image_name="ghcr.io/$(build_images::get_github_container_registry_image_prefix)"

# Example:
# ghcr.io/apache/airflow/main/python:3.8-slim-buster
Expand Down Expand Up @@ -412,36 +412,33 @@ function build_images::get_docker_cache_image_names() {
}

# If GitHub Registry is used, login to the registry using GITHUB_USERNAME and
# GITHUB_TOKEN. In case Personal Access token is not set, skip logging in
# Also enable experimental features of docker (we need `docker manifest` command)
function build_images::configure_docker_registry() {
local token="${GITHUB_TOKEN}"
if [[ -z "${token}" ]]; then
verbosity::print_info
verbosity::print_info "Skip logging in to GitHub Registry. No Token available!"
verbosity::print_info
elif [[ ${AIRFLOW_LOGIN_TO_GITHUB_REGISTRY=} != "true" ]]; then
verbosity::print_info
verbosity::print_info "Skip logging in to GitHub Registry. AIRFLOW_LOGIN_TO_GITHUB_REGISTRY != true"
verbosity::print_info
elif [[ -n "${token}" ]]; then
echo "${token}" | docker_v login \
--username "${GITHUB_USERNAME:-apache}" \
--password-stdin \
"${GITHUB_REGISTRY}"
else
verbosity::print_info "Skip Login to GitHub Registry ${GITHUB_REGISTRY} as token is missing"
fi
if [[ ${CI} == "true" ]]; then
# we only need that on CI in order to run "docker manifest" when we check if remote image exists
# See function push_pull_remove_images::check_image_manifest()
local new_config
new_config=$(jq '.experimental = "enabled"' "${HOME}/.docker/config.json" || true)
if [[ ${new_config} != "" ]]; then
echo "${new_config}" > "${HOME}/.docker/config.json"
# GITHUB_TOKEN. We only need to login to docker registry on CI and only when we push
# images. All other images we pull from docker registry are public and we do not need
# to login there.
function build_images::login_to_docker_registry() {
if [[ "${CI}" == "true" ]]; then
start_end::group_start "Configure Docker Registry"
local token="${GITHUB_TOKEN}"
if [[ -z "${token}" ]]; then
verbosity::print_info
verbosity::print_info "Skip logging in to GitHub Registry. No Token available!"
verbosity::print_info
elif [[ ${AIRFLOW_LOGIN_TO_GITHUB_REGISTRY=} != "true" ]]; then
verbosity::print_info
verbosity::print_info "Skip logging in to GitHub Registry. AIRFLOW_LOGIN_TO_GITHUB_REGISTRY != true"
verbosity::print_info
elif [[ -n "${token}" ]]; then
# logout from the repository first - so that we do not keep us logged in if the token
# already expired (which can happen if we have a long build running)
docker_v logout "ghcr.io"
# The login might succeed or not - in some cases, when we pull public images in forked
# repos it might fail, but the pulls will continue to work
echo "${token}" | docker_v login \
--username "${GITHUB_USERNAME:-apache}" \
--password-stdin \
"ghcr.io" || true
else
echo "${COLOR_YELLOW}Could not set experimental flag in ${HOME}/.docker/config.json ${COLOR_RESET}"
echo "${COLOR_YELLOW}docker manifest command will likely not work${COLOR_RESET}"
verbosity::print_info "Skip Login to GitHub Container Registry as token is missing"
fi
fi
}
Expand All @@ -457,7 +454,6 @@ function build_images::prepare_ci_build() {
export AIRFLOW_EXTRAS="${AIRFLOW_EXTRAS:="${DEFAULT_CI_EXTRAS}"}"
readonly AIRFLOW_EXTRAS

build_images::configure_docker_registry
sanity_checks::go_to_airflow_sources
permissions::fix_group_permissions
}
Expand Down Expand Up @@ -754,7 +750,6 @@ function build_images::prepare_prod_build() {
export AIRFLOW_EXTRAS="${AIRFLOW_EXTRAS:="${DEFAULT_PROD_EXTRAS}"}"
readonly AIRFLOW_EXTRAS

build_images::configure_docker_registry
AIRFLOW_BRANCH_FOR_PYPI_PRELOADING="${BRANCH_NAME}"
sanity_checks::go_to_airflow_sources
}
Expand Down Expand Up @@ -897,42 +892,6 @@ function build_images::tag_image() {
done
}

# Waits for image tag to appear in GitHub Registry, pulls it and tags with the target tag
# Parameters:
# $1 - image name to wait for
# $2 - fallback image to wait for
# $3, $4, ... - target tags to tag the image with
function build_images::wait_for_image_tag() {

local image_name="${1}"
local image_suffix="${2}"
shift 2

local image_to_wait_for="${image_name}${image_suffix}"
start_end::group_start "Wait for image tag ${image_to_wait_for}"
while true; do
set +e
echo "${COLOR_BLUE}Docker pull ${image_to_wait_for} ${COLOR_RESET}" >"${OUTPUT_LOG}"
docker_v pull "${image_to_wait_for}" >>"${OUTPUT_LOG}" 2>&1
set -e
local image_hash
echo "${COLOR_BLUE} Docker images -q ${image_to_wait_for}${COLOR_RESET}" >>"${OUTPUT_LOG}"
image_hash="$(docker images -q "${image_to_wait_for}" 2>>"${OUTPUT_LOG}" || true)"
if [[ -z "${image_hash}" ]]; then
echo
echo "The image ${image_to_wait_for} is not yet available. No local hash for the image"
echo
echo "Last log:"
cat "${OUTPUT_LOG}" || true
echo
else
build_images::tag_image "${image_to_wait_for}" "${image_name}:latest" "${@}"
break
fi
done
start_end::group_end
}

# We use pulled docker image cache by default for CI images to speed up the builds
# and local to speed up iteration on kerberos tests
function build_images::determine_docker_cache_strategy() {
Expand Down
3 changes: 0 additions & 3 deletions scripts/ci/libraries/_initialization.sh
Original file line number Diff line number Diff line change
Expand Up @@ -558,8 +558,6 @@ function initialization::initialize_git_variables() {
}

function initialization::initialize_github_variables() {
# Defaults for interacting with GitHub
export GITHUB_REGISTRY="ghcr.io"
export GITHUB_REGISTRY_WAIT_FOR_IMAGE=${GITHUB_REGISTRY_WAIT_FOR_IMAGE:="false"}
export GITHUB_REGISTRY_PULL_IMAGE_TAG=${GITHUB_REGISTRY_PULL_IMAGE_TAG:="latest"}
export GITHUB_REGISTRY_PUSH_IMAGE_TAG=${GITHUB_REGISTRY_PUSH_IMAGE_TAG:="latest"}
Expand Down Expand Up @@ -843,7 +841,6 @@ function initialization::make_constants_read_only() {
readonly ADDITIONAL_RUNTIME_APT_DEPS
readonly ADDITIONAL_RUNTIME_APT_ENV

readonly GITHUB_REGISTRY
readonly GITHUB_REGISTRY_WAIT_FOR_IMAGE
readonly GITHUB_REGISTRY_PULL_IMAGE_TAG
readonly GITHUB_REGISTRY_PUSH_IMAGE_TAG
Expand Down
38 changes: 20 additions & 18 deletions scripts/ci/libraries/_push_pull_remove_images.sh
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ function push_pull_remove_images::pull_image_if_not_present_or_forced() {
echo
echo "Pulling the image ${image_to_pull}"
echo
docker_v pull "${image_to_pull}"
docker pull "${image_to_pull}"
fi
}

Expand Down Expand Up @@ -262,32 +262,34 @@ function push_pull_remove_images::push_prod_images_to_github () {
fi
}

# waits for an image to be available in GitHub Container Registry. Should be run with `set +e`
function push_pull_remove_images::check_image_manifest() {
local image_to_wait_for="${1}"
echo "GitHub Container Registry: checking for ${image_to_wait_for} via docker manifest inspect!"
docker_v manifest inspect "${image_to_wait_for}"
local res=$?
if [[ ${res} == "0" ]]; then
echo "Image: ${image_to_wait_for} found in Container Registry: ${COLOR_GREEN}OK.${COLOR_RESET}"
return 0
else
echo "${COLOR_YELLOW}Still waiting. Not found!${COLOR_RESET}"
return 1
fi
}

# waits for an image to be available in the GitHub registry
function push_pull_remove_images::wait_for_image() {
# Maximum number of tries 100 = we try for max. 100 minutes.
local MAX_TRIES=100
set +e
echo " Waiting for github registry image: $1"
local count=0
while true
do
if push_pull_remove_images::check_image_manifest "$1"; then
export IMAGE_AVAILABLE="$1"
if push_pull_remove_images::pull_image_if_not_present_or_forced "$1"; then
break
fi
sleep 30
if [[ ${count} == "${MAX_TRIES}" ]]; then
echo "${COLOR_RED}Giving up after ${MAX_TRIES}!${COLOR_RESET}"
echo "If there were delays with building the image, maintainers could potentially restart the build when the images are ready!"
echo "Or you can run 'git commit --amend' and then push the PR again with 'git push --force-with-lease' to re-trigger the build."
return 1
fi
echo "${COLOR_YELLOW}Failed to pull the image for ${count} time. Sleeping!${COLOR_RESET}"
sleep 60
count=$((count + 1))
done
set -e
}

function push_pull_remove_images::pull_image() {
start_end::group_start "Pulling image: $1"
push_pull_remove_images::pull_image_if_not_present_or_forced "$1"
start_end::group_end
}
4 changes: 4 additions & 0 deletions scripts/ci/tools/free_space.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,7 @@ docker_v system prune --all --force --volumes

echo "${COLOR_BLUE}Free disk space ${COLOR_RESET}"
df -h

# always logout from the docker registry - this is necessary as we can have an expired token from
# previous job!.
docker_v logout "ghcr.io"

0 comments on commit a8184e4

Please sign in to comment.