From 9466a8f47315e3307a2991f5e55f0efbe53f5bfe Mon Sep 17 00:00:00 2001
From: zacharyburnett
Date: Wed, 25 Oct 2023 12:27:27 -0400
Subject: [PATCH 1/4] add data workflow to cache WebbPSF data

---
 .github/workflows/ci.yml   |  8 ++++-
 .github/workflows/data.yml | 70 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/data.yml

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c871d446..c6144fa1 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -27,13 +27,19 @@ jobs:
         - macos: py311-xdist
         - linux: py311-cov-xdist
           coverage: 'codecov'
+  data:
+    uses: ./.github/workflows/data.yml
   test_downstream:
     uses: OpenAstronomy/github-actions-workflows/.github/workflows/tox.yml@v1
+    needs: [ data ]
     with:
       setenv: |
-        CRDS_PATH: ${{ needs.crds.outputs.path }}
+        WEBBPSF_PATH: ${{ needs.data.outputs.webbpsf_path }}
+        CRDS_PATH: ${{ needs.data.outputs.path }}/crds_cache
         CRDS_CLIENT_RETRY_COUNT: 3
         CRDS_CLIENT_RETRY_DELAY_SECONDS: 20
+      cache-path: ${{ needs.data.outputs.webbpsf_path }}
+      cache-key: webbpsf-${{ needs.data.outputs.webbpsf_hash }}
       envs: |
         - linux: test-jwst-xdist
         - linux: test-romancal-xdist
diff --git a/.github/workflows/data.yml b/.github/workflows/data.yml
new file mode 100644
index 00000000..f67396df
--- /dev/null
+++ b/.github/workflows/data.yml
@@ -0,0 +1,70 @@
+on:
+  workflow_call:
+    outputs:
+      path:
+        value: ${{ jobs.data.outputs.path }}
+      webbpsf_path:
+        value: ${{ jobs.data.outputs.webbpsf_path }}
+      webbpsf_hash:
+        value: ${{ jobs.data.outputs.webbpsf_hash }}
+  workflow_dispatch:
+  schedule:
+    - cron: "42 4 * * 3"
+
+env:
+  DATA_PATH: /tmp/data
+
+jobs:
+  webbpsf-data:
+    if: (github.repository == 'spacetelescope/romancal' && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'update webbpsf data')))
+    name: download and cache WebbPSF data
+    runs-on: ubuntu-latest
+    env:
+      WEBBPSF_DATA_URL: https://stsci.box.com/shared/static/qxpiaxsjwo15ml6m4pkhtk36c9jgj70k.gz
+    outputs:
+      path: ${{ steps.cache_path.outputs.path }}
+      hash: ${{ steps.data_hash.outputs.hash }}
+    steps:
+      - id: cache_path
+        run: echo "path=${{ env.DATA_PATH }}/webbpsf-data" >> $GITHUB_OUTPUT
+      - run: mkdir -p tmp/data
+      - run: wget ${{ env.WEBBPSF_DATA_URL }} -O tmp/webbpsf-data.tar.gz
+      - id: data_hash
+        run: echo "hash=$( shasum tmp/webbpsf-data.tar.gz | cut -d ' ' -f 1 )" >> $GITHUB_OUTPUT
+      - id: cache_check
+        uses: actions/cache@v3
+        with:
+          path: ${{ steps.cache_path.outputs.path }}
+          key: webbpsf-${{ steps.data_hash.outputs.hash }}
+      - if: ${{ steps.cache_check.outputs.cache-hit != 'true' }}
+        run: mkdir -p ${{ env.DATA_PATH }}
+      - if: ${{ steps.cache_check.outputs.cache-hit != 'true' }}
+        run: tar -xzvf tmp/webbpsf-data.tar.gz -C ${{ env.DATA_PATH }}
+  data:
+    needs: [ webbpsf-data ]
+    # run data job if webbpsf-data succeeds or is skipped. This allows
+    # this data job to always fetch the crds context even if the webbpsf data fetching
+    # was skipped (and an existing cache will be used for the webbpsf data).
+    if: always() && (needs.webbpsf-data.result == 'success' || needs.webbpsf-data.result == 'skipped')
+    name: retrieve latest data cache key
+    runs-on: ubuntu-latest
+    env:
+      GH_TOKEN: ${{ github.token }}
+    outputs:
+      path: ${{ env.DATA_PATH }}
+      webbpsf_hash: ${{ steps.webbpsf_hash.outputs.hash }}
+      webbpsf_path: ${{ steps.webbpsf_path.outputs.path }}
+    steps:
+      - id: webbpsf_hash
+        run: |
+          # use actions/gh-actions-cache to allow filtering by key
+          gh extension install actions/gh-actions-cache
+
+          RECENT=$(gh actions-cache list -R spacetelescope/romancal --key webbpsf- --sort created-at | cut -f 1 | head -n 1)
+          echo "RECENT=$RECENT"
+          HASH=$(echo $RECENT | cut -d '-' -f 2)
+          echo "HASH=$HASH"
+          echo "hash=$HASH" >> $GITHUB_OUTPUT
+          if [ "$HASH" == '' ]; then exit 1; fi
+      - id: webbpsf_path
+        run: echo "path=${{ env.DATA_PATH }}/webbpsf-data" >> $GITHUB_OUTPUT

From da9c88387158e63a34b6021ffbe88975b2cb1f1e Mon Sep 17 00:00:00 2001
From: zacharyburnett
Date: Wed, 25 Oct 2023 12:33:59 -0400
Subject: [PATCH 2/4] fix cache key lookup

---
 .github/workflows/data.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/data.yml b/.github/workflows/data.yml
index f67396df..9a4af7e8 100644
--- a/.github/workflows/data.yml
+++ b/.github/workflows/data.yml
@@ -16,7 +16,7 @@ env:
 
 jobs:
   webbpsf-data:
-    if: (github.repository == 'spacetelescope/romancal' && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'update webbpsf data')))
+    if: (github.repository == 'spacetelescope/stpipe' && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'update webbpsf data')))
     name: download and cache WebbPSF data
     runs-on: ubuntu-latest
     env:
@@ -60,7 +60,7 @@ jobs:
           # use actions/gh-actions-cache to allow filtering by key
           gh extension install actions/gh-actions-cache
 
-          RECENT=$(gh actions-cache list -R spacetelescope/romancal --key webbpsf- --sort created-at | cut -f 1 | head -n 1)
+          RECENT=$(gh actions-cache list -R spacetelescope/stpipe --key webbpsf- --sort created-at | cut -f 1 | head -n 1)
           echo "RECENT=$RECENT"
           HASH=$(echo $RECENT | cut -d '-' -f 2)
           echo "HASH=$HASH"

From 0c9f681f9baca956e3127ccf832f865f8d30bd04 Mon Sep 17 00:00:00 2001
From: zacharyburnett
Date: Wed, 25 Oct 2023 12:51:17 -0400
Subject: [PATCH 3/4] simplify data workflow

---
Review note (ignored by `git am`): the `webbpsf_hash` workflow output is
mapped to the `webbpsf_hash` job. This patch removes `outputs:` from
`webbpsf_data`, so `jobs.webbpsf_data.outputs.hash` would always be empty
and callers would build the cache key `webbpsf-`. The blob ids on the
`index` line below predate this one-line correction.

 .github/workflows/data.yml | 45 ++++++++++++++++++++++----------------
 1 file changed, 26 insertions(+), 19 deletions(-)

diff --git a/.github/workflows/data.yml b/.github/workflows/data.yml
index 9a4af7e8..7f0306a6 100644
--- a/.github/workflows/data.yml
+++ b/.github/workflows/data.yml
@@ -2,11 +2,11 @@ on:
   workflow_call:
     outputs:
       path:
-        value: ${{ jobs.data.outputs.path }}
+        value: ${{ jobs.path.outputs.path }}
       webbpsf_path:
-        value: ${{ jobs.data.outputs.webbpsf_path }}
+        value: ${{ jobs.webbpsf_path.outputs.path }}
       webbpsf_hash:
-        value: ${{ jobs.data.outputs.webbpsf_hash }}
+        value: ${{ jobs.webbpsf_hash.outputs.hash }}
   workflow_dispatch:
   schedule:
     - cron: "42 4 * * 3"
@@ -15,18 +15,29 @@ env:
   DATA_PATH: /tmp/data
 
 jobs:
-  webbpsf-data:
+  path:
+    runs-on: ubuntu-latest
+    outputs:
+      path: ${{ steps.path.outputs.path }}
+    steps:
+      - id: path
+        run: echo "path=${{ env.DATA_PATH }}" >> $GITHUB_OUTPUT
+  webbpsf_path:
+    needs: [ path ]
+    runs-on: ubuntu-latest
+    outputs:
+      path: ${{ steps.path.outputs.path }}
+    steps:
+      - id: path
+        run: echo "path=${{ env.DATA_PATH }}/webbpsf-data" >> $GITHUB_OUTPUT
+  webbpsf_data:
     if: (github.repository == 'spacetelescope/stpipe' && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'update webbpsf data')))
+    needs: [ webbpsf_path ]
     name: download and cache WebbPSF data
     runs-on: ubuntu-latest
     env:
       WEBBPSF_DATA_URL: https://stsci.box.com/shared/static/qxpiaxsjwo15ml6m4pkhtk36c9jgj70k.gz
-    outputs:
-      path: ${{ steps.cache_path.outputs.path }}
-      hash: ${{ steps.data_hash.outputs.hash }}
     steps:
-      - id: cache_path
-        run: echo "path=${{ env.DATA_PATH }}/webbpsf-data" >> $GITHUB_OUTPUT
       - run: mkdir -p tmp/data
       - run: wget ${{ env.WEBBPSF_DATA_URL }} -O tmp/webbpsf-data.tar.gz
       - id: data_hash
@@ -34,28 +45,26 @@ jobs:
       - id: cache_check
         uses: actions/cache@v3
         with:
-          path: ${{ steps.cache_path.outputs.path }}
+          path: ${{ needs.webbpsf_path.outputs.path }}
           key: webbpsf-${{ steps.data_hash.outputs.hash }}
       - if: ${{ steps.cache_check.outputs.cache-hit != 'true' }}
         run: mkdir -p ${{ env.DATA_PATH }}
       - if: ${{ steps.cache_check.outputs.cache-hit != 'true' }}
         run: tar -xzvf tmp/webbpsf-data.tar.gz -C ${{ env.DATA_PATH }}
-  data:
-    needs: [ webbpsf-data ]
+  webbpsf_hash:
+    needs: [ webbpsf_path, webbpsf_data ]
     # run data job if webbpsf-data succeeds or is skipped. This allows
     # this data job to always fetch the crds context even if the webbpsf data fetching
     # was skipped (and an existing cache will be used for the webbpsf data).
-    if: always() && (needs.webbpsf-data.result == 'success' || needs.webbpsf-data.result == 'skipped')
+    if: always() && (needs.webbpsf_data.result == 'success' || needs.webbpsf_data.result == 'skipped')
     name: retrieve latest data cache key
     runs-on: ubuntu-latest
     env:
       GH_TOKEN: ${{ github.token }}
     outputs:
-      path: ${{ env.DATA_PATH }}
-      webbpsf_hash: ${{ steps.webbpsf_hash.outputs.hash }}
-      webbpsf_path: ${{ steps.webbpsf_path.outputs.path }}
+      hash: ${{ steps.hash.outputs.hash }}
     steps:
-      - id: webbpsf_hash
+      - id: hash
         run: |
           # use actions/gh-actions-cache to allow filtering by key
           gh extension install actions/gh-actions-cache
@@ -66,5 +75,3 @@ jobs:
           echo "HASH=$HASH"
           echo "hash=$HASH" >> $GITHUB_OUTPUT
           if [ "$HASH" == '' ]; then exit 1; fi
-      - id: webbpsf_path
-        run: echo "path=${{ env.DATA_PATH }}/webbpsf-data" >> $GITHUB_OUTPUT

From 1b5b8c0dd11c89177918e4284fc6ff0b6de1dfbe Mon Sep 17 00:00:00 2001
From: zacharyburnett
Date: Wed, 25 Oct 2023 13:02:11 -0400
Subject: [PATCH 4/4] pass WEBBPSF_PATH to toxenv

---
 tox.ini | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tox.ini b/tox.ini
index edfac95a..28612957 100644
--- a/tox.ini
+++ b/tox.ini
@@ -54,6 +54,7 @@ deps =
 pass_env =
     CRDS_*
     CI
+    WEBBPSF_PATH
 set_env =
     devdeps: PIP_EXTRA_INDEX_URL = https://pypi.anaconda.org/astropy/simple https://pypi.anaconda.org/scientific-python-nightly-wheels/simple
     jwst: CRDS_SERVER_URL=https://jwst-crds.stsci.edu