Skip to content
This repository has been archived by the owner on Aug 21, 2024. It is now read-only.

Transform image data #667

Transform image data

Transform image data #667

Workflow file for this run

# Workflow identity, triggers and token permissions.
name: Transform image data

on:
  # Scheduled run every day at 05:45 (GitHub evaluates cron schedules in UTC).
  schedule:
    - cron: '45 5 * * *'

  # Run on every push to the main branch.
  push:
    branches:
      - main

  # Manual run; the caller picks the target environment (defaults to staging).
  workflow_dispatch:
    inputs:
      environment:
        description: "Choose your deployment target environment"
        required: true
        default: "staging"
        type: choice
        options:
          - "production"
          - "staging"

permissions:
  # Lets jobs request an OIDC token; the AWS credential steps in this workflow
  # assume IAM roles via `role-to-assume` rather than static access keys.
  id-token: write
  # Read-only access to the repository contents (needed for checkout).
  contents: read
jobs:
  # Resolves the target environment and the matching S3 bucket once, and
  # exposes both as job outputs for the `parser` job.
  setup:
    name: Setup workflow envs
    runs-on: ubuntu-latest
    outputs:
      environment: ${{ steps.set-environment.outputs.environment }}
      bucket: ${{ steps.set-bucket.outputs.bucket }}
    steps:
      # Only set target environment to production if the workflow is triggered by
      # a scheduled execution or workflow_dispatch with production input selected
      - name: Setup target environment
        id: set-environment
        # Context values are handed to the script through environment variables
        # instead of being expanded into the script text, so the shell always
        # treats them as data.
        env:
          EVENT_NAME: ${{ github.event_name }}
          REQUESTED_ENVIRONMENT: ${{ inputs.environment }}
        run: |
          if [ "$EVENT_NAME" = "schedule" ] || [ "$REQUESTED_ENVIRONMENT" = "production" ]; then
            echo "environment=production" >> "$GITHUB_OUTPUT"
          else
            echo "environment=staging" >> "$GITHUB_OUTPUT"
          fi

      # Set target bucket according to target environment. The decision is
      # derived from the previous step's output so the two can never disagree.
      - name: Setup target bucket
        id: set-bucket
        env:
          TARGET_ENVIRONMENT: ${{ steps.set-environment.outputs.environment }}
        run: |
          if [ "$TARGET_ENVIRONMENT" = "production" ]; then
            echo "bucket=cloudx-json-bucket" >> "$GITHUB_OUTPUT"
          else
            echo "bucket=cid-bucket-staging" >> "$GITHUB_OUTPUT"
          fi

  # Downloads the raw data from the target bucket, runs the transformer and
  # uploads the generated files back to the same bucket.
  parser:
    name: "Transform image data"
    runs-on: ubuntu-latest
    needs: setup
    env:
      ENVIRONMENT: ${{ needs.setup.outputs.environment }}
      BUCKET: ${{ needs.setup.outputs.bucket }}
    steps:
      - name: Clone repo
        uses: actions/checkout@v4
        with:
          # Full history, so that tags are available to the release lookup below.
          fetch-depth: 0

      # Production runs use the most recently tagged commit rather than the
      # checked-out ref.
      - name: Checkout latest tagged release
        if: env.ENVIRONMENT == 'production'
        run: |
          LATEST_RELEASE=$(git describe --tags "$(git rev-list --tags --max-count=1)")
          git checkout "$LATEST_RELEASE"

      - name: Configure AWS credentials for production
        uses: aws-actions/configure-aws-credentials@v4
        if: env.ENVIRONMENT == 'production'
        with:
          role-to-assume: arn:aws:iam::426579533370:role/github_actions_image_transformer
          role-duration-seconds: 1800
          aws-region: us-east-2

      - name: Configure AWS credentials for staging
        uses: aws-actions/configure-aws-credentials@v4
        if: env.ENVIRONMENT == 'staging'
        with:
          role-to-assume: arn:aws:iam::426579533370:role/github_actions_cloud_image_directory_staging
          role-duration-seconds: 1800
          aws-region: us-east-2

      - name: Bootstrap poetry
        run: pipx install poetry

      # This job has no build matrix, so the `runner` context is used for the
      # OS check (the `matrix` context is empty here).
      - name: Update PATH
        if: runner.os != 'Windows'
        run: echo "$HOME/.local/bin" >> "$GITHUB_PATH"

      - name: Configure poetry
        run: poetry config virtualenvs.in-project true

      # Provides the `full-python-version` output consumed by the cache key, so
      # a change of interpreter version produces a different cache entry.
      # NOTE(review): assumes `python3` on the runner is the interpreter Poetry
      # builds the virtualenv with - confirm.
      - name: Get full Python version
        id: full-python-version
        run: |
          PYTHON_VERSION=$(python3 -c "import sys; print('-'.join(str(v) for v in sys.version_info))")
          echo "version=${PYTHON_VERSION}" >> "$GITHUB_OUTPUT"

      - name: Set up cache
        uses: actions/cache@v3
        id: cache
        with:
          path: .venv
          key: venv-${{ runner.os }}-${{ steps.full-python-version.outputs.version }}-${{ hashFiles('**/poetry.lock') }}

      # A restored virtualenv that cannot run pip is discarded so that the
      # install step rebuilds it.
      - name: Ensure cache is healthy
        if: steps.cache.outputs.cache-hit == 'true'
        run: |
          # Using `timeout` is a safeguard against the Poetry command hanging for some reason.
          timeout 10s poetry run pip --version || rm -rf .venv

      - name: Install dependencies
        run: |
          poetry install
          pipx install s3cmd

      # Mirror the bucket's raw/ prefix into ./raw (local files that no longer
      # exist remotely are removed by --delete-removed).
      - name: Download from S3
        run: |
          s3cmd sync --acl-public --delete-removed --guess-mime-type --no-mime-magic \
            "s3://${BUCKET}/raw/" "$(pwd)/raw/"

      - name: run transformer
        run: |
          poetry run cloudimagedirectory-transformer \
            --origin.path="$(pwd)/raw" \
            --destination.path="$(pwd)/images" \
            --input.files=none \
            --filter.until=default

      # Mirror ./images into the bucket's images/ prefix as public JSON objects
      # (remote objects without a local counterpart are removed).
      - name: Upload to S3
        run: |
          s3cmd sync --acl-public --delete-removed --mime-type application/json \
            "$(pwd)/images/" "s3://${BUCKET}/images/"