Skip to content

fix: remove veda-config related steps #6

fix: remove veda-config related steps

fix: remove veda-config related steps #6

Workflow file for this run

# This GitHub Actions workflow automates the process of
# publishing dataset collections to a staging environment
# It is triggered by a pull request to the main branch
# that modifies any files within the ingestion-data/dataset-config/ directory
# The workflow includes steps to
# - publish the datasets,
# - constantly updates the status of the workflow in the PR comment
name: Publish collection to staging
on:
pull_request:
branches: ['main', 'feat/automated-staging-publish']
paths:
# Run the workflow only if files inside this path are updated
# - ingestion-data/staging/dataset-config/*
- ingestion-data/testing/dataset-config/*
jobs:
dataset-publication-and-configuration:
permissions:
pull-requests: write
contents: read
runs-on: ubuntu-latest
environment: staging
steps:
- uses: actions/checkout@v4
# Initializes the PR comment
# Edits existing or creates new comment
# Why? - Cleanliness!
- name: Initialize PR comment with workflow start
id: init-comment
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
WORKFLOW_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
body="### Workflow Status
**Starting workflow...** [View action run]($WORKFLOW_URL)"
echo "Body content: $body"
# Get the PR number
PR_NUMBER=${{ github.event.pull_request.number }}
# Fetch existing comments
COMMENTS=$(gh api repos/${{ github.repository }}/issues/${PR_NUMBER}/comments --jq '.[] | select(.body | contains("### Workflow Status")) | {id: .id, body: .body}')
# Check if a comment already exists
COMMENT_ID=$(echo "$COMMENTS" | jq -r '.id' | head -n 1)
if [ -z "$COMMENT_ID" ]; then
# No existing comment, create a new one
COMMENT_ID=$(gh api repos/${{ github.repository }}/issues/${PR_NUMBER}/comments -f body="$body" --jq '.id')
else
# Comment exists, overwrite the existing comment
gh api repos/${{ github.repository }}/issues/comments/$COMMENT_ID -X PATCH -f body="$body"
fi
echo "COMMENT_ID=$COMMENT_ID" >> $GITHUB_OUTPUT
# Find only the updated files (file that differ from base)
# Only .json files
# The files are outputted to GITHUB_OUTPUT, which can be used in subsequent steps
- name: Get updated files
id: changed-files
uses: tj-actions/changed-files@v44
with:
files: |
**.json
# Uses service client creds to get token
# No username/password needed
- name: Get auth token
id: get-token
run: |
echo "Vars: $vars"
response=$(curl -X POST \
${{ vars.STAGING_COGNITO_DOMAIN }}/oauth2/token \
-H "Content-Type: application/x-www-form-urlencoded" \
-d "grant_type=client_credentials" \
-d "client_id=${{ vars.STAGING_CLIENT_ID }}" \
-d "client_secret=${{ vars.STAGING_CLIENT_SECRET }}"
)
access_token=$(echo "$response" | jq -r '.access_token')
echo "ACCESS_TOKEN=$access_token" >> $GITHUB_OUTPUT
# Makes request to /dataset/publish endpoint
# Outputs only files that were successfully published
# Used by other steps
# If none of the requests are successful, workflow fails
# Updates the PR comment with status of collection publication
- name: Publish all updated collections
id: publish-collections
env:
ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
WORKFLOWS_URL: ${{ vars.STAGING_WORKFLOWS_URL }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
AUTH_TOKEN: ${{ steps.get-token.outputs.ACCESS_TOKEN }}
COMMENT_ID: ${{ steps.init-comment.outputs.COMMENT_ID }}
run: |
if [ -z "$WORKFLOWS_URL" ]; then
echo "WORKFLOWS_URL is not set"
exit 1
fi
if [ -z "$AUTH_TOKEN" ]; then
echo "AUTH_TOKEN is not set"
exit 1
fi
publish_url="${WORKFLOWS_URL%/}/dataset/publish"
bearer_token=$AUTH_TOKEN
# Track successful publications
all_failed=true
success_collections=()
status_message='### Collection Publication Status
'
for file in "${ALL_CHANGED_FILES[@]}"; do
echo $file
if [ -f "$file" ]; then
dataset_config=$(jq '.' "$file")
collection_id=$(jq -r '.collection' "$file")
response=$(curl -s -w "%{http_code}" -o response.txt -X POST "$publish_url" \
-H "Content-Type: application/json" \
-H "Authorization: Bearer $AUTH_TOKEN" \
-d "$dataset_config"
)
status_code=$(tail -n1 <<< "$response")
echo "Status code is: $status_code"
# Update status message based on response code
if [ "$status_code" -eq 200 ] || [ "$status_code" -eq 201 ]; then
echo "$collection_id successfully published ✅"
status_message+="- **$collection_id**: Successfully published ✅
"
success_collections+=("$file")
all_failed=false
else
echo "$collection_id failed to publish ❌"
status_message+="- **$collection_id**: Failed to publish ❌
"
fi
else
echo "File $file does not exist"
exit 1
fi
done
# Exit workflow if all the requests fail
if [ "$all_failed" = true ]; then
echo "All collections failed to publish."
exit 1
fi
# Output only successful collections to be used in subsequent steps
echo "success_collections=$(IFS=','; echo "${success_collections[*]}")" >> $GITHUB_OUTPUT
# Update PR comment
CURRENT_BODY=$(gh api -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID --jq '.body')
UPDATED_BODY="$CURRENT_BODY
$status_message"
gh api -X PATCH -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID -f body="$UPDATED_BODY"
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.9'
cache: 'pip'
# Creates a slim dataset mdx file for each collection based on the dataset config json
- name: Create dataset mdx for given collections
env:
PUBLISHED_COLLECTION_FILES: ${{ steps.publish-collections.outputs.success_collections }}
run: |
pip install -r scripts/requirements.txt
for file in "${PUBLISHED_COLLECTION_FILES[@]}"
do
python3 scripts/mdx.py "$file"
done
# If the workflow fails at any point, the PR comment will be updated
- name: Update PR comment on overall workflow failure
if: failure()
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
COMMENT_ID: ${{ steps.init-comment.outputs.COMMENT_ID }}
run: |
WORKFLOW_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
CURRENT_BODY=$(gh api -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID --jq '.body')
UPDATED_BODY="$CURRENT_BODY
** ❌ The workflow run failed. [See logs here]($WORKFLOW_URL)**"
gh api -X PATCH -H "Authorization: token $GITHUB_TOKEN" /repos/${{ github.repository }}/issues/comments/$COMMENT_ID -f body="$UPDATED_BODY"