# ingest-glue-data.yml
# Repository generated from ministryofjustice/template-repository
name: "Ingest metadata from AP glue catalogue"

# Least-privilege token permissions for every job in this workflow.
permissions:
  # Allows the job to request an OIDC token; the job passes only
  # `role-to-assume` to aws-actions/configure-aws-credentials, which relies
  # on that token to assume the role.
  id-token: write
  # Read-only repository access, sufficient for actions/checkout.
  contents: read
# Reusable workflow: it only runs when invoked from another workflow via
# `workflow_call`, which supplies the inputs and secrets declared below.
on:
  workflow_call:
    inputs:
      ECR_REGION:
        description: "ecr region to connect to"
        required: false
        type: string
        default: eu-west-1
      ENVIRONMENT:
        description: "Environment to use for secrets"
        required: true
        type: string
    secrets:
      DATAHUB_GMS_TOKEN:
        description: "API Key for datahub GMS"
        required: true
      GLUE_ROLE_TO_ASSUME:
        description: "AWS role to assume, which can access AP glue"
        required: true
      SLACK_ALERT_WEBHOOK:
        description: "Webhook for posting alerts to the team"
        required: true
      # The *_TECHNICAL_CONTACT secrets below are exported as environment
      # variables to the "Inject owners to glue ingestion config" step, where
      # envsubst substitutes them into the ingestion/glue_*.yaml configs.
      SOP_TECHNICAL_CONTACT:
        required: true
      CONTRACTS_TECHNICAL_CONTACT:
        required: true
      COURTS_CRIMINAL_TECHNICAL_CONTACT:
        required: true
      COURTS_FAMILY_TECHNICAL_CONTACT:
        required: true
      JUST_LINK_TECHNICAL_CONTACT:
        required: true
      FINES_TECHNICAL_CONTACT:
        required: true
jobs:
  # Renders the per-domain glue ingestion configs, runs `datahub ingest` on
  # each of them, and posts to Slack if any step fails.
  datahub-ingest-glue-data:
    # GitHub environment chosen by the caller; it scopes the secrets and the
    # `vars.DATAHUB_GMS_URL` variable read below.
    environment: ${{ inputs.ENVIRONMENT }}
    timeout-minutes: 120
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      # NOTE(review): credentials are requested for 3600 s while the job may
      # run for up to 120 minutes. Confirm the ingestion finishes within the
      # session lifetime, or that the role permits a longer duration.
      - uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ secrets.GLUE_ROLE_TO_ASSUME }}
          role-duration-seconds: 3600
          aws-region: ${{ inputs.ECR_REGION }}

      # Cache the Poetry installation itself; the key is a fixed string, so
      # it has to be changed by hand to invalidate the cache.
      - name: cache poetry install
        uses: actions/cache@v4
        with:
          path: ~/.local
          key: poetry-1.7.1-0

      - uses: snok/install-poetry@v1
        with:
          version: 1.7.1
          virtualenvs-create: true
          # Keeps the virtualenv at ./.venv so the "cache deps" step can
          # cache that path.
          virtualenvs-in-project: true

      # Cache the project virtualenv, keyed on the lock file contents.
      - name: cache deps
        id: cache-deps
        uses: actions/cache@v4
        with:
          path: .venv
          key: pydeps-${{ hashFiles('**/poetry.lock') }}

      # Install third-party dependencies only when the .venv cache missed.
      - run: poetry install --no-interaction --no-root
        if: steps.cache-deps.outputs.cache-hit != 'true'

      # Always runs; installs the project itself on top of the dependencies.
      - run: poetry install --no-interaction

      # Render each config template into ingestion/processed/ with the
      # contact secrets substituted in.
      # envsubst is called without a variable list, so it replaces every
      # $VAR / ${VAR} reference it finds, and unset variables become empty.
      # NOTE(review): confirm the templates reference only the variables
      # exported here.
      - name: Inject owners to glue ingestion config
        env:
          SOP_TECHNICAL_CONTACT: ${{ secrets.SOP_TECHNICAL_CONTACT }}
          CONTRACTS_TECHNICAL_CONTACT: ${{ secrets.CONTRACTS_TECHNICAL_CONTACT }}
          COURTS_CRIMINAL_TECHNICAL_CONTACT: ${{ secrets.COURTS_CRIMINAL_TECHNICAL_CONTACT }}
          COURTS_FAMILY_TECHNICAL_CONTACT: ${{ secrets.COURTS_FAMILY_TECHNICAL_CONTACT }}
          JUST_LINK_TECHNICAL_CONTACT: ${{ secrets.JUST_LINK_TECHNICAL_CONTACT }}
          FINES_TECHNICAL_CONTACT: ${{ secrets.FINES_TECHNICAL_CONTACT }}
        run: |
          mkdir -p ingestion/processed
          for config in contracts courts_criminal courts_family fines just_link sop; do
            envsubst < "ingestion/glue_${config}.yaml" > "ingestion/processed/glue_${config}.yaml"
          done

      # Run one ingestion per rendered config; `time` reports how long each
      # ingestion took.
      - name: datahub glue ingestion using config with owners
        env:
          DATAHUB_GMS_TOKEN: ${{ secrets.DATAHUB_GMS_TOKEN }}
          DATAHUB_GMS_URL: ${{ vars.DATAHUB_GMS_URL }}
          # Quoted: environment variable values are strings.
          DATAHUB_TELEMETRY_ENABLED: "false"
        run: |
          for ingestion in ingestion/processed/glue_*.yaml; do
            time poetry run datahub ingest -c "$ingestion"
          done

      # Runs only if an earlier step failed; the message links to this run.
      - name: Notify on failure
        uses: slackapi/slack-github-action@v1.27.0
        if: failure()
        with:
          payload: |
            {
              "text": ":warning: Glue ingestion failure on ${{inputs.ENVIRONMENT}} ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
            }
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_ALERT_WEBHOOK }}