Skip to content

Commit 8e0caf8

Browse files
committed
added deferrable DAG and opa rules/users
1 parent 54da62c commit 8e0caf8

File tree

12 files changed

+245
-3
lines changed

12 files changed

+245
-3
lines changed
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
---
2+
apiVersion: batch/v1
3+
kind: Job
4+
metadata:
5+
name: start-users-job
6+
spec:
7+
template:
8+
spec:
9+
containers:
10+
- name: start-users-job
11+
image: oci.stackable.tech/sdp/tools:1.0.0-stackable0.0.0-dev
12+
# N.B. it is possible for the scheduler to report that a DAG exists,
13+
# only for the worker task to fail if a pod is unexpectedly
14+
# restarted. The wait/watch steps below are not "water-tight" but add
15+
# a layer of stability by at least ensuring that the cluster is
16+
# initialized and ready and that all pods are reachable (albeit
17+
# independent of each other).
18+
command:
19+
- bash
20+
- -euo
21+
- pipefail
22+
- -c
23+
- |
24+
# Airflow: wait for cluster
25+
kubectl rollout status --watch statefulset/airflow-webserver-default
26+
kubectl rollout status --watch statefulset/airflow-scheduler-default
27+
28+
# Airflow: create users
29+
kubectl exec airflow-webserver-default-0 -- airflow users create \
30+
--username "jane.doe" \
31+
--firstname "Jane" \
32+
--lastname "Doe" \
33+
--email "jane.doe@stackable.tech" \
34+
--password "jane.doe" \
35+
--role "User"
36+
37+
kubectl exec airflow-webserver-default-0 -- airflow users create \
38+
--username "richard.roe" \
39+
--firstname "Richard" \
40+
--lastname "Roe" \
41+
--email "richard.roe@stackable.tech" \
42+
--password "richard.roe" \
43+
--role "User"
44+
volumeMounts:
45+
- name: airflow-credentials
46+
mountPath: /airflow-credentials
47+
volumes:
48+
- name: airflow-credentials
49+
secret:
50+
secretName: airflow-credentials
51+
restartPolicy: OnFailure
52+
backoffLimit: 20 # give some time for the Airflow cluster to be available

demos/demos-v2.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,10 @@ demos:
4848
manifests:
4949
- plainYaml: demos/airflow-scheduled-job/01-airflow-demo-clusterrole.yaml
5050
- plainYaml: demos/airflow-scheduled-job/02-airflow-demo-clusterrolebinding.yaml
51-
#- plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/airflow-scheduled-job/03-enable-and-run-spark-dag.yaml
51+
- plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/airflow-scheduled-job/03-enable-and-run-spark-dag.yaml
5252
- plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/airflow-scheduled-job/04-enable-and-run-date-dag.yaml
5353
- plainYaml: demos/airflow-scheduled-job/05-enable-and-run-kafka-dag.yaml
54+
- plainYaml: demos/airflow-scheduled-job/06-create-opa-users.yaml
5455
supportedNamespaces: []
5556
resourceRequests:
5657
cpu: 2401m
14.8 KB
Loading
17.4 KB
Loading
17.9 KB
Loading
17.6 KB
Loading
17.4 KB
Loading
18.3 KB
Loading

stacks/airflow/airflow.yaml

Lines changed: 73 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,14 @@ spec:
99
productVersion: 3.0.6
1010
pullPolicy: IfNotPresent
1111
clusterConfig:
12+
authorization:
13+
opa:
14+
configMapName: opa-airflow
15+
package: airflow
16+
cache:
17+
entryTimeToLive: 5s
18+
maxEntries: 10
1219
loadExamples: false
13-
exposeConfig: false
1420
credentialsSecret: airflow-credentials
1521
volumes:
1622
- name: airflow-dags
@@ -32,6 +38,9 @@ spec:
3238
- name: airflow-dags
3339
mountPath: /dags/kafka.py
3440
subPath: kafka.py
41+
- name: airflow-dags
42+
mountPath: /dags/triggerer.py
43+
subPath: triggerer.py
3544
- name: kafka-tls-pem
3645
mountPath: /stackable/kafka-tls-pem
3746
webservers:
@@ -48,7 +57,12 @@ spec:
4857
AIRFLOW__CORE__DAGS_FOLDER: "/dags"
4958
PYTHONPATH: "/stackable/app/log_config:/dags"
5059
AIRFLOW_CONN_KUBERNETES_IN_CLUSTER: "kubernetes://?__extra__=%7B%22extra__kubernetes__in_cluster%22%3A+true%2C+%22extra__kubernetes__kube_config%22%3A+%22%22%2C+%22extra__kubernetes__kube_config_path%22%3A+%22%22%2C+%22extra__kubernetes__namespace%22%3A+%22%22%7D"
51-
#AIRFLOW_CONN_KAFKA_CONN: "{\"conn_type\": \"kafka\", \"extra\": {\"bootstrap.servers\": \"kafka-broker-default-0-listener-broker.{{ NAMESPACE }}.svc.cluster.local:9093\", \"security.protocol\": \"SSL\", \"ssl.ca.location\": \"/stackable/kafka-tls-pem/ca.crt\", \"group.id\": \"airflow_group\", \"auto.offset.reset\": \"latest\"}}"
60+
# Airflow 3: Disable decision caching for easy debugging
61+
AIRFLOW__CORE__AUTH_OPA_CACHE_MAXSIZE: "0"
62+
configOverrides:
63+
webserver_config.py:
64+
# Allow "POST /login/" without CSRF token
65+
WTF_CSRF_ENABLED: "False"
5266
podOverrides: &podOverrides
5367
spec:
5468
containers:
@@ -66,6 +80,8 @@ spec:
6680
default:
6781
replicas: 1
6882
kubernetesExecutors:
83+
# do not apply the podOverrides here as we don't need them and they will interfere
84+
# with the pod template
6985
envOverrides: *envOverrides
7086
schedulers:
7187
envOverrides: *envOverrides
@@ -127,6 +143,61 @@ data:
127143
with DAG(dag_id="kafka_watcher", schedule=[asset]) as dag:
128144
EmptyOperator(task_id="task")
129145
146+
triggerer.py: |
147+
from datetime import datetime, timedelta
148+
149+
from airflow import DAG
150+
from airflow.models.baseoperator import BaseOperator
151+
from airflow.triggers.temporal import TimeDeltaTrigger
152+
from airflow.utils.context import Context
153+
from airflow.operators.empty import EmptyOperator
154+
155+
# ------------------------------------------------------
156+
# Custom deferrable operator - does a simple async sleep
157+
# ------------------------------------------------------
158+
class CoreDeferrableSleepOperator(BaseOperator):
159+
"""
160+
Sleeps for ``duration`` seconds without occupying a worker.
161+
The async hand-off happens via ``self.defer`` + ``TimeDeltaTrigger``.
162+
"""
163+
ui_color = "#ffefeb"
164+
165+
def __init__(self, *, duration: int, **kwargs):
166+
super().__init__(**kwargs)
167+
self.duration = duration
168+
169+
def execute(self, context: Context):
170+
"""Run on a worker, then hand control to the Triggerer."""
171+
# Build the trigger that will fire after `duration` seconds.
172+
trigger = TimeDeltaTrigger(timedelta(seconds=self.duration))
173+
174+
# *** Asynchronous hand-off ***
175+
# This tells the scheduler: “pause this task, let the Triggerer watch the timer”.
176+
self.defer(trigger=trigger, method_name="execute_complete")
177+
178+
def execute_complete(self, context: Context, event=None):
179+
"""Resumes here once the Triggerer fires."""
180+
self.log.info("Deferrable sleep of %s seconds finished.", self.duration)
181+
return "DONE"
182+
183+
default_args = {"owner": "stackable", "retries": 0}
184+
185+
with DAG(
186+
dag_id="core_deferrable_sleep_demo",
187+
schedule=None,
188+
# N.B. this must be earlier than the current timestamp!
189+
start_date=datetime(2025, 8, 1),
190+
catchup=False,
191+
default_args=default_args,
192+
tags=["example", "triggerer"],
193+
) as dag:
194+
195+
sleep = CoreDeferrableSleepOperator(
196+
task_id="deferrable_sleep",
197+
duration=10,
198+
)
199+
200+
sleep
130201
date_demo.py: |
131202
"""Example DAG returning the current date"""
132203
from datetime import datetime, timedelta

stacks/airflow/opa-rules.yaml

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
---
2+
apiVersion: v1
3+
kind: ConfigMap
4+
metadata:
5+
name: airflow-rules
6+
labels:
7+
opa.stackable.tech/bundle: "true"
8+
data:
9+
airflow.rego: |
10+
package airflow
11+
12+
default is_authorized_configuration := false
13+
default is_authorized_connection := false
14+
default is_authorized_dag := false
15+
default is_authorized_backfill := false
16+
default is_authorized_asset := false
17+
default is_authorized_asset_alias := false
18+
19+
# Allow the user "admin" to create test users
20+
# POST /auth/fab/v1/users
21+
is_authorized_custom_view if {
22+
input.method == "POST"
23+
input.resource_name == "Users"
24+
25+
input.user.name == "admin"
26+
}
27+
is_authorized_configuration if {
28+
input.user.name == "admin"
29+
}
30+
is_authorized_configuration if {
31+
input.user.name == "admin"
32+
}
33+
is_authorized_connection if {
34+
input.user.name == "admin"
35+
}
36+
is_authorized_dag if {
37+
input.user.name == "admin"
38+
}
39+
is_authorized_dataset if {
40+
input.user.name == "admin"
41+
}
42+
is_authorized_pool if {
43+
input.user.name == "admin"
44+
}
45+
is_authorized_variable if {
46+
input.user.name == "admin"
47+
}
48+
is_authorized_view if {
49+
input.user.name == "admin"
50+
}
51+
is_authorized_custom_view if {
52+
input.user.name == "admin"
53+
}
54+
is_authorized_backfill if {
55+
input.user.name == "admin"
56+
}
57+
is_authorized_asset if {
58+
input.user.name == "admin"
59+
}
60+
is_authorized_asset_alias if {
61+
input.user.name == "admin"
62+
}
63+
64+
# GET /home
65+
is_authorized_view if {
66+
input.access_view == "WEBSITE"
67+
68+
input.user.name == "jane.doe"
69+
}
70+
71+
is_authorized_dag if {
72+
input.method == "GET"
73+
input.user.name == "jane.doe"
74+
}
75+
76+
is_authorized_dag if {
77+
input.method == "GET"
78+
input.access_entity == "RUN"
79+
input.details.id == "core_deferrable_sleep_demo"
80+
81+
input.user.name == "jane.doe"
82+
}
83+
84+
is_authorized_dag if {
85+
input.method == "PUT"
86+
input.details.id == "core_deferrable_sleep_demo"
87+
88+
input.user.name == "jane.doe"
89+
}
90+
91+
is_authorized_dag if {
92+
input.method == "POST"
93+
input.details.id == "core_deferrable_sleep_demo"
94+
95+
input.user.name == "jane.doe"
96+
}

0 commit comments

Comments
 (0)