diff --git a/.dockerignore b/.dockerignore index 31ef8bb9ac260..e913ed4f43c89 100644 --- a/.dockerignore +++ b/.dockerignore @@ -34,12 +34,15 @@ !chart !docs !licenses +!providers/ +!task_sdk/ # Add those folders to the context so that they are available in the CI container !scripts # Add tests and kubernetes_tests to context. !tests +!tests_common !kubernetes_tests !helm_tests !docker_tests diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 4de511fdfafc2..c20fe916f92d4 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,10 +1,10 @@ # Core -/airflow/executors/ @kaxil @XD-DENG @ashb @o-nikolas @pierrejeambrun @hussein-awala -/airflow/jobs/ @kaxil @ashb @XD-DENG -/airflow/models/ @kaxil @XD-DENG @ashb +/airflow/executors/ @XD-DENG @ashb @o-nikolas @pierrejeambrun @hussein-awala +/airflow/jobs/ @ashb @XD-DENG +/airflow/models/ @XD-DENG @ashb # DAG Serialization -/airflow/serialization/ @kaxil @ashb @bolkedebruin +/airflow/serialization/ @ashb @bolkedebruin # DAG Parsing /airflow/dag_processing @jedcunningham @ephraimbuddy @@ -17,7 +17,7 @@ /chart/ @dstandish @jedcunningham @hussein-awala # Docs (without Providers) -/docs/*.py @kaxil @potiuk +/docs/*.py @potiuk /docs/apache-airflow @potiuk /docs/docker-stack @potiuk /docs/helm-chart @dstandish @jedcunningham @@ -58,7 +58,7 @@ /docs/apache-airflow/concepts/deferring.rst @dstandish @hussein-awala # Secrets Backends -/airflow/secrets @dstandish @kaxil @potiuk @ashb +/airflow/secrets @dstandish @potiuk @ashb # Providers /airflow/providers/amazon/ @eladkal @o-nikolas @@ -85,7 +85,7 @@ /tests/system/providers/amazon/ @eladkal @o-nikolas # Dev tools -/.github/workflows/ @potiuk @ashb @kaxil +/.github/workflows/ @potiuk @ashb /dev/ @potiuk @ashb @jedcunningham /docker_tests/ @potiuk @ashb /provider_packages/ @potiuk @ashb @@ -95,7 +95,7 @@ Dockerfile.ci @potiuk @ashb # Releasing Guides & Project Guidelines /dev/PROJECT_GUIDELINES.md @kaxil -/dev/PROVIDER_PACKAGE_DETAILS.md @kaxil +/dev/PROVIDER_PACKAGE_DETAILS.md @eladkal /dev/README.md @kaxil /dev/README_RELEASE_*.md @kaxil @pierrejeambrun /dev/README_RELEASE_PROVIDER_PACKAGES.md @eladkal diff --git a/.github/actions/breeze/action.yml b/.github/actions/breeze/action.yml index 164914c3d525b..69ebcc7c66e6e 100644 --- a/.github/actions/breeze/action.yml +++ b/.github/actions/breeze/action.yml @@ -21,9 +21,6 @@ description: 'Sets up Python and Breeze' inputs: python-version: description: 'Python version to use' - # Version of Python used for reproducibility of the packages built - # Python 3.8 tarfile produces different tarballs than Python 3.9+ tarfile that's why we are forcing - # Python 3.9 for all release preparation commands to make sure that the tarballs are reproducible default: "3.9" outputs: host-python-version: diff --git a/.github/boring-cyborg.yml b/.github/boring-cyborg.yml index be62d541f0dea..e790d65e2fe6f 100644 --- a/.github/boring-cyborg.yml +++ b/.github/boring-cyborg.yml @@ -19,532 +19,532 @@ labelPRBasedOnFilePath: provider:airbyte: - - airflow/providers/airbyte/**/* + - providers/src/airflow/providers/airbyte/**/* - docs/apache-airflow-providers-airbyte/**/* - - tests/providers/airbyte/**/* - - tests/system/providers/airbyte/**/* + - providers/tests/airbyte/**/* + - providers/tests/system/airbyte/**/* provider:alibaba: - - airflow/providers/alibaba/**/* + - providers/src/airflow/providers/alibaba/**/* - docs/apache-airflow-providers-alibaba/**/* - - tests/providers/alibaba/**/* - - tests/system/providers/alibaba/**/* + - providers/tests/alibaba/**/* + - 
providers/tests/system/alibaba/**/* provider:amazon-aws: - - airflow/providers/amazon/aws/**/* - - tests/providers/amazon/aws/**/* + - providers/src/airflow/providers/amazon/aws/**/* + - providers/tests/amazon/aws/**/* - docs/apache-airflow-providers-amazon/**/* - - tests/system/providers/amazon/aws/**/* + - providers/tests/system/amazon/aws/**/* provider:apache-beam: - - airflow/providers/apache/beam/**/* + - providers/src/airflow/providers/apache/beam/**/* - docs/apache-airflow-providers-apache-beam/**/* - - tests/providers/apache/beam/**/* - - tests/system/providers/apache/beam/**/* + - providers/tests/apache/beam/**/* + - providers/tests/system/apache/beam/**/* provider:apache-cassandra: - - airflow/providers/apache/cassandra/**/* + - providers/src/airflow/providers/apache/cassandra/**/* - docs/apache-airflow-providers-apache-cassandra/**/* - - tests/providers/apache/cassandra/**/* - - tests/system/providers/apache/cassandra/**/* + - providers/tests/apache/cassandra/**/* + - providers/tests/system/apache/cassandra/**/* provider:apache-drill: - - airflow/providers/apache/drill/**/* + - providers/src/airflow/providers/apache/drill/**/* - docs/apache-airflow-providers-apache-drill/**/* - - tests/providers/apache/drill/**/* - - tests/system/providers/apache/drill/**/* + - providers/tests/apache/drill/**/* + - providers/tests/system/apache/drill/**/* provider:apache-druid: - - airflow/providers/apache/druid/**/* + - providers/src/airflow/providers/apache/druid/**/* - docs/apache-airflow-providers-apache-druid/**/* - - tests/providers/apache/druid/**/* - - tests/system/providers/apache/druid/**/* + - providers/tests/apache/druid/**/* + - providers/tests/system/apache/druid/**/* provider:apache-flink: - - airflow/providers/apache/flink/**/* + - providers/src/airflow/providers/apache/flink/**/* - docs/apache-airflow-providers-apache-flink/**/* - - tests/providers/apache/flink/**/* + - providers/tests/apache/flink/**/* provider:apache-hdfs: - - airflow/providers/apache/hdfs/**/* + - providers/src/airflow/providers/apache/hdfs/**/* - docs/apache-airflow-providers-apache-hdfs/**/* - - tests/providers/apache/hdfs/**/* + - providers/tests/apache/hdfs/**/* provider:apache-hive: - - airflow/providers/apache/hive/**/* + - providers/src/airflow/providers/apache/hive/**/* - docs/apache-airflow-providers-apache-hive/**/* - - tests/providers/apache/hive/**/* - - tests/system/providers/apache/hive/**/* + - providers/tests/apache/hive/**/* + - providers/tests/system/apache/hive/**/* provider:apache-iceberg: - - airflow/providers/apache/iceberg/**/* + - providers/src/airflow/providers/apache/iceberg/**/* - docs/apache-airflow-providers-apache-iceberg/**/* - - tests/providers/apache/iceberg/**/* - - tests/system/providers/apache/iceberg/**/* + - providers/tests/apache/iceberg/**/* + - providers/tests/system/apache/iceberg/**/* provider:apache-impala: - - airflow/providers/apache/impala/**/* + - providers/src/airflow/providers/apache/impala/**/* - docs/apache-airflow-providers-apache-impala/**/* - - tests/providers/apache/impala/**/* + - providers/tests/apache/impala/**/* provider:apache-kafka: - - airflow/providers/apache/kafka/**/* + - providers/src/airflow/providers/apache/kafka/**/* - docs/apache-airflow-providers-apache-kafka/**/* - - tests/providers/apache/kafka/**/* - - tests/system/providers/apache/kafka/**/* + - providers/tests/apache/kafka/**/* + - providers/tests/system/apache/kafka/**/* provider:apache-kylin: - - airflow/providers/apache/kylin/**/* + - 
providers/src/airflow/providers/apache/kylin/**/* - docs/apache-airflow-providers-apache-kylin/**/* - - tests/providers/apache/kylin/**/* - - tests/system/providers/apache/kylin/**/* + - providers/tests/apache/kylin/**/* + - providers/tests/system/apache/kylin/**/* provider:apache-livy: - - airflow/providers/apache/livy/**/* + - providers/src/airflow/providers/apache/livy/**/* - docs/apache-airflow-providers-apache-livy/**/* - - tests/providers/apache/livy/**/* - - tests/system/providers/apache/livy/**/* + - providers/tests/apache/livy/**/* + - providers/tests/system/apache/livy/**/* provider:apache-pig: - - airflow/providers/apache/pig/**/* + - providers/src/airflow/providers/apache/pig/**/* - docs/apache-airflow-providers-apache-pig/**/* - - tests/providers/apache/pig/**/* - - tests/system/providers/apache/pig/**/* + - providers/tests/apache/pig/**/* + - providers/tests/system/apache/pig/**/* provider:apache-pinot: - - airflow/providers/apache/pinot/**/* + - providers/src/airflow/providers/apache/pinot/**/* - docs/apache-airflow-providers-apache-pinot/**/* - - tests/providers/apache/pinot/**/* - - tests/system/providers/apache/pinot/**/* + - providers/tests/apache/pinot/**/* + - providers/tests/system/apache/pinot/**/* provider:apache-spark: - - airflow/providers/apache/spark/**/* + - providers/src/airflow/providers/apache/spark/**/* - docs/apache-airflow-providers-apache-spark/**/* - - tests/providers/apache/spark/**/* - - tests/system/providers/apache/spark/**/* + - providers/tests/apache/spark/**/* + - providers/tests/system/apache/spark/**/* provider:apprise: - - airflow/providers/apprise/**/* + - providers/src/airflow/providers/apprise/**/* - docs/apache-airflow-providers-apprise/**/* - - tests/providers/apprise/**/* + - providers/tests/apprise/**/* provider:arangodb: - - airflow/providers/arangodb/**/* + - providers/src/airflow/providers/arangodb/**/* - docs/apache-airflow-providers-arangodb/**/* - - tests/providers/arangodb/**/* + - providers/tests/arangodb/**/* provider:asana: - - airflow/providers/asana/**/* + - providers/src/airflow/providers/asana/**/* - docs/apache-airflow-providers-asana/**/* - - tests/providers/asana/**/* - - tests/system/providers/asana/**/* + - providers/tests/asana/**/* + - providers/tests/system/asana/**/* provider:atlassian-jira: - - airflow/providers/atlassian/jira/**/* + - providers/src/airflow/providers/atlassian/jira/**/* - docs/apache-airflow-providers-atlassian-jira/**/* - - tests/providers/atlassian/jira/**/* + - providers/tests/atlassian/jira/**/* provider:celery: - - airflow/providers/celery/**/* + - providers/src/airflow/providers/celery/**/* - docs/apache-airflow-providers-celery/**/* - - tests/providers/celery/**/* + - providers/tests/celery/**/* provider:cloudant: - - airflow/providers/cloudant/**/* + - providers/src/airflow/providers/cloudant/**/* - docs/apache-airflow-providers-cloudant/**/* - - tests/providers/cloudant/**/* + - providers/tests/cloudant/**/* provider:cncf-kubernetes: - airflow/**/kubernetes_*.py - airflow/example_dags/example_kubernetes_executor.py - - airflow/providers/cncf/kubernetes/**/* - - airflow/providers/celery/executors/celery_kubernetes_executor.py + - providers/src/airflow/providers/cncf/kubernetes/**/* + - providers/src/airflow/providers/celery/executors/celery_kubernetes_executor.py - docs/apache-airflow-providers-cncf-kubernetes/**/* - kubernetes_tests/**/* - - tests/providers/cncf/kubernetes/**/* - - tests/system/providers/cncf/kubernetes/**/* + - providers/tests/cncf/kubernetes/**/* + - 
providers/tests/system/cncf/kubernetes/**/* provider:cohere: - - airflow/providers/cohere/**/* + - providers/src/airflow/providers/cohere/**/* - docs/apache-airflow-providers-cohere/**/* - - tests/providers/cohere/**/* - - tests/system/providers/cohere/**/* + - providers/tests/cohere/**/* + - providers/tests/system/cohere/**/* provider:common-compat: - - airflow/providers/common/compat/**/* + - providers/src/airflow/providers/common/compat/**/* - docs/apache-airflow-providers-common-compat/**/* - - tests/providers/common/compat/**/* + - providers/tests/common/compat/**/* provider:common-io: - - airflow/providers/common/io/**/* + - providers/src/airflow/providers/common/io/**/* - docs/apache-airflow-providers-common-io/**/* - - tests/system/providers/common/io/**/* + - providers/tests/system/common/io/**/* provider:common-sql: - - airflow/providers/common/sql/**/* + - providers/src/airflow/providers/common/sql/**/* - docs/apache-airflow-providers-common-sql/**/* - - tests/providers/common/sql/**/* - - tests/system/providers/common/sql/**/* + - providers/tests/common/sql/**/* + - providers/tests/system/common/sql/**/* provider:standard: - - airflow/providers/standard/**/* + - providers/src/airflow/providers/standard/**/* - docs/apache-airflow-providers-standard/**/* - - tests/providers/standard/**/* + - providers/tests/standard/**/* provider:databricks: - - airflow/providers/databricks/**/* + - providers/src/airflow/providers/databricks/**/* - docs/apache-airflow-providers-databricks/**/* - - tests/providers/databricks/**/* - - tests/system/providers/databricks/**/* + - providers/tests/databricks/**/* + - providers/tests/system/databricks/**/* provider:datadog: - - airflow/providers/datadog/**/* + - providers/src/airflow/providers/datadog/**/* - docs/apache-airflow-providers-datadog/**/* - - tests/providers/datadog/**/* + - providers/tests/datadog/**/* provider:dbt-cloud: - - airflow/providers/dbt/cloud/**/* + - providers/src/airflow/providers/dbt/cloud/**/* - docs/apache-airflow-providers-dbt-cloud/**/* - - tests/providers/dbt/cloud/**/* - - tests/system/providers/dbt/cloud/**/* + - providers/tests/dbt/cloud/**/* + - providers/tests/system/dbt/cloud/**/* provider:dingding: - - airflow/providers/dingding/**/* + - providers/src/airflow/providers/dingding/**/* - docs/apache-airflow-providers-dingding/**/* - - tests/providers/dingding/**/* - - tests/system/providers/dingding/**/* + - providers/tests/dingding/**/* + - providers/tests/system/dingding/**/* provider:discord: - - airflow/providers/discord/**/* + - providers/src/airflow/providers/discord/**/* - docs/apache-airflow-providers-discord/**/* - - tests/providers/discord/**/* + - providers/tests/discord/**/* provider:docker: - - airflow/providers/docker/**/* + - providers/src/airflow/providers/docker/**/* - docs/apache-airflow-providers-docker/**/* - - tests/providers/docker/**/* - - tests/system/providers/docker/**/* + - providers/tests/docker/**/* + - providers/tests/system/docker/**/* provider:elasticsearch: - - airflow/providers/elasticsearch/**/* + - providers/src/airflow/providers/elasticsearch/**/* - docs/apache-airflow-providers-elasticsearch/**/* - - tests/providers/elasticsearch/**/* - - tests/system/providers/elasticsearch/**/* + - providers/tests/elasticsearch/**/* + - providers/tests/system/elasticsearch/**/* provider:exasol: - - airflow/providers/exasol/**/* + - providers/src/airflow/providers/exasol/**/* - docs/apache-airflow-providers-exasol/**/* - - tests/providers/exasol/**/* + - providers/tests/exasol/**/* provider:fab: - 
- airflow/providers/fab/**/* + - providers/src/airflow/providers/fab/**/* - docs/apache-airflow-providers-fab/**/* - - tests/providers/fab/**/* + - providers/tests/fab/**/* provider:facebook: - - airflow/providers/facebook/**/* + - providers/src/airflow/providers/facebook/**/* - docs/apache-airflow-providers-facebook/**/* - - tests/providers/facebook/**/* + - providers/tests/facebook/**/* provider:ftp: - - airflow/providers/ftp/**/* + - providers/src/airflow/providers/ftp/**/* - docs/apache-airflow-providers-ftp/**/* - - tests/providers/ftp/**/* - - tests/system/providers/ftp/**/* + - providers/tests/ftp/**/* + - providers/tests/system/ftp/**/* provider:github: - - airflow/providers/github/**/* + - providers/src/airflow/providers/github/**/* - docs/apache-airflow-providers-github/**/* - - tests/providers/github/**/* - - tests/system/providers/github/**/* + - providers/tests/github/**/* + - providers/tests/system/github/**/* provider:google: - - airflow/providers/google/**/* + - providers/src/airflow/providers/google/**/* - docs/apache-airflow-providers-google/**/* - - tests/providers/google/**/* - - tests/system/providers/google/**/* + - providers/tests/google/**/* + - providers/tests/system/google/**/* provider:grpc: - - airflow/providers/grpc/**/* + - providers/src/airflow/providers/grpc/**/* - docs/apache-airflow-providers-grpc/**/* - - tests/providers/grpc/**/* + - providers/tests/grpc/**/* provider:hashicorp: - - airflow/providers/hashicorp/**/* + - providers/src/airflow/providers/hashicorp/**/* - docs/apache-airflow-providers-hashicorp/**/* - - tests/providers/hashicorp/**/* + - providers/tests/hashicorp/**/* provider:http: - - airflow/providers/http/**/* + - providers/src/airflow/providers/http/**/* - docs/apache-airflow-providers-http/**/* - - tests/providers/http/**/* - - tests/system/providers/http/**/* + - providers/tests/http/**/* + - providers/tests/system/http/**/* provider:imap: - - airflow/providers/imap/**/* + - providers/src/airflow/providers/imap/**/* - docs/apache-airflow-providers-imap/**/* - - tests/providers/imap/**/* + - providers/tests/imap/**/* provider:influxdb: - - airflow/providers/influxdb/**/* + - providers/src/airflow/providers/influxdb/**/* - docs/apache-airflow-providers-influxdb/**/* - - tests/providers/influxdb/**/* - - tests/system/providers/influxdb/**/* + - providers/tests/influxdb/**/* + - providers/tests/system/influxdb/**/* provider:jdbc: - - airflow/providers/jdbc/**/* + - providers/src/airflow/providers/jdbc/**/* - docs/apache-airflow-providers-jdbc/**/* - - tests/providers/jdbc/**/* - - tests/system/providers/jdbc/**/* + - providers/tests/jdbc/**/* + - providers/tests/system/jdbc/**/* provider:jenkins: - - airflow/providers/jenkins/**/* + - providers/src/airflow/providers/jenkins/**/* - docs/apache-airflow-providers-jenkins/**/* - - tests/providers/jenkins/**/* - - tests/system/providers/jenkins/**/* + - providers/tests/jenkins/**/* + - providers/tests/system/jenkins/**/* provider:microsoft-azure: - - airflow/providers/microsoft/azure/**/* - - tests/providers/microsoft/azure/**/* + - providers/src/airflow/providers/microsoft/azure/**/* + - providers/tests/microsoft/azure/**/* - docs/apache-airflow-providers-microsoft-azure/**/* - - tests/system/providers/microsoft/azure/**/* + - providers/tests/system/microsoft/azure/**/* provider:microsoft-mssql: - - airflow/providers/microsoft/mssql/**/* + - providers/src/airflow/providers/microsoft/mssql/**/* - docs/apache-airflow-providers-microsoft-mssql/**/* - - tests/providers/microsoft/mssql/**/* - - 
tests/system/providers/microsoft/mssql/**/* + - providers/tests/microsoft/mssql/**/* + - providers/tests/system/microsoft/mssql/**/* provider:microsoft-psrp: - - airflow/providers/microsoft/psrp/**/* + - providers/src/airflow/providers/microsoft/psrp/**/* - docs/apache-airflow-providers-microsoft-psrp/**/* - - tests/providers/microsoft/psrp/**/* + - providers/tests/microsoft/psrp/**/* provider:microsoft-winrm: - - airflow/providers/microsoft/winrm/**/* + - providers/src/airflow/providers/microsoft/winrm/**/* - docs/apache-airflow-providers-microsoft-winrm/**/* - - tests/providers/microsoft/winrm/**/* - - tests/system/providers/microsoft/winrm/**/* + - providers/tests/microsoft/winrm/**/* + - providers/tests/system/microsoft/winrm/**/* provider:mongo: - - airflow/providers/mongo/**/* + - providers/src/airflow/providers/mongo/**/* - docs/apache-airflow-providers-mongo/**/* - - tests/providers/mongo/**/* + - providers/tests/mongo/**/* provider:mysql: - - airflow/providers/mysql/**/* + - providers/src/airflow/providers/mysql/**/* - docs/apache-airflow-providers-mysql/**/* - - tests/providers/mysql/**/* - - tests/system/providers/mysql/**/* + - providers/tests/mysql/**/* + - providers/tests/system/mysql/**/* provider:neo4j: - - airflow/providers/neo4j/**/* + - providers/src/airflow/providers/neo4j/**/* - docs/apache-airflow-providers-neo4j/**/* - - tests/providers/neo4j/**/* - - tests/system/providers/neo4j/**/* + - providers/tests/neo4j/**/* + - providers/tests/system/neo4j/**/* provider:odbc: - - airflow/providers/odbc/**/* + - providers/src/airflow/providers/odbc/**/* - docs/apache-airflow-providers-odbc/**/* - - tests/providers/odbc/**/* + - providers/tests/odbc/**/* provider:openai: - - airflow/providers/openai/**/* + - providers/src/airflow/providers/openai/**/* - docs/apache-airflow-providers-openai/**/* - - tests/providers/openai/**/* - - tests/system/providers/openai/**/* + - providers/tests/openai/**/* + - providers/tests/system/openai/**/* provider:openfaas: - - airflow/providers/openfaas/**/* + - providers/src/airflow/providers/openfaas/**/* - docs/apache-airflow-providers-openfaas/**/* - - tests/providers/openfaas/**/* + - providers/tests/openfaas/**/* provider:openlineage: - - airflow/providers/openlineage/**/* + - providers/src/airflow/providers/openlineage/**/* - docs/apache-airflow-providers-openlineage/**/* - - tests/providers/openlineage/**/* + - providers/tests/openlineage/**/* provider:opensearch: - - airflow/providers/opensearch/**/* + - providers/src/airflow/providers/opensearch/**/* - docs/apache-airflow-providers-opensearch/**/* - - tests/providers/opensearch/**/* - - tests/system/providers/opensearch/**/* + - providers/tests/opensearch/**/* + - providers/tests/system/opensearch/**/* provider:opsgenie: - - airflow/providers/opsgenie/**/* + - providers/src/airflow/providers/opsgenie/**/* - docs/apache-airflow-providers-opsgenie/**/* - - tests/providers/opsgenie/**/* - - tests/system/providers/opsgenie/**/* + - providers/tests/opsgenie/**/* + - providers/tests/system/opsgenie/**/* provider:Oracle: - - airflow/providers/oracle/**/* + - providers/src/airflow/providers/oracle/**/* - docs/apache-airflow-providers-oracle/**/* - - tests/providers/oracle/**/* + - providers/tests/oracle/**/* provider:pagerduty: - - airflow/providers/pagerduty/**/* + - providers/src/airflow/providers/pagerduty/**/* - docs/apache-airflow-providers-pagerduty/**/* - - tests/providers/pagerduty/**/* + - providers/tests/pagerduty/**/* provider:papermill: - - airflow/providers/papermill/**/* + - 
providers/src/airflow/providers/papermill/**/* - docs/apache-airflow-providers-papermill/**/* - - tests/providers/papermill/**/* - - tests/system/providers/papermill/**/* + - providers/tests/papermill/**/* + - providers/tests/system/papermill/**/* provider:pgvector: - - airflow/providers/pgvector/**/* + - providers/src/airflow/providers/pgvector/**/* - docs/apache-airflow-providers-pgvector/**/* - - tests/providers/pgvector/**/* - - tests/system/providers/pgvector/**/* + - providers/tests/pgvector/**/* + - providers/tests/system/pgvector/**/* provider:pinecone: - - airflow/providers/pinecone/**/* + - providers/src/airflow/providers/pinecone/**/* - docs/apache-airflow-providers-pinecone/**/* - - tests/providers/pinecone/**/* - - tests/system/providers/pinecone/**/* + - providers/tests/pinecone/**/* + - providers/tests/system/pinecone/**/* provider:postgres: - - airflow/providers/postgres/**/* + - providers/src/airflow/providers/postgres/**/* - docs/apache-airflow-providers-postgres/**/* - - tests/providers/postgres/**/* - - tests/system/providers/postgres/**/* + - providers/tests/postgres/**/* + - providers/tests/system/postgres/**/* provider:presto: - - airflow/providers/presto/**/* + - providers/src/airflow/providers/presto/**/* - docs/apache-airflow-providers-presto/**/* - - tests/providers/presto/**/* - - tests/system/providers/presto/**/* + - providers/tests/presto/**/* + - providers/tests/system/presto/**/* provider:qdrant: - - airflow/providers/qdrant/**/* + - providers/src/airflow/providers/qdrant/**/* - docs/apache-airflow-providers-qdrant/**/* - - tests/providers/qdrant/**/* - - tests/system/providers/qdrant/**/* + - providers/tests/qdrant/**/* + - providers/tests/system/qdrant/**/* provider:redis: - - airflow/providers/redis/**/* + - providers/src/airflow/providers/redis/**/* - docs/apache-airflow-providers-redis/**/* - - tests/providers/redis/**/* + - providers/tests/redis/**/* provider:salesforce: - - airflow/providers/salesforce/**/* + - providers/src/airflow/providers/salesforce/**/* - docs/apache-airflow-providers-salesforce/**/* - - tests/providers/salesforce/**/* - - tests/system/providers/salesforce/**/* + - providers/tests/salesforce/**/* + - providers/tests/system/salesforce/**/* provider:samba: - - airflow/providers/samba/**/* + - providers/src/airflow/providers/samba/**/* - docs/apache-airflow-providers-samba/**/* - - tests/providers/samba/**/* + - providers/tests/samba/**/* provider:segment: - - airflow/providers/segment/**/* + - providers/src/airflow/providers/segment/**/* - docs/apache-airflow-providers-segment/**/* - - tests/providers/segment/**/* + - providers/tests/segment/**/* provider:sendgrid: - - airflow/providers/segment/**/* + - providers/src/airflow/providers/segment/**/* - docs/apache-airflow-providers-segment/**/* - - tests/providers/segment/**/* + - providers/tests/segment/**/* provider:sftp: - - airflow/providers/sftp/**/* + - providers/src/airflow/providers/sftp/**/* - docs/apache-airflow-providers-sftp/**/* - - tests/providers/sftp/**/* + - providers/tests/sftp/**/* provider:singularity: - - airflow/providers/singularity/**/* + - providers/src/airflow/providers/singularity/**/* - docs/apache-airflow-providers-singularity/**/* - - tests/providers/singularity/**/* - - tests/system/providers/singularity/**/* + - providers/tests/singularity/**/* + - providers/tests/system/singularity/**/* provider:slack: - - airflow/providers/slack/**/* + - providers/src/airflow/providers/slack/**/* - docs/apache-airflow-providers-slack/**/* - - 
tests/providers/slack/**/* - - tests/system/providers/slack/**/* + - providers/tests/slack/**/* + - providers/tests/system/slack/**/* provider:smtp: - - airflow/providers/smtp/**/* + - providers/src/airflow/providers/smtp/**/* - docs/apache-airflow-providers-smtp/**/* - - tests/providers/smtp/**/* + - providers/tests/smtp/**/* provider:snowflake: - - airflow/providers/snowflake/**/* + - providers/src/airflow/providers/snowflake/**/* - docs/apache-airflow-providers-snowflake/**/* - - tests/providers/snowflake/**/* - - tests/system/providers/snowflake/**/* + - providers/tests/snowflake/**/* + - providers/tests/system/snowflake/**/* provider:sqlite: - - airflow/providers/sqlite/**/* + - providers/src/airflow/providers/sqlite/**/* - docs/apache-airflow-providers-sqlite/**/* - - tests/providers/sqlite/**/* - - tests/system/providers/sqlite/**/* + - providers/tests/sqlite/**/* + - providers/tests/system/sqlite/**/* provider:ssh: - - airflow/providers/ssh/**/* + - providers/src/airflow/providers/ssh/**/* - docs/apache-airflow-providers-ssh/**/* - - tests/providers/ssh/**/* + - providers/tests/ssh/**/* provider:tableau: - - airflow/providers/tableau/**/* + - providers/src/airflow/providers/tableau/**/* - docs/apache-airflow-providers-tableau/**/* - - tests/providers/tableau/**/* - - tests/system/providers/tableau/**/* + - providers/tests/tableau/**/* + - providers/tests/system/tableau/**/* provider:telegram: - - airflow/providers/telegram/**/* + - providers/src/airflow/providers/telegram/**/* - docs/apache-airflow-providers-telegram/**/* - - tests/providers/telegram/**/* - - tests/system/providers/telegram/**/* + - providers/tests/telegram/**/* + - providers/tests/system/telegram/**/* provider:teradata: - - airflow/providers/teradata/**/* + - providers/src/airflow/providers/teradata/**/* - docs/apache-airflow-providers-teradata/**/* - - tests/providers/teradata/**/* - - tests/system/providers/teradata/**/* + - providers/tests/teradata/**/* + - providers/tests/system/teradata/**/* provider:trino: - - airflow/providers/trino/**/* + - providers/src/airflow/providers/trino/**/* - docs/apache-airflow-providers-trino/**/* - - tests/providers/trino/**/* - - tests/system/providers/trino/**/* + - providers/tests/trino/**/* + - providers/tests/system/trino/**/* provider:vertica: - - airflow/providers/vertica/**/* + - providers/src/airflow/providers/vertica/**/* - docs/apache-airflow-providers-vertica/**/* - - tests/providers/vertica/**/* + - providers/tests/vertica/**/* provider:weaviate: - - airflow/providers/weaviate/**/* + - providers/src/airflow/providers/weaviate/**/* - docs/apache-airflow-providers-weaviate/**/* - - tests/providers/weaviate/**/* - - tests/system/providers/weaviate/**/* + - providers/tests/weaviate/**/* + - providers/tests/system/weaviate/**/* provider:yandex: - - airflow/providers/yandex/**/* + - providers/src/airflow/providers/yandex/**/* - docs/apache-airflow-providers-yandex/**/* - - tests/providers/yandex/**/* - - tests/system/providers/yandex/**/* + - providers/tests/yandex/**/* + - providers/tests/system/yandex/**/* provider:ydb: - - airflow/providers/ydb/**/* + - providers/src/airflow/providers/ydb/**/* - docs/apache-airflow-providers-ydb/**/* - - tests/providers/ydb/**/* - - tests/system/providers/ydb/**/* + - providers/tests/ydb/**/* + - providers/tests/system/ydb/**/* provider:zendesk: - - airflow/providers/zendesk/**/* + - providers/src/airflow/providers/zendesk/**/* - docs/apache-airflow-providers-zendesk/**/* - - tests/providers/zendesk/**/* - - 
tests/system/providers/zendesk/**/* + - providers/tests/zendesk/**/* + - providers/tests/system/zendesk/**/* area:providers: - - airflow/providers/**/* + - providers/src/airflow/providers/**/* - docs/apache-airflow-providers-*/**/* - - tests/providers/**/* - - tests/system/providers/**/* + - providers/tests/**/* + - providers/tests/system/**/* area:API: - airflow/api/**/* @@ -611,10 +611,10 @@ labelPRBasedOnFilePath: - docs/apache-airflow/administration-and-deployment/lineage.rst area:Logging: - - airflow/providers/**/log/* + - providers/src/airflow/providers/**/log/* - airflow/utils/log/**/* - docs/apache-airflow/administration-and-deployment/logging-monitoring/logging-*.rst - - tests/providers/**/log/* + - providers/tests/**/log/* - tests/utils/log/**/* area:Plugins: @@ -638,9 +638,9 @@ labelPRBasedOnFilePath: area:Secrets: - airflow/secrets/**/* - - airflow/providers/**/secrets/* + - providers/src/airflow/providers/**/secrets/* - tests/secrets/**/* - - tests/providers/**/secrets/* + - providers/tests/**/secrets/* - docs/apache-airflow/security/secrets/**/* area:Triggerer: @@ -679,6 +679,9 @@ labelPRBasedOnFilePath: area:system-tests: - tests/system/**/* + area:task-sdk: + - task_sdk/**/* + area:db-migrations: - airflow/migrations/versions/* diff --git a/.github/workflows/additional-ci-image-checks.yml b/.github/workflows/additional-ci-image-checks.yml index ae9efdb6b0340..878800324b784 100644 --- a/.github/workflows/additional-ci-image-checks.yml +++ b/.github/workflows/additional-ci-image-checks.yml @@ -64,6 +64,10 @@ on: # yamllint disable-line rule:truthy description: "Docker cache specification to build the image (registry, local, disabled)." required: true type: string + disable-airflow-repo-cache: + description: "Disable airflow repo cache read from main." + required: true + type: string canary-run: description: "Whether this is a canary run (true/false)" required: true @@ -112,6 +116,7 @@ jobs: use-uv: "true" include-success-outputs: ${{ inputs.include-success-outputs }} docker-cache: ${{ inputs.docker-cache }} + disable-airflow-repo-cache: ${{ inputs.disable-airflow-repo-cache }} if: inputs.branch == 'main' # Check that after earlier cache push, breeze command will build quickly @@ -168,3 +173,5 @@ jobs: # use-uv: "true" # upgrade-to-newer-dependencies: ${{ inputs.upgrade-to-newer-dependencies }} # docker-cache: ${{ inputs.docker-cache }} +# disable-airflow-repo-cache: ${{ inputs.disable-airflow-repo-cache }} +# diff --git a/.github/workflows/additional-prod-image-tests.yml b/.github/workflows/additional-prod-image-tests.yml index 4c9606e1343e6..5ffd2001e0e26 100644 --- a/.github/workflows/additional-prod-image-tests.yml +++ b/.github/workflows/additional-prod-image-tests.yml @@ -48,6 +48,10 @@ on: # yamllint disable-line rule:truthy description: "Docker cache specification to build the image (registry, local, disabled)." required: true type: string + disable-airflow-repo-cache: + description: "Disable airflow repo cache read from main." 
+ required: true + type: string canary-run: description: "Whether to run the canary run (true/false)" required: true @@ -72,6 +76,7 @@ jobs: chicken-egg-providers: ${{ inputs.chicken-egg-providers }} constraints-branch: ${{ inputs.constraints-branch }} docker-cache: ${{ inputs.docker-cache }} + disable-airflow-repo-cache: ${{ inputs.disable-airflow-repo-cache }} if: inputs.default-branch == 'main' && inputs.canary-run == 'true' prod-image-extra-checks-release-branch: @@ -89,6 +94,7 @@ jobs: chicken-egg-providers: ${{ inputs.chicken-egg-providers }} constraints-branch: ${{ inputs.constraints-branch }} docker-cache: ${{ inputs.docker-cache }} + disable-airflow-repo-cache: ${{ inputs.disable-airflow-repo-cache }} if: inputs.default-branch != 'main' && inputs.canary-run == 'true' test-examples-of-prod-image-building: diff --git a/.github/workflows/basic-tests.yml b/.github/workflows/basic-tests.yml index 2ccb239487679..509d7066f6d38 100644 --- a/.github/workflows/basic-tests.yml +++ b/.github/workflows/basic-tests.yml @@ -195,11 +195,17 @@ jobs: env: HATCH_ENV: "test" working-directory: ./clients/python - - name: "Prepare FAB provider packages: wheel" + - name: "Prepare FAB+standard provider packages: wheel" run: > - breeze release-management prepare-provider-packages fab --package-format wheel --skip-tag-check + breeze release-management prepare-provider-packages fab standard \ + --package-format wheel --skip-tag-check - name: "Install Airflow with fab for webserver tests" run: pip install . dist/apache_airflow_providers_fab-*.whl + - name: "Prepare Standard provider packages: wheel" + run: > + breeze release-management prepare-provider-packages standard --package-format wheel --skip-tag-check + - name: "Install Airflow with standard provider for webserver tests" + run: pip install . dist/apache_airflow_providers_standard-*.whl - name: "Install Python client" run: pip install ./dist/apache_airflow_client-*.whl - name: "Initialize Airflow DB and start webserver" diff --git a/.github/workflows/build-images.yml b/.github/workflows/build-images.yml index abf966faede02..943b01f8f8916 100644 --- a/.github/workflows/build-images.yml +++ b/.github/workflows/build-images.yml @@ -16,7 +16,7 @@ # under the License. 
# --- -name: "Build Images" +name: Build Images run-name: > Build images for ${{ github.event.pull_request.title }} ${{ github.event.pull_request._links.html.href }} on: # yamllint disable-line rule:truthy @@ -54,7 +54,7 @@ concurrency: jobs: build-info: timeout-minutes: 10 - name: "Build Info" + name: Build Info # At build-info stage we do not yet have outputs so we need to hard-code the runs-on to public runners runs-on: ["ubuntu-22.04"] env: @@ -71,6 +71,7 @@ jobs: prod-image-build: ${{ steps.selective-checks.outputs.prod-image-build }} docker-cache: ${{ steps.selective-checks.outputs.docker-cache }} default-branch: ${{ steps.selective-checks.outputs.default-branch }} + disable-airflow-repo-cache: ${{ steps.selective-checks.outputs.disable-airflow-repo-cache }} constraints-branch: ${{ steps.selective-checks.outputs.default-constraints-branch }} runs-on-as-json-default: ${{ steps.selective-checks.outputs.runs-on-as-json-default }} runs-on-as-json-public: ${{ steps.selective-checks.outputs.runs-on-as-json-public }} @@ -89,7 +90,7 @@ jobs: }}" if: github.repository == 'apache/airflow' steps: - - name: "Cleanup repo" + - name: Cleanup repo shell: bash run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - name: Discover PR merge commit @@ -154,13 +155,13 @@ jobs: # COMPOSITE ACTIONS. WE CAN RUN ANYTHING THAT IS IN THE TARGET BRANCH AND THERE IS NO RISK THAT # CODE WILL BE RUN FROM THE PR. #################################################################################################### - - name: "Cleanup docker" + - name: Cleanup docker run: ./scripts/ci/cleanup_docker.sh - - name: "Setup python" + - name: Setup python uses: actions/setup-python@v5 with: - python-version: 3.8 - - name: "Install Breeze" + python-version: "3.9" + - name: Install Breeze uses: ./.github/actions/breeze #################################################################################################### # WE RUN SELECTIVE CHECKS HERE USING THE TARGET COMMIT AND ITS PARENT TO BE ABLE TO COMPARE THEM @@ -210,9 +211,11 @@ jobs: constraints-branch: ${{ needs.build-info.outputs.constraints-branch }} upgrade-to-newer-dependencies: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} docker-cache: ${{ needs.build-info.outputs.docker-cache }} + disable-airflow-repo-cache: ${{ needs.build-info.outputs.disable-airflow-repo-cache }} + generate-constraints: - name: "Generate constraints" + name: Generate constraints needs: [build-info, build-ci-images] uses: ./.github/workflows/generate-constraints.yml with: @@ -247,7 +250,7 @@ jobs: push-image: "true" use-uv: "true" image-tag: ${{ needs.build-info.outputs.image-tag }} - platform: "linux/amd64" + platform: linux/amd64 python-versions: ${{ needs.build-info.outputs.python-versions }} default-python-version: ${{ needs.build-info.outputs.default-python-version }} branch: ${{ needs.build-info.outputs.default-branch }} @@ -256,3 +259,4 @@ jobs: upgrade-to-newer-dependencies: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} chicken-egg-providers: ${{ needs.build-info.outputs.chicken-egg-providers }} docker-cache: ${{ needs.build-info.outputs.docker-cache }} + disable-airflow-repo-cache: ${{ needs.build-info.outputs.disable-airflow-repo-cache }} diff --git a/.github/workflows/check-providers.yml b/.github/workflows/check-providers.yml index b394f7927329a..f5ff95b73c2f7 100644 --- a/.github/workflows/check-providers.yml +++ b/.github/workflows/check-providers.yml @@ -108,10 +108,9 @@ jobs: run: > breeze release-management 
generate-issue-content-providers --only-available-in-dist --disable-progress - - name: > - Remove incompatible Python ${{ matrix.python-version }} provider packages + - name: Remove Python 3.9-incompatible provider packages run: | - echo "Removing Python 3.8-incompatible provider: cloudant" + echo "Removing Python 3.9-incompatible provider: cloudant" rm -vf dist/apache_airflow_providers_cloudant* - name: "Generate source constraints from CI image" shell: bash diff --git a/.github/workflows/ci-image-build.yml b/.github/workflows/ci-image-build.yml index 1c4b31b55a604..b8e2feac1755f 100644 --- a/.github/workflows/ci-image-build.yml +++ b/.github/workflows/ci-image-build.yml @@ -95,6 +95,10 @@ on: # yamllint disable-line rule:truthy description: "Docker cache specification to build the image (registry, local, disabled)." required: true type: string + disable-airflow-repo-cache: + description: "Disable airflow repo cache read from main." + required: true + type: string jobs: build-ci-images: strategy: @@ -171,6 +175,7 @@ ${{ inputs.do-build == 'true' && inputs.image-tag || '' }}" --python "${{ matrix.python-version }}" --platform "${{ inputs.platform }}" env: DOCKER_CACHE: ${{ inputs.docker-cache }} + DISABLE_AIRFLOW_REPO_CACHE: ${{ inputs.disable-airflow-repo-cache }} INSTALL_MYSQL_CLIENT_TYPE: ${{ inputs.install-mysql-client-type }} UPGRADE_TO_NEWER_DEPENDENCIES: ${{ inputs.upgrade-to-newer-dependencies }} # You can override CONSTRAINTS_GITHUB_REPOSITORY by setting secret in your repo but by default the diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 716323cb9acfd..8a9d716cd8421 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -57,6 +57,7 @@ jobs: outputs: image-tag: ${{ github.event.pull_request.head.sha || github.sha }} docker-cache: ${{ steps.selective-checks.outputs.docker-cache }} + disable-airflow-repo-cache: ${{ steps.selective-checks.outputs.disable-airflow-repo-cache }} affected-providers-list-as-string: >- ${{ steps.selective-checks.outputs.affected-providers-list-as-string }} upgrade-to-newer-dependencies: ${{ steps.selective-checks.outputs.upgrade-to-newer-dependencies }} @@ -93,6 +94,7 @@ jobs: run-ui-tests: ${{ steps.selective-checks.outputs.run-ui-tests }} run-www-tests: ${{ steps.selective-checks.outputs.run-www-tests }} run-kubernetes-tests: ${{ steps.selective-checks.outputs.run-kubernetes-tests }} + run-task-sdk-tests: ${{ steps.selective-checks.outputs.run-task-sdk-tests }} basic-checks-only: ${{ steps.selective-checks.outputs.basic-checks-only }} ci-image-build: ${{ steps.selective-checks.outputs.ci-image-build }} prod-image-build: ${{ steps.selective-checks.outputs.prod-image-build }} @@ -207,6 +209,7 @@ jobs: upgrade-to-newer-dependencies: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} constraints-branch: ${{ needs.build-info.outputs.default-constraints-branch }} docker-cache: ${{ needs.build-info.outputs.docker-cache }} + disable-airflow-repo-cache: ${{ needs.build-info.outputs.disable-airflow-repo-cache }} wait-for-ci-images: timeout-minutes: 120 @@ -263,6 +266,7 @@ jobs: upgrade-to-newer-dependencies: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} skip-pre-commits: ${{ needs.build-info.outputs.skip-pre-commits }} docker-cache: ${{ needs.build-info.outputs.docker-cache }} + disable-airflow-repo-cache: ${{ needs.build-info.outputs.disable-airflow-repo-cache }} canary-run: ${{ needs.build-info.outputs.canary-run }} latest-versions-only: ${{ needs.build-info.outputs.latest-versions-only }} 
include-success-outputs: ${{ needs.build-info.outputs.include-success-outputs }} @@ -558,6 +562,7 @@ jobs: chicken-egg-providers: ${{ needs.build-info.outputs.chicken-egg-providers }} constraints-branch: ${{ needs.build-info.outputs.default-constraints-branch }} docker-cache: ${{ needs.build-info.outputs.docker-cache }} + disable-airflow-repo-cache: ${{ needs.build-info.outputs.disable-airflow-repo-cache }} wait-for-prod-images: timeout-minutes: 80 @@ -614,6 +619,7 @@ jobs: upgrade-to-newer-dependencies: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} chicken-egg-providers: ${{ needs.build-info.outputs.chicken-egg-providers }} docker-cache: ${{ needs.build-info.outputs.docker-cache }} + disable-airflow-repo-cache: ${{ needs.build-info.outputs.disable-airflow-repo-cache }} default-python-version: ${{ needs.build-info.outputs.default-python-version }} canary-run: ${{ needs.build-info.outputs.canary-run }} if: needs.build-info.outputs.prod-image-build == 'true' @@ -669,6 +675,7 @@ jobs: upgrade-to-newer-dependencies: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} include-success-outputs: ${{ needs.build-info.outputs.include-success-outputs }} docker-cache: ${{ needs.build-info.outputs.docker-cache }} + disable-airflow-repo-cache: ${{ needs.build-info.outputs.disable-airflow-repo-cache }} canary-run: ${{ needs.build-info.outputs.canary-run }} notify-slack-failure: diff --git a/.github/workflows/finalize-tests.yml b/.github/workflows/finalize-tests.yml index 8b392ba204664..6fae105e0a646 100644 --- a/.github/workflows/finalize-tests.yml +++ b/.github/workflows/finalize-tests.yml @@ -64,6 +64,10 @@ on: # yamllint disable-line rule:truthy description: "Docker cache specification to build the image (registry, local, disabled)." required: true type: string + disable-airflow-repo-cache: + description: "Disable airflow repo cache read from main." + required: true + type: string include-success-outputs: description: "Whether to include success outputs (true/false)" required: true @@ -148,6 +152,7 @@ jobs: use-uv: "true" include-success-outputs: ${{ inputs.include-success-outputs }} docker-cache: ${{ inputs.docker-cache }} + disable-airflow-repo-cache: ${{ inputs.disable-airflow-repo-cache }} if: inputs.canary-run == 'true' # push-buildx-cache-to-github-registry-arm: diff --git a/.github/workflows/prod-image-build.yml b/.github/workflows/prod-image-build.yml index 75d9d0054ec78..db80a6ec247ec 100644 --- a/.github/workflows/prod-image-build.yml +++ b/.github/workflows/prod-image-build.yml @@ -114,6 +114,10 @@ on: # yamllint disable-line rule:truthy description: "Docker cache specification to build the image (registry, local, disabled)." required: true type: string + disable-airflow-repo-cache: + description: "Disable airflow repo cache read from main." 
+ required: true + type: string jobs: build-prod-packages: @@ -276,6 +280,7 @@ ${{ inputs.do-build == 'true' && inputs.image-tag || '' }}" env: PUSH: ${{ inputs.push-image }} DOCKER_CACHE: ${{ inputs.docker-cache }} + DISABLE_AIRFLOW_REPO_CACHE: ${{ inputs.disable-airflow-repo-cache }} DEBIAN_VERSION: ${{ inputs.debian-version }} INSTALL_MYSQL_CLIENT_TYPE: ${{ inputs.install-mysql-client-type }} UPGRADE_TO_NEWER_DEPENDENCIES: ${{ inputs.upgrade-to-newer-dependencies }} @@ -291,6 +296,7 @@ ${{ inputs.do-build == 'true' && inputs.image-tag || '' }}" env: PUSH: ${{ inputs.push-image }} DOCKER_CACHE: ${{ inputs.docker-cache }} + DISABLE_AIRFLOW_REPO_CACHE: ${{ inputs.disable-airflow-repo-cache }} DEBIAN_VERSION: ${{ inputs.debian-version }} INSTALL_MYSQL_CLIENT_TYPE: ${{ inputs.install-mysql-client-type }} UPGRADE_TO_NEWER_DEPENDENCIES: ${{ inputs.upgrade-to-newer-dependencies }} diff --git a/.github/workflows/prod-image-extra-checks.yml b/.github/workflows/prod-image-extra-checks.yml index 82d327ba2f16d..bb63faef7b243 100644 --- a/.github/workflows/prod-image-extra-checks.yml +++ b/.github/workflows/prod-image-extra-checks.yml @@ -63,6 +63,10 @@ on: # yamllint disable-line rule:truthy description: "Docker cache specification to build the image (registry, local, disabled)." required: true type: string + disable-airflow-repo-cache: + description: "Disable airflow repo cache read from main." + required: true + type: string jobs: myssql-client-image: uses: ./.github/workflows/prod-image-build.yml @@ -84,6 +88,7 @@ jobs: chicken-egg-providers: ${{ inputs.chicken-egg-providers }} constraints-branch: ${{ inputs.constraints-branch }} docker-cache: ${{ inputs.docker-cache }} + disable-airflow-repo-cache: ${{ inputs.disable-airflow-repo-cache }} pip-image: uses: ./.github/workflows/prod-image-build.yml @@ -107,3 +112,4 @@ jobs: chicken-egg-providers: ${{ inputs.chicken-egg-providers }} constraints-branch: ${{ inputs.constraints-branch }} docker-cache: ${{ inputs.docker-cache }} + disable-airflow-repo-cache: ${{ inputs.disable-airflow-repo-cache }} diff --git a/.github/workflows/push-image-cache.yml b/.github/workflows/push-image-cache.yml index 0dc83a3fd66ea..10a33275ad3f3 100644 --- a/.github/workflows/push-image-cache.yml +++ b/.github/workflows/push-image-cache.yml @@ -76,6 +76,10 @@ on: # yamllint disable-line rule:truthy description: "Docker cache specification to build the image (registry, local, disabled)." required: true type: string + disable-airflow-repo-cache: + description: "Disable airflow repo cache read from main." 
+ required: true + type: string jobs: push-ci-image-cache: name: "Push CI ${{ inputs.cache-type }}:${{ matrix.python }} image cache " @@ -100,6 +104,7 @@ jobs: DEFAULT_BRANCH: ${{ inputs.branch }} DEFAULT_CONSTRAINTS_BRANCH: ${{ inputs.constraints-branch }} DOCKER_CACHE: ${{ inputs.docker-cache }} + DISABLE_AIRFLOW_REPO_CACHE: ${{ inputs.disable-airflow-repo-cache }} GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} @@ -162,6 +167,7 @@ jobs: DEFAULT_BRANCH: ${{ inputs.branch }} DEFAULT_CONSTRAINTS_BRANCH: ${{ inputs.constraints-branch }} DOCKER_CACHE: ${{ inputs.docker-cache }} + DISABLE_AIRFLOW_REPO_CACHE: ${{ inputs.disable-airflow-repo-cache }} GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} diff --git a/.github/workflows/task-sdk-tests.yml b/.github/workflows/task-sdk-tests.yml new file mode 100644 index 0000000000000..14fae903837c2 --- /dev/null +++ b/.github/workflows/task-sdk-tests.yml @@ -0,0 +1,91 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +--- +name: Task SDK tests +on: # yamllint disable-line rule:truthy + workflow_call: + inputs: + runs-on-as-json-default: + description: "The array of labels (in json form) determining default runner used for the build." 
+ required: true + type: string + image-tag: + description: "Tag to set for the image" + required: true + type: string + canary-run: + description: "Whether this is a canary run" + required: true + type: string + default-python-version: + description: "Which version of python should be used by default" + required: true + type: string + python-versions: + description: "JSON-formatted array of Python versions to build images from" + required: true + type: string +jobs: + task-sdk-tests: + timeout-minutes: 80 + name: Task SDK:P${{ matrix.python-version }} tests + runs-on: ${{ fromJSON(inputs.runs-on-as-json-default) }} + strategy: + fail-fast: false + matrix: + python-version: "${{fromJSON(inputs.python-versions)}}" + env: + GITHUB_REPOSITORY: ${{ github.repository }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_USERNAME: ${{ github.actor }} + IMAGE_TAG: "${{ inputs.image-tag }}" + INCLUDE_NOT_READY_PROVIDERS: "true" + PYTHON_MAJOR_MINOR_VERSION: "${{ inputs.default-python-version }}" + VERBOSE: "true" + CLEAN_AIRFLOW_INSTALLATION: "${{ inputs.canary-run }}" + if: inputs.run-task-sdk-tests == 'true' + steps: + - name: "Cleanup repo" + shell: bash + run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" + - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" + uses: actions/checkout@v4 + with: + persist-credentials: false + - name: "Cleanup docker" + run: ./scripts/ci/cleanup_docker.sh + - name: "Prepare breeze & CI image: ${{ matrix.python-version }}:${{ inputs.image-tag }}" + uses: ./.github/actions/prepare_breeze_and_image + - name: "Cleanup dist files" + run: rm -fv ./dist/* + - name: "Prepare Task SDK packages: wheel" + run: > + breeze release-management prepare-task-sdk-package --package-format wheel + - name: "Verify wheel packages with twine" + run: | + pipx uninstall twine || true + pipx install twine && twine check dist/*.whl + - name: > + Run provider unit tests on + Airflow Task SDK:Python ${{ matrix.python-version }} + if: matrix.run-tests == 'true' + run: > + breeze testing tests --run-in-parallel + --parallel-test-types TaskSDK + --use-packages-from-dist + --package-format wheel diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2ad8c6a959cec..fe4315fafc212 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
--- -default_stages: [commit, push] +default_stages: [pre-commit, pre-push] default_language_version: python: python3 node: 22.2.0 @@ -172,7 +172,7 @@ repos: name: Check and update common.sql API stubs entry: ./scripts/ci/pre_commit/update_common_sql_api_stubs.py language: python - files: ^scripts/ci/pre_commit/update_common_sql_api\.py|^airflow/providers/common/sql/.*\.pyi?$ + files: ^scripts/ci/pre_commit/update_common_sql_api\.py|^providers/src/airflow/providers/common/sql/.*\.pyi?$ additional_dependencies: ['rich>=12.4.4', 'mypy==1.9.0', 'black==23.10.0', 'jinja2'] pass_filenames: false require_serial: true @@ -225,9 +225,9 @@ repos: entry: ./scripts/ci/pre_commit/check_deferrable_default.py pass_filenames: false additional_dependencies: ["libcst>=1.1.0"] - files: ^airflow/.*/sensors/.*\.py$|^airflow/.*/operators/.*\.py$ + files: ^(providers/src/)?airflow/.*/(sensors|operators)/.*\.py$ - repo: https://github.com/asottile/blacken-docs - rev: 1.18.0 + rev: 1.19.0 hooks: - id: blacken-docs name: Run black on docs @@ -240,7 +240,7 @@ repos: alias: blacken-docs additional_dependencies: [black==23.10.0] - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 + rev: v5.0.0 hooks: - id: check-merge-conflict name: Check that merge conflicts are not being committed @@ -326,7 +326,7 @@ repos: exclude: ^.*/.*_vendor/|^airflow/www/static/css/material-icons\.css$|^images/.*$|^RELEASE_NOTES\.txt$|^.*package-lock\.json$|^.*/kinglear\.txt$|^.*pnpm-lock\.yaml$ args: - --ignore-words=docs/spelling_wordlist.txt - - --skip=airflow/providers/*/*.rst,airflow/www/*.log,docs/*/commits.rst,docs/apache-airflow/tutorial/pipeline_example.csv,*.min.js,*.lock,INTHEWILD.md + - --skip=providers/src/airflow/providers/*/*.rst,airflow/www/*.log,docs/*/commits.rst,docs/apache-airflow/tutorial/pipeline_example.csv,*.min.js,*.lock,INTHEWILD.md - --exclude-file=.codespellignorelines - repo: local # Note that this is the 2nd "local" repo group in the .pre-commit-config.yaml file. 
This is because @@ -343,7 +343,7 @@ repos: language: python entry: ./scripts/ci/pre_commit/validate_operators_init.py pass_filenames: true - files: ^airflow/providers/.*/(operators|transfers|sensors)/.*\.py$ + files: ^providers/src/airflow/providers/.*/(operators|transfers|sensors)/.*\.py$ additional_dependencies: [ 'rich>=12.4.4' ] - id: ruff name: Run 'ruff' for extremely fast Python linting @@ -364,7 +364,7 @@ repos: args: [] require_serial: true additional_dependencies: ["ruff==0.5.5"] - exclude: ^.*/.*_vendor/|^tests/dags/test_imports.py|^airflow/contrib/ + exclude: ^.*/.*_vendor/|^tests/dags/test_imports.py$ - id: replace-bad-characters name: Replace bad characters entry: ./scripts/ci/pre_commit/replace_bad_characters.py @@ -418,7 +418,7 @@ repos: language: python files: ^airflow/.*\.py$ require_serial: true - exclude: ^airflow/kubernetes/|^airflow/providers/ + exclude: ^airflow/kubernetes/|^providers/src/airflow/providers/ entry: ./scripts/ci/pre_commit/check_cncf_k8s_used_for_k8s_executor_only.py additional_dependencies: ['rich>=12.4.4'] - id: check-airflow-provider-compatibility @@ -426,7 +426,7 @@ repos: entry: ./scripts/ci/pre_commit/check_provider_airflow_compatibility.py language: python pass_filenames: true - files: ^airflow/providers/.*\.py$ + files: ^providers/src/airflow/providers/.*\.py$ additional_dependencies: ['rich>=12.4.4'] - id: check-google-re2-as-dependency name: Check google-re2 declared as dep @@ -435,7 +435,7 @@ repos: language: python pass_filenames: true require_serial: true - files: ^airflow/providers/.*\.py$ + files: ^providers/src/airflow/providers/.*\.py$ additional_dependencies: ['rich>=12.4.4'] - id: update-local-yml-file name: Update mounts in the local yml file @@ -449,13 +449,18 @@ repos: description: Check dependency of SQL Providers with common data structure entry: ./scripts/ci/pre_commit/check_common_sql_dependency.py language: python - files: ^airflow/providers/.*/hooks/.*\.py$ + files: ^providers/src/airflow/providers/.*/hooks/.*\.py$ additional_dependencies: ['rich>=12.4.4', 'pyyaml', 'packaging'] - id: update-providers-dependencies name: Update dependencies for provider packages entry: ./scripts/ci/pre_commit/update_providers_dependencies.py language: python - files: ^airflow/providers/.*\.py$|^airflow/providers/.*/provider\.yaml$|^tests/providers/.*\.py$|^tests/system/providers/.*\.py$|^scripts/ci/pre_commit/update_providers_dependencies\.py$ + files: | + (?x) + ^providers/src/airflow/providers/.*\.py$ | + ^providers/src/airflow/providers/.*/provider\.yaml$ | + ^providers/tests/.*\.py$ | + ^scripts/ci/pre_commit/update_providers_dependencies\.py$ pass_filenames: false additional_dependencies: ['setuptools', 'rich>=12.4.4', 'pyyaml', 'tomli'] - id: check-extra-packages-references @@ -477,7 +482,7 @@ repos: name: Update extras in documentation entry: ./scripts/ci/pre_commit/insert_extras.py language: python - files: ^contributing-docs/12_airflow_dependencies_and_extras.rst$|^INSTALL$|^airflow/providers/.*/provider\.yaml$|^Dockerfile.* + files: ^contributing-docs/12_airflow_dependencies_and_extras.rst$|^INSTALL$|^providers/src/airflow/providers/.*/provider\.yaml$|^Dockerfile.* pass_filenames: false additional_dependencies: ['rich>=12.4.4', 'hatchling==1.25.0'] - id: check-extras-order @@ -510,9 +515,9 @@ repos: (?x) ^scripts/ci/pre_commit/version_heads_map\.py$| ^airflow/migrations/versions/.*$|^airflow/migrations/versions| - ^airflow/providers/fab/migrations/versions/.*$|^airflow/providers/fab/migrations/versions| + 
^providers/src/airflow/providers/fab/migrations/versions/.*$|^providers/src/airflow/providers/fab/migrations/versions| ^airflow/utils/db.py$| - ^airflow/providers/fab/auth_manager/models/db.py$ + ^providers/src/airflow/providers/fab/auth_manager/models/db.py$ additional_dependencies: ['packaging','google-re2'] - id: update-version name: Update versions in docs @@ -533,11 +538,11 @@ repos: language: pygrep entry: > (?i) - .*https://github.*[0-9]/tests/system/providers| - .*https://github.*/main/tests/system/providers| - .*https://github.*/master/tests/system/providers| - .*https://github.*/main/airflow/providers/.*/example_dags/| - .*https://github.*/master/airflow/providers/.*/example_dags/ + .*https://github.*[0-9]/providers/tests/system/| + .*https://github.*/main/providers/tests/system/| + .*https://github.*/master/providers/tests/system/| + .*https://github.*/main/providers/src/airflow/providers/.*/example_dags/| + .*https://github.*/master/providers/src/airflow/providers/.*/example_dags/ pass_filenames: true files: ^docs/apache-airflow-providers-.*\.rst - id: check-safe-filter-usage-in-html @@ -550,17 +555,9 @@ repos: - id: check-no-providers-in-core-examples language: pygrep name: No providers imports in core example DAGs - description: The core example DAGs have no dependencies other than core Airflow - entry: "^\\s*from airflow\\.providers.*" + description: The core example DAGs have no dependencies other than standard provider or core Airflow + entry: "^\\s*from airflow\\.providers.(?!standard.)" pass_filenames: true - exclude: > - (?x) - ^airflow/example_dags/example_branch_datetime_operator.py| - ^airflow/example_dags/example_branch_day_of_week_operator.py| - ^airflow/example_dags/example_sensors.py| - ^airflow/example_dags/example_sensors.py| - ^airflow/example_dags/example_sensors.py| - ^airflow/example_dags/example_time_delta_sensor_async.py files: ^airflow/example_dags/.*\.py$ - id: check-no-airflow-deprecation-in-providers language: pygrep @@ -568,7 +565,7 @@ repos: description: Use AirflowProviderDeprecationWarning in providers entry: "^\\s*DeprecationWarning*" pass_filenames: true - files: ^airflow/providers/.*\.py$ + files: ^providers/src/airflow/providers/.*\.py$ - id: check-urlparse-usage-in-code language: pygrep name: Don't use urlparse in code @@ -609,28 +606,28 @@ repos: ^airflow/config_templates/| ^airflow/models/baseoperator.py$| ^airflow/operators/__init__.py$| - ^airflow/providers/amazon/aws/hooks/emr.py$| - ^airflow/providers/amazon/aws/operators/emr.py$| - ^airflow/providers/apache/cassandra/hooks/cassandra.py$| - ^airflow/providers/apache/hive/operators/hive_stats.py$| - ^airflow/providers/apache/hive/transfers/vertica_to_hive.py$| - ^airflow/providers/apache/spark/decorators/| - ^airflow/providers/apache/spark/hooks/| - ^airflow/providers/apache/spark/operators/| - ^airflow/providers/exasol/hooks/exasol.py$| - ^airflow/providers/fab/auth_manager/security_manager/| - ^airflow/providers/google/cloud/hooks/bigquery.py$| - ^airflow/providers/google/cloud/operators/cloud_build.py$| - ^airflow/providers/google/cloud/operators/dataproc.py$| - ^airflow/providers/google/cloud/operators/mlengine.py$| - ^airflow/providers/microsoft/azure/hooks/cosmos.py$| - ^airflow/providers/microsoft/winrm/hooks/winrm.py$| + ^providers/src/airflow/providers/amazon/aws/hooks/emr.py$| + ^providers/src/airflow/providers/amazon/aws/operators/emr.py$| + ^providers/src/airflow/providers/apache/cassandra/hooks/cassandra.py$| + 
^providers/src/airflow/providers/apache/hive/operators/hive_stats.py$| + ^providers/src/airflow/providers/apache/hive/transfers/vertica_to_hive.py$| + ^providers/src/airflow/providers/apache/spark/decorators/| + ^providers/src/airflow/providers/apache/spark/hooks/| + ^providers/src/airflow/providers/apache/spark/operators/| + ^providers/src/airflow/providers/exasol/hooks/exasol.py$| + ^providers/src/airflow/providers/fab/auth_manager/security_manager/| + ^providers/src/airflow/providers/google/cloud/hooks/bigquery.py$| + ^providers/src/airflow/providers/google/cloud/operators/cloud_build.py$| + ^providers/src/airflow/providers/google/cloud/operators/dataproc.py$| + ^providers/src/airflow/providers/google/cloud/operators/mlengine.py$| + ^providers/src/airflow/providers/microsoft/azure/hooks/cosmos.py$| + ^providers/src/airflow/providers/microsoft/winrm/hooks/winrm.py$| ^airflow/www/fab_security/manager.py$| ^docs/.*commits.rst$| ^docs/apache-airflow-providers-apache-cassandra/connections/cassandra.rst$| - ^airflow/providers/microsoft/winrm/operators/winrm.py$| - ^airflow/providers/opsgenie/hooks/opsgenie.py$| - ^airflow/providers/redis/provider.yaml$| + ^providers/src/airflow/providers/microsoft/winrm/operators/winrm.py$| + ^providers/src/airflow/providers/opsgenie/hooks/opsgenie.py$| + ^providers/src/airflow/providers/redis/provider.yaml$| ^airflow/serialization/serialized_objects.py$| ^airflow/ui/pnpm-lock.yaml$| ^airflow/utils/db.py$| @@ -655,6 +652,7 @@ repos: ^newsfragments/41761.significant.rst$| ^scripts/ci/pre_commit/vendor_k8s_json_schema.py$| ^tests/| + ^providers/tests/| ^.pre-commit-config\.yaml$| ^.*CHANGELOG\.(rst|txt)$| ^.*RELEASE_NOTES\.rst$| @@ -690,7 +688,7 @@ repos: ^airflow/decorators/.*$| ^airflow/hooks/.*$| ^airflow/operators/.*$| - ^airflow/providers/.*$| + ^providers/src/airflow/providers/.*$| ^airflow/sensors/.*$| ^dev/provider_packages/.*$ - id: check-base-operator-usage @@ -705,7 +703,7 @@ repos: ^airflow/decorators/.*$| ^airflow/hooks/.*$| ^airflow/operators/.*$| - ^airflow/providers/.*$| + ^providers/src/airflow/providers/.*$| ^airflow/sensors/.*$| ^dev/provider_packages/.*$ - id: check-base-operator-usage @@ -716,16 +714,16 @@ repos: pass_filenames: true files: > (?x) - ^airflow/providers/.*\.py$ - exclude: ^.*/.*_vendor/ + ^providers/src/airflow/providers/.*\.py$ + exclude: ^.*/.*_vendor/|providers/src/airflow/providers/standard/operators/bash.py - id: check-get-lineage-collector-providers language: python name: Check providers import hook lineage code from compat description: Make sure you import from airflow.provider.common.compat.lineage.hook instead of airflow.lineage.hook. 
entry: ./scripts/ci/pre_commit/check_get_lineage_collector_providers.py - files: ^airflow/providers/.*\.py$ - exclude: ^airflow/providers/common/compat/.*\.py$ + files: ^providers/src/airflow/providers/.*\.py$ + exclude: ^providers/src/airflow/providers/common/compat/.*\.py$ additional_dependencies: [ 'rich>=12.4.4' ] - id: check-decorated-operator-implements-custom-name name: Check @task decorator implements custom_operator_name @@ -738,7 +736,7 @@ repos: name: Verify usage of Airflow deprecation classes in core entry: category=DeprecationWarning|category=PendingDeprecationWarning files: \.py$ - exclude: ^airflow/configuration\.py$|^airflow/providers|^scripts/in_container/verify_providers\.py$|^tests/.*$ + exclude: ^airflow/configuration\.py$|^providers/src/airflow/providers/|^scripts/in_container/verify_providers\.py$|^(providers/)?tests/.*$|^tests_common/ pass_filenames: true - id: check-provide-create-sessions-imports language: pygrep @@ -780,7 +778,7 @@ repos: name: Check if aiobotocore is an optional dependency only entry: ./scripts/ci/pre_commit/check_aiobotocore_optional.py language: python - files: ^airflow/providers/.*/provider\.yaml$ + files: ^providers/src/airflow/providers/.*/provider\.yaml$ pass_filenames: true additional_dependencies: ['click', 'rich>=12.4.4', 'pyyaml'] require_serial: true @@ -885,12 +883,6 @@ repos: entry: ./scripts/ci/pre_commit/compile_www_assets_dev.py pass_filenames: false additional_dependencies: ['yarn@1.22.21'] - - id: check-providers-init-file-missing - name: Provider init file is missing - pass_filenames: false - always_run: true - entry: ./scripts/ci/pre_commit/check_providers_init.py - language: python - id: check-providers-subpackages-init-file-exist name: Provider subpackage init files are there pass_filenames: false @@ -952,8 +944,8 @@ repos: name: Check if system tests have required segments of code entry: ./scripts/ci/pre_commit/check_system_tests.py language: python - files: ^tests/system/.*/example_[^/]*\.py$ - exclude: ^tests/system/providers/google/cloud/bigquery/example_bigquery_queries\.py$ + files: ^(providers/)?tests/system/.*/example_[^/]*\.py$ + exclude: ^providers/tests/system/google/cloud/bigquery/example_bigquery_queries\.py$ pass_filenames: true additional_dependencies: ['rich>=12.4.4'] - id: generate-pypi-readme @@ -974,8 +966,8 @@ repos: name: Lint JSON Schema files entry: ./scripts/ci/pre_commit/json_schema.py args: - - --spec-url - - https://json-schema.org/draft-07/schema + - --spec-file + - scripts/ci/pre_commit/draft7_schema.json language: python pass_filenames: true files: .*\.schema\.json$ @@ -1066,13 +1058,6 @@ repos: files: \.py$ exclude: ^.*/.*_vendor/ additional_dependencies: ['rich>=12.4.4'] - - id: check-compat-cache-on-methods - name: Check that compat cache do not use on class methods - entry: ./scripts/ci/pre_commit/compat_cache_on_methods.py - language: python - pass_filenames: true - files: ^airflow/.*\.py$ - exclude: ^.*/.*_vendor/ - id: check-code-deprecations name: Check deprecations categories in decorators entry: ./scripts/ci/pre_commit/check_deprecations.py @@ -1192,12 +1177,23 @@ repos: entry: "^\\s*from re\\s|^\\s*import re\\s" pass_filenames: true files: \.py$ - exclude: ^airflow/providers|^dev/.*\.py$|^scripts/.*\.py$|^tests/|^\w+_tests/|^docs/.*\.py$|^airflow/utils/helpers.py$|^hatch_build.py$ + exclude: | + (?x) + ^airflow/utils/helpers.py$ | + ^providers/src/airflow/providers/ | + ^(providers/)?tests/ | + ^dev/.*\.py$ | + ^scripts/.*\.py$ | + ^docker_tests/.*$ | + ^helm_tests/.*$ | + 
^tests_common/.*$ | + ^docs/.*\.py$ | + ^hatch_build.py$ - id: check-provider-docs-valid name: Validate provider doc files entry: ./scripts/ci/pre_commit/check_provider_docs.py language: python - files: ^airflow/providers/.*/provider\.yaml|^docs/.* + files: ^providers/src/airflow/providers/.*/provider\.yaml|^docs/.* additional_dependencies: ['rich>=12.4.4', 'pyyaml', 'jinja2'] require_serial: true - id: bandit @@ -1288,9 +1284,20 @@ repos: - id: mypy-airflow name: Run mypy for airflow language: python - entry: ./scripts/ci/pre_commit/mypy.py --namespace-packages + entry: ./scripts/ci/pre_commit/mypy.py files: \.py$ - exclude: ^.*/.*_vendor/|^airflow/migrations|^airflow/providers|^dev|^scripts|^docs|^provider_packages|^tests/providers|^tests/system/providers|^tests/dags/test_imports.py|^clients/python/test_.*\.py|^performance + exclude: | + (?x) + ^.*/.*_vendor/ | + ^airflow/migrations | + ^providers/ | + ^dev | + ^scripts | + ^docs | + ^provider_packages | + ^performance/ | + ^tests/dags/test_imports.py | + ^clients/python/test_.*\.py require_serial: true additional_dependencies: ['rich>=12.4.4'] - id: mypy-airflow @@ -1306,7 +1313,7 @@ repos: name: Run mypy for providers language: python entry: ./scripts/ci/pre_commit/mypy.py --namespace-packages - files: ^airflow/providers/.*\.py$|^tests/providers/.*\.py$|^tests/system/providers/.*\.py$ + files: ^providers/src/airflow/providers/.*\.py$|^providers/tests//.*\.py$ exclude: ^.*/.*_vendor/ require_serial: true additional_dependencies: ['rich>=12.4.4'] @@ -1314,7 +1321,7 @@ repos: stages: ['manual'] name: Run mypy for providers (manual) language: python - entry: ./scripts/ci/pre_commit/mypy_folder.py airflow/providers + entry: ./scripts/ci/pre_commit/mypy_folder.py providers/src/airflow/providers pass_filenames: false files: ^.*\.py$ require_serial: true @@ -1340,14 +1347,14 @@ repos: name: Validate provider.yaml files entry: ./scripts/ci/pre_commit/check_provider_yaml_files.py language: python - files: ^airflow/providers/.*/provider\.yaml$ + files: ^providers/src/airflow/providers/.*/provider\.yaml$ additional_dependencies: ['rich>=12.4.4'] require_serial: true - id: check-template-fields-valid name: Check templated fields mapped in operators/sensors language: python entry: ./scripts/ci/pre_commit/check_template_fields.py - files: ^airflow/.*/sensors/.*\.py$|^airflow/.*/operators/.*\.py$ + files: ^(providers/src/)?airflow/.*/(sensors|operators)/.*\.py$ additional_dependencies: [ 'rich>=12.4.4' ] require_serial: true - id: update-migration-references diff --git a/.readthedocs.yml b/.readthedocs.yml index aa16e3a8e3d57..4aa28ee78b036 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -20,7 +20,7 @@ formats: [] sphinx: configuration: docs/rtd-deprecation/conf.py python: - version: "3.8" + version: "3.9" install: - method: pip path: . diff --git a/3rd-party-licenses/LICENSE-unicodecsv.txt b/3rd-party-licenses/LICENSE-unicodecsv.txt deleted file mode 100644 index 6d004c776de0a..0000000000000 --- a/3rd-party-licenses/LICENSE-unicodecsv.txt +++ /dev/null @@ -1,25 +0,0 @@ -Copyright 2010 Jeremy Dunck. All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, are -permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, this list of - conditions and the following disclaimer. - - 2. 
Redistributions in binary form must reproduce the above copyright notice, this list - of conditions and the following disclaimer in the documentation and/or other materials - provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY JEREMY DUNCK ``AS IS'' AND ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND -FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JEREMY DUNCK OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF -ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -The views and conclusions contained in the software and documentation are those of the -authors and should not be interpreted as representing official policies, either expressed -or implied, of Jeremy Dunck. diff --git a/Dockerfile b/Dockerfile index cfb894ac87d22..c762cae900f2f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -47,10 +47,10 @@ ARG AIRFLOW_USER_HOME_DIR=/home/airflow # latest released version here ARG AIRFLOW_VERSION="2.10.2" -ARG PYTHON_BASE_IMAGE="python:3.8-slim-bookworm" +ARG PYTHON_BASE_IMAGE="python:3.9-slim-bookworm" ARG AIRFLOW_PIP_VERSION=24.2 -ARG AIRFLOW_UV_VERSION=0.4.17 +ARG AIRFLOW_UV_VERSION=0.4.20 ARG AIRFLOW_USE_UV="false" ARG UV_HTTP_TIMEOUT="300" ARG AIRFLOW_IMAGE_REPOSITORY="https://github.com/apache/airflow" @@ -877,8 +877,18 @@ function install_airflow() { # Determine the installation_command_flags based on AIRFLOW_INSTALLATION_METHOD method local installation_command_flags if [[ ${AIRFLOW_INSTALLATION_METHOD} == "." ]]; then + # We need _a_ file in there otherwise the editable install doesn't include anything in the .pth file + mkdir -p ./providers/src/airflow/providers/ + touch ./providers/src/airflow/providers/__init__.py + + # Similarly we need _a_ file for task_sdk too + mkdir -p ./task_sdk/src/airflow/sdk/ + touch ./task_sdk/src/airflow/__init__.py + + trap 'rm -f ./providers/src/airflow/providers/__init__.py ./task_sdk/src/airflow/__init__.py 2>/dev/null' EXIT + # When installing from sources - we always use `--editable` mode - installation_command_flags="--editable .[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" + installation_command_flags="--editable .[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION} --editable ./providers --editable ./task_sdk" elif [[ ${AIRFLOW_INSTALLATION_METHOD} == "apache-airflow" ]]; then installation_command_flags="apache-airflow[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" elif [[ ${AIRFLOW_INSTALLATION_METHOD} == apache-airflow\ @\ * ]]; then diff --git a/Dockerfile.ci b/Dockerfile.ci index f7b7bb4172025..0cdf9899799f3 100644 --- a/Dockerfile.ci +++ b/Dockerfile.ci @@ -16,7 +16,7 @@ # # WARNING: THIS DOCKERFILE IS NOT INTENDED FOR PRODUCTION USE OR DEPLOYMENT. 
# -ARG PYTHON_BASE_IMAGE="python:3.8-slim-bookworm" +ARG PYTHON_BASE_IMAGE="python:3.9-slim-bookworm" ############################################################################################## # This is the script image where we keep all inlined bash scripts needed in other segments @@ -676,8 +676,18 @@ function install_airflow() { # Determine the installation_command_flags based on AIRFLOW_INSTALLATION_METHOD method local installation_command_flags if [[ ${AIRFLOW_INSTALLATION_METHOD} == "." ]]; then + # We need _a_ file in there otherwise the editable install doesn't include anything in the .pth file + mkdir -p ./providers/src/airflow/providers/ + touch ./providers/src/airflow/providers/__init__.py + + # Similarly we need _a_ file for task_sdk too + mkdir -p ./task_sdk/src/airflow/sdk/ + touch ./task_sdk/src/airflow/__init__.py + + trap 'rm -f ./providers/src/airflow/providers/__init__.py ./task_sdk/src/airflow/__init__.py 2>/dev/null' EXIT + # When installing from sources - we always use `--editable` mode - installation_command_flags="--editable .[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" + installation_command_flags="--editable .[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION} --editable ./providers --editable ./task_sdk" elif [[ ${AIRFLOW_INSTALLATION_METHOD} == "apache-airflow" ]]; then installation_command_flags="apache-airflow[${AIRFLOW_EXTRAS}]${AIRFLOW_VERSION_SPECIFICATION}" elif [[ ${AIRFLOW_INSTALLATION_METHOD} == apache-airflow\ @\ * ]]; then @@ -810,7 +820,7 @@ chmod 1777 /tmp AIRFLOW_SOURCES=$(cd "${IN_CONTAINER_DIR}/../.." || exit 1; pwd) -PYTHON_MAJOR_MINOR_VERSION=${PYTHON_MAJOR_MINOR_VERSION:=3.8} +PYTHON_MAJOR_MINOR_VERSION=${PYTHON_MAJOR_MINOR_VERSION:=3.9} export AIRFLOW_HOME=${AIRFLOW_HOME:=${HOME}} @@ -904,6 +914,9 @@ function environment_initialization() { # Added to have run-tests on path export PATH=${PATH}:${AIRFLOW_SOURCES} + # Directory where simple auth manager store generated passwords + export AIRFLOW_AUTH_MANAGER_CREDENTIAL_DIRECTORY="/files" + mkdir -pv "${AIRFLOW_HOME}/logs/" # Change the default worker_concurrency for tests @@ -1170,7 +1183,10 @@ SHELL ["/bin/bash", "-o", "pipefail", "-o", "errexit", "-o", "nounset", "-o", "n ARG PYTHON_BASE_IMAGE ARG AIRFLOW_IMAGE_REPOSITORY="https://github.com/apache/airflow" -# By increasing this number we can do force build of all dependencies +# By increasing this number we can do force build of all dependencies. +# NOTE! When you want to make sure dependencies are installed from scratch in your PR after removing +# some dependencies, you also need to set "disable image cache" in your PR to make sure the image is +# not built using the "main" version of those dependencies. ARG DEPENDENCIES_EPOCH_NUMBER="11" # Make sure noninteractive debian install is used and language variables set @@ -1262,7 +1278,7 @@ ARG DEFAULT_CONSTRAINTS_BRANCH="constraints-main" ARG AIRFLOW_CI_BUILD_EPOCH="10" ARG AIRFLOW_PRE_CACHED_PIP_PACKAGES="true" ARG AIRFLOW_PIP_VERSION=24.2 -ARG AIRFLOW_UV_VERSION=0.4.17 +ARG AIRFLOW_UV_VERSION=0.4.20 ARG AIRFLOW_USE_UV="true" # Setup PIP # By default PIP install run without cache to make image smaller @@ -1350,7 +1366,10 @@ RUN bash /scripts/docker/install_pipx_tools.sh # We copy pyproject.toml and other files needed to perform setup of dependencies # So in case pyproject.toml changes we can install latest dependencies required. 
COPY pyproject.toml ${AIRFLOW_SOURCES}/pyproject.toml +COPY providers/pyproject.toml ${AIRFLOW_SOURCES}/providers/pyproject.toml +COPY task_sdk/pyproject.toml ${AIRFLOW_SOURCES}/task_sdk/pyproject.toml COPY airflow/__init__.py ${AIRFLOW_SOURCES}/airflow/ +COPY tests_common/ ${AIRFLOW_SOURCES}/tests_common/ COPY generated/* ${AIRFLOW_SOURCES}/generated/ COPY constraints/* ${AIRFLOW_SOURCES}/constraints/ COPY LICENSE ${AIRFLOW_SOURCES}/LICENSE diff --git a/INSTALL b/INSTALL index 5ccabe2ff3270..6583d9de44206 100644 --- a/INSTALL +++ b/INSTALL @@ -141,9 +141,7 @@ This is what it shows currently: ┏━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ Name ┃ Type ┃ Description ┃ ┡━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ -│ default │ virtual │ Default environment with Python 3.8 for maximum compatibility │ -├─────────────┼─────────┼───────────────────────────────────────────────────────────────┤ -│ airflow-38 │ virtual │ Environment with Python 3.8. No devel installed. │ +│ default │ virtual │ Default environment with Python 3.9 for maximum compatibility │ ├─────────────┼─────────┼───────────────────────────────────────────────────────────────┤ │ airflow-39 │ virtual │ Environment with Python 3.9. No devel installed. │ ├─────────────┼─────────┼───────────────────────────────────────────────────────────────┤ @@ -154,7 +152,7 @@ This is what it shows currently: │ airflow-312 │ virtual │ Environment with Python 3.12. No devel installed │ └─────────────┴─────────┴───────────────────────────────────────────────────────────────┘ -The default env (if you have not used one explicitly) is `default` and it is a Python 3.8 +The default env (if you have not used one explicitly) is `default` and it is a Python 3.9 virtualenv for maximum compatibility with `devel` extra installed - this devel extra contains the minimum set of dependencies and tools that should be used during unit testing of core Airflow and running all `airflow` CLI commands - without support for providers or databases. @@ -228,15 +226,15 @@ to avoid "works-for-me" syndrome, where you use different versions of dependenci that are used in main CI tests and by other contributors. There are different constraint files for different Python versions. For example, this command will install -all basic devel requirements and requirements of Google provider as last successfully tested for Python 3.8: +all basic devel requirements and requirements of Google provider as last successfully tested for Python 3.9: pip install -e ".[devel,google]"" \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.8.txt" + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.9.txt" Using the 'constraints-no-providers' constraint files, you can upgrade Airflow without paying attention to the provider's dependencies. This allows you to keep installed provider dependencies and install the latest supported ones using pure Airflow core. pip install -e ".[devel]" \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-no-providers-3.8.txt" + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-no-providers-3.9.txt" Airflow extras ============== diff --git a/INTHEWILD.md b/INTHEWILD.md index c65597acfb60f..310d018b98329 100644 --- a/INTHEWILD.md +++ b/INTHEWILD.md @@ -101,7 +101,7 @@ Currently, **officially** using Airflow: 1. 
[Bombora Inc](https://bombora.com/) [[@jeffkpayne](https://github.com/jeffkpayne), [@pakelley](https://github.com/pakelley), [@dNavalta](https://github.com/dNavalta), [@austynh](https://github.com/austynh), [@TheOriginalAlex](https://github.com/TheOriginalAlex)] 1. [Bonial International GmbH](https://www.bonial.com/) 1. [Bonnier Broadcasting](http://www.bonnierbroadcasting.com) [[@wileeam](https://github.com/wileeam)] -1. [Bosch (Robert Bosch GmbH)](https://www.bosch.com/stories/topics/automated-driving/) [[@jscheffl](https://github.com/jscheffl), [@clellmann](https://github.com/clellmann), [@wolfdn](https://github.com/wolfdn), [@AutomationDev85](https://github.com/AutomationDev85)] +1. [Bosch (Robert Bosch GmbH)](https://www.bosch.com/stories/topics/automated-driving/) [[@jscheffl](https://github.com/jscheffl), [@clellmann](https://github.com/clellmann), [@wolfdn](https://github.com/wolfdn), [@AutomationDev85](https://github.com/AutomationDev85), [@majorosdonat](https://github.com/majorosdonat)] 1. [BounceX](http://www.bouncex.com) [[@JoshFerge](https://github.com/JoshFerge), [@hudsonrio](https://github.com/hudsonrio), [@ronniekritou](https://github.com/ronniekritou)] 1. [Braintree](https://www.braintreepayments.com) [[@coopergillan](https://github.com/coopergillan), [@curiousjazz77](https://github.com/curiousjazz77), [@raymondberg](https://github.com/raymondberg)] 1. [Branch](https://branch.io) [[@sdebarshi](https://github.com/sdebarshi), [@dmitrig01](https://github.com/dmitrig01)] diff --git a/README.md b/README.md index 3cd6416e93405..0419ae0456070 100644 --- a/README.md +++ b/README.md @@ -99,7 +99,7 @@ Apache Airflow is tested with: | | Main version (dev) | Stable version (2.10.2) | |------------|----------------------------|----------------------------| -| Python | 3.8, 3.9, 3.10, 3.11, 3.12 | 3.8, 3.9, 3.10, 3.11, 3.12 | +| Python | 3.9, 3.10, 3.11, 3.12 | 3.8, 3.9, 3.10, 3.11, 3.12 | | Platform | AMD64/ARM64(\*) | AMD64/ARM64(\*) | | Kubernetes | 1.28, 1.29, 1.30, 1.31 | 1.27, 1.28, 1.29, 1.30 | | PostgreSQL | 12, 13, 14, 15, 16, 17 | 12, 13, 14, 15, 16 | @@ -178,14 +178,14 @@ them to the appropriate format and workflow that your tool requires. ```bash pip install 'apache-airflow==2.10.2' \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.10.2/constraints-3.8.txt" + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.10.2/constraints-3.9.txt" ``` 2. Installing with extras (i.e., postgres, google) ```bash pip install 'apache-airflow[postgres,google]==2.10.2' \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.10.2/constraints-3.8.txt" + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.10.2/constraints-3.9.txt" ``` For information on installing provider packages, check @@ -313,7 +313,7 @@ They are based on the official release schedule of Python and Kubernetes, nicely 1. We drop support for Python and Kubernetes versions when they reach EOL. Except for Kubernetes, a version stays supported by Airflow if two major cloud providers still provide support for it. We drop support for those EOL versions in main right after EOL date, and it is effectively removed when we release - the first new MINOR (Or MAJOR if there is no new MINOR version) of Airflow. For example, for Python 3.8 it + the first new MINOR (Or MAJOR if there is no new MINOR version) of Airflow. 
For example, for Python 3.9 it means that we will drop support in main right after 27.06.2023, and the first MAJOR or MINOR version of Airflow released after will not have it. diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 69e666461efae..c2408bd63f504 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -107,6 +107,12 @@ Airflow 2.10.0 (2024-08-15) Significant Changes ^^^^^^^^^^^^^^^^^^^ +Scarf based telemetry: Airflow now collect telemetry data (#39510) +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +Airflow integrates Scarf to collect basic usage data during operation. Deployments can opt-out of data collection by +setting the ``[usage_data_collection]enabled`` option to ``False``, or the ``SCARF_ANALYTICS=false`` environment variable. +See :ref:`Usage data collection FAQ ` for more information. + Datasets no longer trigger inactive DAGs (#38891) """"""""""""""""""""""""""""""""""""""""""""""""" @@ -155,12 +161,6 @@ Previously known as hybrid executors, this new feature allows Airflow to use mul to use a specific executor that suits its needs best. A single DAG can contain tasks all using different executors. Please see the Airflow documentation for more details. Note: This feature is still experimental. See `documentation on Executor `_ for a more detailed description. -Scarf based telemetry: Does Airflow collect any telemetry data? (#39510) -"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" -Airflow integrates Scarf to collect basic usage data during operation. Deployments can opt-out of data collection by setting the ``[usage_data_collection]enabled`` option to False, or the SCARF_ANALYTICS=false environment variable. -See `FAQ on this `_ for more information. - - New Features """""""""""" - AIP-61 Hybrid Execution (`AIP-61 `_) diff --git a/airflow/api_connexion/endpoints/backfill_endpoint.py b/airflow/api_connexion/endpoints/backfill_endpoint.py index a0e728c5bc464..94d6ad21f05f1 100644 --- a/airflow/api_connexion/endpoints/backfill_endpoint.py +++ b/airflow/api_connexion/endpoints/backfill_endpoint.py @@ -19,14 +19,14 @@ import logging from functools import wraps -from typing import TYPE_CHECKING, cast +from typing import TYPE_CHECKING -import pendulum -from pendulum import DateTime +from flask import request +from marshmallow import ValidationError from sqlalchemy import select from airflow.api_connexion import security -from airflow.api_connexion.exceptions import Conflict, NotFound +from airflow.api_connexion.exceptions import BadRequest, Conflict, NotFound from airflow.api_connexion.schemas.backfill_schema import ( BackfillCollection, backfill_collection_schema, @@ -42,6 +42,8 @@ from airflow.www.decorators import action_logging if TYPE_CHECKING: + from datetime import datetime + from sqlalchemy.orm import Session from airflow.api_connexion.types import APIResponse @@ -119,12 +121,33 @@ def get_backfill(*, backfill_id: int, session: Session = NEW_SESSION, **kwargs): raise NotFound("Backfill not found") +def backfill_obj_to_kwargs(f): + """ + Convert the request body (containing backfill object json) to kwargs. + + The main point here is to be compatible with the ``requires_access_dag`` decorator, + which takes dag_id kwarg and doesn't support json request body. 
+ """ + + @wraps(f) + def inner(): + body = request.json + try: + obj = backfill_schema.load(body) + except ValidationError as err: + raise BadRequest(detail=str(err.messages)) + return f(**obj) + + return inner + + +@backfill_obj_to_kwargs @security.requires_access_dag("PUT") @action_logging def create_backfill( dag_id: str, - from_date: str, - to_date: str, + from_date: datetime, + to_date: datetime, max_active_runs: int = 10, reverse: bool = False, dag_run_conf: dict | None = None, @@ -132,8 +155,8 @@ def create_backfill( try: backfill_obj = _create_backfill( dag_id=dag_id, - from_date=cast(DateTime, pendulum.parse(from_date)), - to_date=cast(DateTime, pendulum.parse(to_date)), + from_date=from_date, + to_date=to_date, max_active_runs=max_active_runs, reverse=reverse, dag_run_conf=dag_run_conf, diff --git a/airflow/api_connexion/endpoints/dag_endpoint.py b/airflow/api_connexion/endpoints/dag_endpoint.py index 3d0d3dd8bfabe..0352297bfffd4 100644 --- a/airflow/api_connexion/endpoints/dag_endpoint.py +++ b/airflow/api_connexion/endpoints/dag_endpoint.py @@ -52,6 +52,7 @@ from airflow.api_connexion.types import APIResponse, UpdateMask +@mark_fastapi_migration_done @security.requires_access_dag("GET") @provide_session def get_dag( @@ -215,6 +216,7 @@ def patch_dags(limit, session, offset=0, only_active=True, tags=None, dag_id_pat return dags_collection_schema.dump(DAGCollection(dags=dags, total_entries=total_entries)) +@mark_fastapi_migration_done @security.requires_access_dag("DELETE") @action_logging @provide_session diff --git a/airflow/api_connexion/endpoints/dag_run_endpoint.py b/airflow/api_connexion/endpoints/dag_run_endpoint.py index 44891c0ef2c84..74eae13ddd4d0 100644 --- a/airflow/api_connexion/endpoints/dag_run_endpoint.py +++ b/airflow/api_connexion/endpoints/dag_run_endpoint.py @@ -63,6 +63,7 @@ from airflow.models import DagModel, DagRun from airflow.timetables.base import DataInterval from airflow.utils.airflow_flask_app import get_airflow_app +from airflow.utils.api_migration import mark_fastapi_migration_done from airflow.utils.db import get_query_count from airflow.utils.session import NEW_SESSION, provide_session from airflow.utils.state import DagRunState @@ -77,6 +78,7 @@ from airflow.api_connexion.types import APIResponse +@mark_fastapi_migration_done @security.requires_access_dag("DELETE", DagAccessEntity.RUN) @provide_session @action_logging @@ -90,6 +92,7 @@ def delete_dag_run(*, dag_id: str, dag_run_id: str, session: Session = NEW_SESSI return NoContent, HTTPStatus.NO_CONTENT +@mark_fastapi_migration_done @security.requires_access_dag("GET", DagAccessEntity.RUN) @provide_session def get_dag_run( diff --git a/airflow/api_connexion/endpoints/dag_stats_endpoint.py b/airflow/api_connexion/endpoints/dag_stats_endpoint.py index 705af10d41d99..3b6c6ab8e0df3 100644 --- a/airflow/api_connexion/endpoints/dag_stats_endpoint.py +++ b/airflow/api_connexion/endpoints/dag_stats_endpoint.py @@ -39,24 +39,40 @@ @security.requires_access_dag("GET", DagAccessEntity.RUN) @provide_session -def get_dag_stats(*, dag_ids: str, session: Session = NEW_SESSION) -> APIResponse: +def get_dag_stats( + *, + dag_ids: str | None = None, + limit: int | None = None, + offset: int | None = None, + session: Session = NEW_SESSION, +) -> APIResponse: """Get Dag statistics.""" allowed_dag_ids = get_auth_manager().get_permitted_dag_ids(methods=["GET"], user=g.user) - dags_list = set(dag_ids.split(",")) - filter_dag_ids = dags_list.intersection(allowed_dag_ids) + if dag_ids: + dags_list = 
set(dag_ids.split(",")) + filter_dag_ids = dags_list.intersection(allowed_dag_ids) + else: + filter_dag_ids = allowed_dag_ids + query_dag_ids = sorted(list(filter_dag_ids)) + if offset is not None: + query_dag_ids = query_dag_ids[offset:] + if limit is not None: + query_dag_ids = query_dag_ids[:limit] query = ( select(DagRun.dag_id, DagRun.state, func.count(DagRun.state)) .group_by(DagRun.dag_id, DagRun.state) - .where(DagRun.dag_id.in_(filter_dag_ids)) + .where(DagRun.dag_id.in_(query_dag_ids)) ) dag_state_stats = session.execute(query) - dag_state_data = {(dag_id, state): count for dag_id, state, count in dag_state_stats} - dag_stats = { - dag_id: [{"state": state, "count": dag_state_data.get((dag_id, state), 0)} for state in DagRunState] - for dag_id in filter_dag_ids - } - - dags = [{"dag_id": stat, "stats": dag_stats[stat]} for stat in dag_stats] - return dag_stats_collection_schema.dump({"dags": dags, "total_entries": len(dag_stats)}) + dags = [ + { + "dag_id": dag_id, + "stats": [ + {"state": state, "count": dag_state_data.get((dag_id, state), 0)} for state in DagRunState + ], + } + for dag_id in query_dag_ids + ] + return dag_stats_collection_schema.dump({"dags": dags, "total_entries": len(dags)}) diff --git a/airflow/api_connexion/endpoints/variable_endpoint.py b/airflow/api_connexion/endpoints/variable_endpoint.py index 1375484a422fc..20e7ce1edeabe 100644 --- a/airflow/api_connexion/endpoints/variable_endpoint.py +++ b/airflow/api_connexion/endpoints/variable_endpoint.py @@ -31,6 +31,7 @@ from airflow.api_connexion.schemas.variable_schema import variable_collection_schema, variable_schema from airflow.models import Variable from airflow.security import permissions +from airflow.utils.api_migration import mark_fastapi_migration_done from airflow.utils.log.action_logger import action_event_from_permission from airflow.utils.session import NEW_SESSION, provide_session from airflow.www.decorators import action_logging @@ -43,6 +44,7 @@ RESOURCE_EVENT_PREFIX = "variable" +@mark_fastapi_migration_done @security.requires_access_variable("DELETE") @action_logging( event=action_event_from_permission( @@ -57,6 +59,7 @@ def delete_variable(*, variable_key: str) -> Response: return Response(status=HTTPStatus.NO_CONTENT) +@mark_fastapi_migration_done @security.requires_access_variable("GET") @provide_session def get_variable(*, variable_key: str, session: Session = NEW_SESSION) -> Response: @@ -92,6 +95,7 @@ def get_variables( ) +@mark_fastapi_migration_done @security.requires_access_variable("PUT") @provide_session @action_logging( @@ -126,6 +130,7 @@ def patch_variable( return variable_schema.dump(variable) +@mark_fastapi_migration_done @security.requires_access_variable("POST") @action_logging( event=action_event_from_permission( diff --git a/airflow/api_connexion/openapi/v1.yaml b/airflow/api_connexion/openapi/v1.yaml index 3e120ce2f5118..e99f91639c49e 100644 --- a/airflow/api_connexion/openapi/v1.yaml +++ b/airflow/api_connexion/openapi/v1.yaml @@ -277,57 +277,12 @@ paths: x-openapi-router-controller: airflow.api_connexion.endpoints.backfill_endpoint operationId: create_backfill tags: [Backfill] - parameters: - - name: dag_id - in: query - schema: - type: string - required: true - description: | - Create dag runs for this dag. - - - name: from_date - in: query - schema: - type: string - format: date-time - required: true - description: | - Create dag runs with logical dates from this date onward, including this date. 
- - - name: to_date - in: query - schema: - type: string - format: date-time - required: true - description: | - Create dag runs for logical dates up to but not including this date. - - - name: max_active_runs - in: query - schema: - type: integer - required: false - description: | - Maximum number of active DAG runs for the the backfill. - - - name: reverse - in: query - schema: - type: boolean - required: false - description: | - If true, run the dag runs in descending order of logical date. - - - name: config - in: query - schema: - # todo: AIP-78 make this object - type: string - required: false - description: | - If true, run the dag runs in descending order of logical date. + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/Backfill" responses: "200": description: Success. @@ -2429,11 +2384,13 @@ paths: operationId: get_dag_stats tags: [DagStats] parameters: + - $ref: "#/components/parameters/PageLimit" + - $ref: "#/components/parameters/PageOffset" - name: dag_ids in: query schema: type: string - required: true + required: false description: | One or more DAG IDs separated by commas to filter relevant Dags. responses: @@ -2920,7 +2877,7 @@ components: nullable: true description: To date of the backfill (exclusive). dag_run_conf: - type: string + type: object nullable: true description: Dag run conf to be forwarded to the dag runs. is_paused: @@ -3463,7 +3420,7 @@ components: allOf: - type: object properties: - import_errors: + dag_warnings: type: array items: $ref: "#/components/schemas/DagWarning" diff --git a/airflow/api_connexion/schemas/backfill_schema.py b/airflow/api_connexion/schemas/backfill_schema.py index 7f83d76df6dfc..db496bf1ac5d5 100644 --- a/airflow/api_connexion/schemas/backfill_schema.py +++ b/airflow/api_connexion/schemas/backfill_schema.py @@ -34,15 +34,16 @@ class Meta: model = Backfill id = auto_field(dump_only=True) - dag_id = auto_field(dump_only=True) - from_date = auto_field(dump_only=True) - to_date = auto_field(dump_only=True) + dag_id = auto_field() + from_date = auto_field() + to_date = auto_field() dag_run_conf = fields.Dict(allow_none=True) - is_paused = auto_field(dump_only=True) - max_active_runs = auto_field(dump_only=True) - created_at = auto_field(dump_only=True) - completed_at = auto_field(dump_only=True) - updated_at = auto_field(dump_only=True) + reverse = fields.Boolean() + is_paused = auto_field() + max_active_runs = auto_field() + created_at = auto_field() + completed_at = auto_field() + updated_at = auto_field() class BackfillDagRunSchema(SQLAlchemySchema): diff --git a/airflow/api_fastapi/openapi/v1-generated.yaml b/airflow/api_fastapi/openapi/v1-generated.yaml index d272dd03d9302..56f48c73e987f 100644 --- a/airflow/api_fastapi/openapi/v1-generated.yaml +++ b/airflow/api_fastapi/openapi/v1-generated.yaml @@ -34,7 +34,46 @@ paths: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' - /public/dags: + /ui/dashboard/historical_metrics_data: + get: + tags: + - Dashboard + summary: Historical Metrics + description: Return cluster activity historical metrics. 
+ operationId: historical_metrics + parameters: + - name: start_date + in: query + required: true + schema: + type: string + title: Start Date + - name: end_date + in: query + required: true + schema: + type: string + title: End Date + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/HistoricalMetricDataResponse' + '400': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Bad Request + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + /public/dags/: get: tags: - DAG @@ -252,13 +291,13 @@ paths: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' - /public/dags/{dag_id}/details: + /public/dags/{dag_id}: get: tags: - DAG - summary: Get Dag Details - description: Get details of DAG. - operationId: get_dag_details + summary: Get Dag + description: Get basic information about a DAG. + operationId: get_dag parameters: - name: dag_id in: path @@ -272,7 +311,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/DAGDetailsResponse' + $ref: '#/components/schemas/DAGResponse' '400': content: application/json: @@ -303,7 +342,6 @@ paths: schema: $ref: '#/components/schemas/HTTPExceptionResponse' description: Unprocessable Entity - /public/dags/{dag_id}: patch: tags: - DAG @@ -370,6 +408,106 @@ paths: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' + delete: + tags: + - DAG + summary: Delete Dag + description: Delete the specific DAG. + operationId: delete_dag + parameters: + - name: dag_id + in: path + required: true + schema: + type: string + title: Dag Id + responses: + '200': + description: Successful Response + content: + application/json: + schema: {} + '400': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Bad Request + '401': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Unauthorized + '403': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Forbidden + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Not Found + '422': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Unprocessable Entity + /public/dags/{dag_id}/details: + get: + tags: + - DAG + summary: Get Dag Details + description: Get details of DAG. 
+ operationId: get_dag_details + parameters: + - name: dag_id + in: path + required: true + schema: + type: string + title: Dag Id + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/DAGDetailsResponse' + '400': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Bad Request + '401': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Unauthorized + '403': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Forbidden + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Not Found + '422': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Unprocessable Entity /public/connections/{connection_id}: delete: tags: @@ -455,6 +593,297 @@ paths: application/json: schema: $ref: '#/components/schemas/HTTPValidationError' + /public/variables/{variable_key}: + delete: + tags: + - Variable + summary: Delete Variable + description: Delete a variable entry. + operationId: delete_variable + parameters: + - name: variable_key + in: path + required: true + schema: + type: string + title: Variable Key + responses: + '204': + description: Successful Response + '401': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Unauthorized + '403': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Forbidden + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Not Found + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + get: + tags: + - Variable + summary: Get Variable + description: Get a variable entry. + operationId: get_variable + parameters: + - name: variable_key + in: path + required: true + schema: + type: string + title: Variable Key + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/VariableResponse' + '401': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Unauthorized + '403': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Forbidden + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Not Found + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + patch: + tags: + - Variable + summary: Patch Variable + description: Update a variable by key. 
+ operationId: patch_variable + parameters: + - name: variable_key + in: path + required: true + schema: + type: string + title: Variable Key + - name: update_mask + in: query + required: false + schema: + anyOf: + - type: array + items: + type: string + - type: 'null' + title: Update Mask + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/VariableBody' + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/VariableResponse' + '400': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Bad Request + '401': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Unauthorized + '403': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Forbidden + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Not Found + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + /public/variables/: + post: + tags: + - Variable + summary: Post Variable + description: Create a variable. + operationId: post_variable + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/VariableBody' + required: true + responses: + '201': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/VariableResponse' + '401': + description: Unauthorized + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + '403': + description: Forbidden + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + /public/dags/{dag_id}/dagRuns/{dag_run_id}: + get: + tags: + - DagRun + summary: Get Dag Run + operationId: get_dag_run + parameters: + - name: dag_id + in: path + required: true + schema: + type: string + title: Dag Id + - name: dag_run_id + in: path + required: true + schema: + type: string + title: Dag Run Id + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/DAGRunResponse' + '401': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Unauthorized + '403': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Forbidden + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Not Found + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + delete: + tags: + - DagRun + summary: Delete Dag Run + description: Delete a DAG Run entry. 
+ operationId: delete_dag_run + parameters: + - name: dag_id + in: path + required: true + schema: + type: string + title: Dag Id + - name: dag_run_id + in: path + required: true + schema: + type: string + title: Dag Run Id + responses: + '204': + description: Successful Response + '400': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Bad Request + '401': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Unauthorized + '403': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Forbidden + '404': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Not Found + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' components: schemas: ConnectionResponse: @@ -769,7 +1198,7 @@ components: required: - is_paused title: DAGPatchBody - description: Dag Serializer for updatable body. + description: Dag Serializer for updatable bodies. DAGResponse: properties: dag_id: @@ -923,6 +1352,131 @@ components: - file_token title: DAGResponse description: DAG serializer for responses. + DAGRunResponse: + properties: + run_id: + anyOf: + - type: string + - type: 'null' + title: Run Id + dag_id: + type: string + title: Dag Id + logical_date: + anyOf: + - type: string + format: date-time + - type: 'null' + title: Logical Date + start_date: + anyOf: + - type: string + format: date-time + - type: 'null' + title: Start Date + end_date: + anyOf: + - type: string + format: date-time + - type: 'null' + title: End Date + data_interval_start: + anyOf: + - type: string + format: date-time + - type: 'null' + title: Data Interval Start + data_interval_end: + anyOf: + - type: string + format: date-time + - type: 'null' + title: Data Interval End + last_scheduling_decision: + anyOf: + - type: string + format: date-time + - type: 'null' + title: Last Scheduling Decision + run_type: + $ref: '#/components/schemas/DagRunType' + state: + $ref: '#/components/schemas/DagRunState' + external_trigger: + type: boolean + title: External Trigger + triggered_by: + $ref: '#/components/schemas/DagRunTriggeredByType' + conf: + type: object + title: Conf + note: + anyOf: + - type: string + - type: 'null' + title: Note + type: object + required: + - run_id + - dag_id + - logical_date + - start_date + - end_date + - data_interval_start + - data_interval_end + - last_scheduling_decision + - run_type + - state + - external_trigger + - triggered_by + - conf + - note + title: DAGRunResponse + description: DAG Run serializer for responses. + DAGRunStates: + properties: + queued: + type: integer + title: Queued + running: + type: integer + title: Running + success: + type: integer + title: Success + failed: + type: integer + title: Failed + type: object + required: + - queued + - running + - success + - failed + title: DAGRunStates + description: DAG Run States for responses. + DAGRunTypes: + properties: + backfill: + type: integer + title: Backfill + scheduled: + type: integer + title: Scheduled + manual: + type: integer + title: Manual + dataset_triggered: + type: integer + title: Dataset Triggered + type: object + required: + - backfill + - scheduled + - manual + - dataset_triggered + title: DAGRunTypes + description: DAG Run Types for responses. 
DagRunState: type: string enum: @@ -939,6 +1493,28 @@ components: so please ensure that their values always match the ones with the same name in TaskInstanceState.' + DagRunTriggeredByType: + type: string + enum: + - cli + - operator + - rest_api + - ui + - test + - timetable + - dataset + - backfill + title: DagRunTriggeredByType + description: Class with TriggeredBy types for DagRun. + DagRunType: + type: string + enum: + - backfill + - scheduled + - manual + - dataset_triggered + title: DagRunType + description: Class with DagRun types. DagTagPydantic: properties: name: @@ -975,6 +1551,79 @@ components: title: Detail type: object title: HTTPValidationError + HistoricalMetricDataResponse: + properties: + dag_run_types: + $ref: '#/components/schemas/DAGRunTypes' + dag_run_states: + $ref: '#/components/schemas/DAGRunStates' + task_instance_states: + $ref: '#/components/schemas/TaskInstantState' + type: object + required: + - dag_run_types + - dag_run_states + - task_instance_states + title: HistoricalMetricDataResponse + description: Historical Metric Data serializer for responses. + TaskInstantState: + properties: + no_status: + type: integer + title: No Status + removed: + type: integer + title: Removed + scheduled: + type: integer + title: Scheduled + queued: + type: integer + title: Queued + running: + type: integer + title: Running + success: + type: integer + title: Success + restarting: + type: integer + title: Restarting + failed: + type: integer + title: Failed + up_for_retry: + type: integer + title: Up For Retry + up_for_reschedule: + type: integer + title: Up For Reschedule + upstream_failed: + type: integer + title: Upstream Failed + skipped: + type: integer + title: Skipped + deferred: + type: integer + title: Deferred + type: object + required: + - no_status + - removed + - scheduled + - queued + - running + - success + - restarting + - failed + - up_for_retry + - up_for_reschedule + - upstream_failed + - skipped + - deferred + title: TaskInstantState + description: TaskInstance serializer for responses. ValidationError: properties: loc: @@ -996,3 +1645,47 @@ components: - msg - type title: ValidationError + VariableBody: + properties: + key: + type: string + title: Key + description: + anyOf: + - type: string + - type: 'null' + title: Description + value: + anyOf: + - type: string + - type: 'null' + title: Value + type: object + required: + - key + - description + - value + title: VariableBody + description: Variable serializer for bodies. + VariableResponse: + properties: + key: + type: string + title: Key + description: + anyOf: + - type: string + - type: 'null' + title: Description + value: + anyOf: + - type: string + - type: 'null' + title: Value + type: object + required: + - key + - description + - value + title: VariableResponse + description: Variable serializer for responses. 
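A note on the regenerated OpenAPI document above: it now describes, among other things, the new variables CRUD routes, the DAG run get/delete routes, and the UI dashboard historical-metrics route. Below is a minimal client sketch against those paths; the base URL, the absence of authentication, and the ISO-8601 date strings are illustrative assumptions — only the paths, methods, and payload/response shapes come from the spec shown above.

```python
# Minimal sketch of calling the new FastAPI routes described in v1-generated.yaml.
# Assumptions: a hypothetical locally running API at http://localhost:8080 and no auth configured.
import requests

BASE = "http://localhost:8080"  # hypothetical deployment URL

# POST /public/variables/ takes a VariableBody (key, description, value) and returns 201.
resp = requests.post(
    f"{BASE}/public/variables/",
    json={"key": "my_var", "description": "example variable", "value": "42"},
)
resp.raise_for_status()

# GET /public/variables/{variable_key} returns a VariableResponse (key, description, value).
print(requests.get(f"{BASE}/public/variables/my_var").json())

# GET /ui/dashboard/historical_metrics_data requires start_date and end_date query strings;
# ISO-8601 timestamps are assumed here for the date format.
metrics = requests.get(
    f"{BASE}/ui/dashboard/historical_metrics_data",
    params={"start_date": "2024-01-01T00:00:00Z", "end_date": "2024-02-01T00:00:00Z"},
)
print(metrics.json()["dag_run_states"])  # DAGRunStates: queued/running/success/failed counts
```

Per the schemas above, the variable value may come back redacted by the serializer, and the historical-metrics payload nests the DAGRunTypes, DAGRunStates, and TaskInstantState objects defined in the spec.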
diff --git a/airflow/api_fastapi/parameters.py b/airflow/api_fastapi/parameters.py index 504014602f3b5..59d61ad68600d 100644 --- a/airflow/api_fastapi/parameters.py +++ b/airflow/api_fastapi/parameters.py @@ -18,14 +18,18 @@ from __future__ import annotations from abc import ABC, abstractmethod +from datetime import datetime from typing import TYPE_CHECKING, Any, Generic, List, TypeVar from fastapi import Depends, HTTPException, Query +from pendulum.parsing.exceptions import ParserError +from pydantic import AfterValidator from sqlalchemy import case, or_ from typing_extensions import Annotated, Self from airflow.models.dag import DagModel, DagTag from airflow.models.dagrun import DagRun +from airflow.utils import timezone from airflow.utils.state import DagRunState if TYPE_CHECKING: @@ -235,6 +239,24 @@ def depends(self, last_dag_run_state: DagRunState | None = None) -> _LastDagRunS return self.set_value(last_dag_run_state) +def _safe_parse_datetime(date_to_check: str) -> datetime: + """ + Parse datetime and raise error for invalid dates. + + :param date_to_check: the string value to be parsed + """ + if not date_to_check: + raise ValueError(f"{date_to_check} cannot be None.") + try: + return timezone.parse(date_to_check, strict=True) + except (TypeError, ParserError): + raise HTTPException( + 400, f"Invalid datetime: {date_to_check!r}. Please check the date parameter have this value." + ) + + +# Common Safe DateTime +DateTimeQuery = Annotated[str, AfterValidator(_safe_parse_datetime)] # DAG QueryLimit = Annotated[_LimitFilter, Depends(_LimitFilter().depends)] QueryOffset = Annotated[_OffsetFilter, Depends(_OffsetFilter().depends)] diff --git a/airflow/api_fastapi/serializers/dag_run.py b/airflow/api_fastapi/serializers/dag_run.py new file mode 100644 index 0000000000000..4622fac645c07 --- /dev/null +++ b/airflow/api_fastapi/serializers/dag_run.py @@ -0,0 +1,44 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from __future__ import annotations + +from datetime import datetime + +from pydantic import BaseModel, Field + +from airflow.utils.state import DagRunState +from airflow.utils.types import DagRunTriggeredByType, DagRunType + + +class DAGRunResponse(BaseModel): + """DAG Run serializer for responses.""" + + dag_run_id: str | None = Field(alias="run_id") + dag_id: str + logical_date: datetime | None + start_date: datetime | None + end_date: datetime | None + data_interval_start: datetime | None + data_interval_end: datetime | None + last_scheduling_decision: datetime | None + run_type: DagRunType + state: DagRunState + external_trigger: bool + triggered_by: DagRunTriggeredByType + conf: dict + note: str | None diff --git a/airflow/api_fastapi/serializers/dags.py b/airflow/api_fastapi/serializers/dags.py index 17677054c4c1c..c9d48aac222eb 100644 --- a/airflow/api_fastapi/serializers/dags.py +++ b/airflow/api_fastapi/serializers/dags.py @@ -24,9 +24,9 @@ from itsdangerous import URLSafeSerializer from pendulum.tz.timezone import FixedTimezone, Timezone from pydantic import ( - AliasChoices, + AliasGenerator, BaseModel, - Field, + ConfigDict, computed_field, field_validator, ) @@ -77,6 +77,14 @@ def get_owners(cls, v: Any) -> list[str] | None: return v.split(",") return v + @field_validator("timetable_summary", mode="before") + @classmethod + def get_timetable_summary(cls, tts: str | None) -> str | None: + """Validate the string representation of timetable_summary.""" + if tts is None or tts == "None": + return None + return str(tts) + # Mypy issue https://github.com/python/mypy/issues/1362 @computed_field # type: ignore[misc] @property @@ -87,7 +95,7 @@ def file_token(self) -> str: class DAGPatchBody(BaseModel): - """Dag Serializer for updatable body.""" + """Dag Serializer for updatable bodies.""" is_paused: bool @@ -103,9 +111,7 @@ class DAGDetailsResponse(DAGResponse): """Specific serializer for DAG Details responses.""" catchup: bool - dag_run_timeout: timedelta | None = Field( - validation_alias=AliasChoices("dag_run_timeout", "dagrun_timeout") - ) + dag_run_timeout: timedelta | None dataset_expression: dict | None doc_md: str | None start_date: datetime | None @@ -114,11 +120,19 @@ class DAGDetailsResponse(DAGResponse): orientation: str params: abc.MutableMapping | None render_template_as_native_obj: bool - template_search_path: Iterable[str] | None = Field( - validation_alias=AliasChoices("template_search_path", "template_searchpath") - ) + template_search_path: Iterable[str] | None timezone: str | None - last_parsed: datetime | None = Field(validation_alias=AliasChoices("last_parsed", "last_loaded")) + last_parsed: datetime | None + + model_config = ConfigDict( + alias_generator=AliasGenerator( + validation_alias=lambda field_name: { + "dag_run_timeout": "dagrun_timeout", + "last_parsed": "last_loaded", + "template_search_path": "template_searchpath", + }.get(field_name, field_name), + ) + ) @field_validator("timezone", mode="before") @classmethod @@ -128,14 +142,6 @@ def get_timezone(cls, tz: Timezone | FixedTimezone) -> str | None: return None return str(tz) - @field_validator("timetable_summary", mode="before") - @classmethod - def get_timetable_summary(cls, tts: str | None) -> str | None: - """Validate the string representation of timetable_summary.""" - if tts is None or tts == "None": - return None - return str(tts) - @field_validator("params", mode="before") @classmethod def get_params(cls, params: abc.MutableMapping | None) -> dict | None: diff --git 
a/airflow/api_fastapi/serializers/dashboard.py b/airflow/api_fastapi/serializers/dashboard.py new file mode 100644 index 0000000000000..f5a38fa22ea87 --- /dev/null +++ b/airflow/api_fastapi/serializers/dashboard.py @@ -0,0 +1,63 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +from pydantic import BaseModel + + +class DAGRunTypes(BaseModel): + """DAG Run Types for responses.""" + + backfill: int + scheduled: int + manual: int + dataset_triggered: int + + +class DAGRunStates(BaseModel): + """DAG Run States for responses.""" + + queued: int + running: int + success: int + failed: int + + +class TaskInstantState(BaseModel): + """TaskInstance serializer for responses.""" + + no_status: int + removed: int + scheduled: int + queued: int + running: int + success: int + restarting: int + failed: int + up_for_retry: int + up_for_reschedule: int + upstream_failed: int + skipped: int + deferred: int + + +class HistoricalMetricDataResponse(BaseModel): + """Historical Metric Data serializer for responses.""" + + dag_run_types: DAGRunTypes + dag_run_states: DAGRunStates + task_instance_states: TaskInstantState diff --git a/airflow/api_fastapi/serializers/variables.py b/airflow/api_fastapi/serializers/variables.py new file mode 100644 index 0000000000000..1ecc87425a24f --- /dev/null +++ b/airflow/api_fastapi/serializers/variables.py @@ -0,0 +1,60 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
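# --- Illustrative sketch, not part of the patch: the AliasGenerator-based validation aliases
# --- used by DAGDetailsResponse earlier in this patch. ORM attribute names (e.g.
# --- `dagrun_timeout`) are mapped to API field names at validation time without per-field
# --- Field(validation_alias=...) declarations. The `Demo*` names and sample values are
# --- assumptions made for this sketch.
from __future__ import annotations

from datetime import timedelta

from pydantic import AliasGenerator, BaseModel, ConfigDict


class DemoDagDetails(BaseModel):
    model_config = ConfigDict(
        alias_generator=AliasGenerator(
            # Only fields listed in the mapping get a different validation alias.
            validation_alias=lambda field_name: {
                "dag_run_timeout": "dagrun_timeout",
            }.get(field_name, field_name),
        ),
    )

    dag_id: str
    dag_run_timeout: timedelta | None


class DemoOrmDag:
    dag_id = "demo_dag"
    dagrun_timeout = timedelta(hours=1)  # ORM-side name differs from the API field name


details = DemoDagDetails.model_validate(DemoOrmDag(), from_attributes=True)
print(details.dag_run_timeout)  # 1:00:00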
+ +from __future__ import annotations + +import json + +from pydantic import BaseModel, ConfigDict, Field, model_validator +from typing_extensions import Self + +from airflow.utils.log.secrets_masker import redact + + +class VariableBase(BaseModel): + """Base Variable serializer.""" + + model_config = ConfigDict(populate_by_name=True) + + key: str + description: str | None + + +class VariableResponse(VariableBase): + """Variable serializer for responses.""" + + val: str | None = Field(alias="value") + + @model_validator(mode="after") + def redact_val(self) -> Self: + if self.val is None: + return self + try: + val_dict = json.loads(self.val) + redacted_dict = redact(val_dict, max_depth=1) + self.val = json.dumps(redacted_dict) + return self + except json.JSONDecodeError: + # value is not a serialized string representation of a dict. + self.val = redact(self.val, self.key) + return self + + +class VariableBody(VariableBase): + """Variable serializer for bodies.""" + + value: str | None diff --git a/airflow/api_fastapi/views/public/__init__.py b/airflow/api_fastapi/views/public/__init__.py index 9c0eefebb875e..9d90a0966802c 100644 --- a/airflow/api_fastapi/views/public/__init__.py +++ b/airflow/api_fastapi/views/public/__init__.py @@ -18,7 +18,9 @@ from __future__ import annotations from airflow.api_fastapi.views.public.connections import connections_router +from airflow.api_fastapi.views.public.dag_run import dag_run_router from airflow.api_fastapi.views.public.dags import dags_router +from airflow.api_fastapi.views.public.variables import variables_router from airflow.api_fastapi.views.router import AirflowRouter public_router = AirflowRouter(prefix="/public") @@ -26,3 +28,5 @@ public_router.include_router(dags_router) public_router.include_router(connections_router) +public_router.include_router(variables_router) +public_router.include_router(dag_run_router) diff --git a/airflow/api_fastapi/views/public/connections.py b/airflow/api_fastapi/views/public/connections.py index 850a017988162..94e9b614e9c02 100644 --- a/airflow/api_fastapi/views/public/connections.py +++ b/airflow/api_fastapi/views/public/connections.py @@ -27,11 +27,11 @@ from airflow.api_fastapi.views.router import AirflowRouter from airflow.models import Connection -connections_router = AirflowRouter(tags=["Connection"]) +connections_router = AirflowRouter(tags=["Connection"], prefix="/connections") @connections_router.delete( - "/connections/{connection_id}", + "/{connection_id}", status_code=204, responses=create_openapi_http_exception_doc([401, 403, 404]), ) @@ -49,7 +49,7 @@ async def delete_connection( @connections_router.get( - "/connections/{connection_id}", + "/{connection_id}", responses=create_openapi_http_exception_doc([401, 403, 404]), ) async def get_connection( diff --git a/airflow/api_fastapi/views/public/dag_run.py b/airflow/api_fastapi/views/public/dag_run.py new file mode 100644 index 0000000000000..2f44f03ea6103 --- /dev/null +++ b/airflow/api_fastapi/views/public/dag_run.py @@ -0,0 +1,59 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +from fastapi import Depends, HTTPException +from sqlalchemy import select +from sqlalchemy.orm import Session +from typing_extensions import Annotated + +from airflow.api_fastapi.db.common import get_session +from airflow.api_fastapi.openapi.exceptions import create_openapi_http_exception_doc +from airflow.api_fastapi.serializers.dag_run import DAGRunResponse +from airflow.api_fastapi.views.router import AirflowRouter +from airflow.models import DagRun + +dag_run_router = AirflowRouter(tags=["DagRun"], prefix="/dags/{dag_id}/dagRuns") + + +@dag_run_router.get("/{dag_run_id}", responses=create_openapi_http_exception_doc([401, 403, 404])) +async def get_dag_run( + dag_id: str, dag_run_id: str, session: Annotated[Session, Depends(get_session)] +) -> DAGRunResponse: + dag_run = session.scalar(select(DagRun).filter_by(dag_id=dag_id, run_id=dag_run_id)) + if dag_run is None: + raise HTTPException( + 404, f"The DagRun with dag_id: `{dag_id}` and run_id: `{dag_run_id}` was not found" + ) + + return DAGRunResponse.model_validate(dag_run, from_attributes=True) + + +@dag_run_router.delete( + "/{dag_run_id}", status_code=204, responses=create_openapi_http_exception_doc([400, 401, 403, 404]) +) +async def delete_dag_run(dag_id: str, dag_run_id: str, session: Annotated[Session, Depends(get_session)]): + """Delete a DAG Run entry.""" + dag_run = session.scalar(select(DagRun).filter_by(dag_id=dag_id, run_id=dag_run_id)) + + if dag_run is None: + raise HTTPException( + 404, f"The DagRun with dag_id: `{dag_id}` and run_id: `{dag_run_id}` was not found" + ) + + session.delete(dag_run) diff --git a/airflow/api_fastapi/views/public/dags.py b/airflow/api_fastapi/views/public/dags.py index ef76e184505c6..eb8233a7f700d 100644 --- a/airflow/api_fastapi/views/public/dags.py +++ b/airflow/api_fastapi/views/public/dags.py @@ -17,11 +17,12 @@ from __future__ import annotations -from fastapi import Depends, HTTPException, Query, Request +from fastapi import Depends, HTTPException, Query, Request, Response from sqlalchemy import update from sqlalchemy.orm import Session from typing_extensions import Annotated +from airflow.api.common import delete_dag as delete_dag_module from airflow.api_fastapi.db.common import ( get_session, paginated_select, @@ -48,12 +49,13 @@ DAGResponse, ) from airflow.api_fastapi.views.router import AirflowRouter +from airflow.exceptions import AirflowException, DagNotFound from airflow.models import DAG, DagModel -dags_router = AirflowRouter(tags=["DAG"]) +dags_router = AirflowRouter(tags=["DAG"], prefix="/dags") -@dags_router.get("/dags") +@dags_router.get("/") async def get_dags( limit: QueryLimit, offset: QueryOffset, @@ -92,9 +94,27 @@ async def get_dags( ) -@dags_router.get( - "/dags/{dag_id}/details", responses=create_openapi_http_exception_doc([400, 401, 403, 404, 422]) -) +@dags_router.get("/{dag_id}", responses=create_openapi_http_exception_doc([400, 401, 403, 404, 422])) +async def get_dag( + dag_id: str, session: Annotated[Session, Depends(get_session)], request: Request +) -> DAGResponse: + """Get basic information about a DAG.""" + 
dag: DAG = request.app.state.dag_bag.get_dag(dag_id) + if not dag: + raise HTTPException(404, f"Dag with id {dag_id} was not found") + + dag_model: DagModel = session.get(DagModel, dag_id) + if not dag_model: + raise HTTPException(404, f"Unable to obtain dag with id {dag_id} from session") + + for key, value in dag.__dict__.items(): + if not key.startswith("_") and not hasattr(dag_model, key): + setattr(dag_model, key, value) + + return DAGResponse.model_validate(dag_model, from_attributes=True) + + +@dags_router.get("/{dag_id}/details", responses=create_openapi_http_exception_doc([400, 401, 403, 404, 422])) async def get_dag_details( dag_id: str, session: Annotated[Session, Depends(get_session)], request: Request ) -> DAGDetailsResponse: @@ -114,7 +134,7 @@ async def get_dag_details( return DAGDetailsResponse.model_validate(dag_model, from_attributes=True) -@dags_router.patch("/dags/{dag_id}", responses=create_openapi_http_exception_doc([400, 401, 403, 404])) +@dags_router.patch("/{dag_id}", responses=create_openapi_http_exception_doc([400, 401, 403, 404])) async def patch_dag( dag_id: str, patch_body: DAGPatchBody, @@ -141,7 +161,7 @@ async def patch_dag( return DAGResponse.model_validate(dag, from_attributes=True) -@dags_router.patch("/dags", responses=create_openapi_http_exception_doc([400, 401, 403, 404])) +@dags_router.patch("/", responses=create_openapi_http_exception_doc([400, 401, 403, 404])) async def patch_dags( patch_body: DAGPatchBody, limit: QueryLimit, @@ -186,3 +206,18 @@ async def patch_dags( dags=[DAGResponse.model_validate(dag, from_attributes=True) for dag in dags], total_entries=total_entries, ) + + +@dags_router.delete("/{dag_id}", responses=create_openapi_http_exception_doc([400, 401, 403, 404, 422])) +async def delete_dag( + dag_id: str, + session: Annotated[Session, Depends(get_session)], +) -> Response: + """Delete the specific DAG.""" + try: + delete_dag_module.delete_dag(dag_id, session=session) + except DagNotFound: + raise HTTPException(404, f"Dag with id: {dag_id} was not found") + except AirflowException: + raise HTTPException(409, f"Task instances of dag with id: '{dag_id}' are still running") + return Response(status_code=204) diff --git a/airflow/api_fastapi/views/public/variables.py b/airflow/api_fastapi/views/public/variables.py new file mode 100644 index 0000000000000..a61b9bb930023 --- /dev/null +++ b/airflow/api_fastapi/views/public/variables.py @@ -0,0 +1,95 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
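# --- Illustrative sketch, not part of the patch: the router-prefix refactor applied to the
# --- connections, dags, dag_run and variables routers in this patch. The resource prefix now
# --- lives on the router itself, so included routers compose into the final path (e.g.
# --- /public/variables/{variable_key}). `demo_*` names are assumptions for this sketch.
from fastapi import APIRouter, FastAPI

demo_variables_router = APIRouter(tags=["Variable"], prefix="/variables")


@demo_variables_router.get("/{variable_key}")
async def demo_get_variable(variable_key: str) -> dict:
    return {"key": variable_key}


demo_public_router = APIRouter(prefix="/public")
demo_public_router.include_router(demo_variables_router)

demo_app = FastAPI()
demo_app.include_router(demo_public_router)

# Full path as seen by clients:
print([route.path for route in demo_app.routes if "variables" in route.path])
# ['/public/variables/{variable_key}']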
+from __future__ import annotations + +from fastapi import Depends, HTTPException, Query +from sqlalchemy import select +from sqlalchemy.orm import Session +from typing_extensions import Annotated + +from airflow.api_fastapi.db.common import get_session +from airflow.api_fastapi.openapi.exceptions import create_openapi_http_exception_doc +from airflow.api_fastapi.serializers.variables import VariableBody, VariableResponse +from airflow.api_fastapi.views.router import AirflowRouter +from airflow.models.variable import Variable + +variables_router = AirflowRouter(tags=["Variable"], prefix="/variables") + + +@variables_router.delete( + "/{variable_key}", + status_code=204, + responses=create_openapi_http_exception_doc([401, 403, 404]), +) +async def delete_variable( + variable_key: str, + session: Annotated[Session, Depends(get_session)], +): + """Delete a variable entry.""" + if Variable.delete(variable_key, session) == 0: + raise HTTPException(404, f"The Variable with key: `{variable_key}` was not found") + + +@variables_router.get("/{variable_key}", responses=create_openapi_http_exception_doc([401, 403, 404])) +async def get_variable( + variable_key: str, + session: Annotated[Session, Depends(get_session)], +) -> VariableResponse: + """Get a variable entry.""" + variable = session.scalar(select(Variable).where(Variable.key == variable_key).limit(1)) + + if variable is None: + raise HTTPException(404, f"The Variable with key: `{variable_key}` was not found") + + return VariableResponse.model_validate(variable, from_attributes=True) + + +@variables_router.patch("/{variable_key}", responses=create_openapi_http_exception_doc([400, 401, 403, 404])) +async def patch_variable( + variable_key: str, + patch_body: VariableBody, + session: Annotated[Session, Depends(get_session)], + update_mask: list[str] | None = Query(None), +) -> VariableResponse: + """Update a variable by key.""" + if patch_body.key != variable_key: + raise HTTPException(400, "Invalid body, key from request body doesn't match uri parameter") + non_update_fields = {"key"} + variable = session.scalar(select(Variable).filter_by(key=variable_key).limit(1)) + if not variable: + raise HTTPException(404, f"The Variable with key: `{variable_key}` was not found") + if update_mask: + data = patch_body.model_dump(include=set(update_mask) - non_update_fields) + else: + data = patch_body.model_dump(exclude=non_update_fields) + for key, val in data.items(): + setattr(variable, key, val) + session.add(variable) + return variable + + +@variables_router.post("/", status_code=201, responses=create_openapi_http_exception_doc([401, 403])) +async def post_variable( + post_body: VariableBody, + session: Annotated[Session, Depends(get_session)], +) -> VariableResponse: + """Create a variable.""" + Variable.set(**post_body.model_dump(), session=session) + + variable = session.scalar(select(Variable).where(Variable.key == post_body.key).limit(1)) + + return VariableResponse.model_validate(variable, from_attributes=True) diff --git a/airflow/api_fastapi/views/ui/__init__.py b/airflow/api_fastapi/views/ui/__init__.py index 8495ac5e5e6a4..f01686cc99ee2 100644 --- a/airflow/api_fastapi/views/ui/__init__.py +++ b/airflow/api_fastapi/views/ui/__init__.py @@ -18,7 +18,9 @@ from airflow.api_fastapi.views.router import AirflowRouter from airflow.api_fastapi.views.ui.assets import assets_router +from airflow.api_fastapi.views.ui.dashboard import dashboard_router ui_router = AirflowRouter(prefix="/ui") ui_router.include_router(assets_router) 
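# --- Illustrative sketch, not part of the patch: the `update_mask` handling in
# --- `patch_variable` just above. Only masked, updatable fields of the request body are
# --- applied to the stored variable; `key` is never overwritten. `DemoVariableBody` and the
# --- sample values are assumptions made for this sketch.
from __future__ import annotations

from pydantic import BaseModel


class DemoVariableBody(BaseModel):
    key: str
    value: str | None
    description: str | None


patch_body = DemoVariableBody(key="my_var", value="new-value", description="new description")
non_update_fields = {"key"}
update_mask = ["value"]  # e.g. ?update_mask=value in the query string

if update_mask:
    data = patch_body.model_dump(include=set(update_mask) - non_update_fields)
else:
    data = patch_body.model_dump(exclude=non_update_fields)

print(data)  # {'value': 'new-value'} -- description is untouched, key is never changed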
+ui_router.include_router(dashboard_router) diff --git a/airflow/api_fastapi/views/ui/assets.py b/airflow/api_fastapi/views/ui/assets.py index 4a4ad1d0df9b4..1a198745f11f3 100644 --- a/airflow/api_fastapi/views/ui/assets.py +++ b/airflow/api_fastapi/views/ui/assets.py @@ -77,7 +77,7 @@ async def next_run_assets( ), isouter=True, ) - .where(DagScheduleAssetReference.dag_id == dag_id, ~AssetModel.is_orphaned) + .where(DagScheduleAssetReference.dag_id == dag_id, AssetModel.active.has()) .group_by(AssetModel.id, AssetModel.uri) .order_by(AssetModel.uri) ) diff --git a/airflow/api_fastapi/views/ui/dashboard.py b/airflow/api_fastapi/views/ui/dashboard.py new file mode 100644 index 0000000000000..0d6b69a1ced5b --- /dev/null +++ b/airflow/api_fastapi/views/ui/dashboard.py @@ -0,0 +1,100 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +from typing import TYPE_CHECKING + +from fastapi import Depends +from sqlalchemy import func, select +from sqlalchemy.orm import Session +from typing_extensions import Annotated + +from airflow.api_fastapi.openapi.exceptions import create_openapi_http_exception_doc +from airflow.api_fastapi.parameters import DateTimeQuery +from airflow.api_fastapi.serializers.dashboard import HistoricalMetricDataResponse +from airflow.models.dagrun import DagRun, DagRunType +from airflow.models.taskinstance import TaskInstance +from airflow.utils.state import DagRunState, TaskInstanceState + +if TYPE_CHECKING: + from sqlalchemy.orm import Session +from airflow.api_fastapi.db.common import get_session +from airflow.api_fastapi.views.router import AirflowRouter +from airflow.utils import timezone + +dashboard_router = AirflowRouter(tags=["Dashboard"]) + + +@dashboard_router.get( + "/dashboard/historical_metrics_data", + include_in_schema=False, + responses=create_openapi_http_exception_doc([400]), +) +async def historical_metrics( + start_date: DateTimeQuery, + end_date: DateTimeQuery, + session: Annotated[Session, Depends(get_session)], +) -> HistoricalMetricDataResponse: + """Return cluster activity historical metrics.""" + # DagRuns + dag_run_types = session.execute( + select(DagRun.run_type, func.count(DagRun.run_id)) + .where( + DagRun.start_date >= start_date, + func.coalesce(DagRun.end_date, timezone.utcnow()) <= end_date, + ) + .group_by(DagRun.run_type) + ).all() + + dag_run_states = session.execute( + select(DagRun.state, func.count(DagRun.run_id)) + .where( + DagRun.start_date >= start_date, + func.coalesce(DagRun.end_date, timezone.utcnow()) <= end_date, + ) + .group_by(DagRun.state) + ).all() + + # TaskInstances + task_instance_states = session.execute( + select(TaskInstance.state, func.count(TaskInstance.run_id)) + .join(TaskInstance.dag_run) + .where( + DagRun.start_date >= start_date, + 
func.coalesce(DagRun.end_date, timezone.utcnow()) <= end_date, + ) + .group_by(TaskInstance.state) + ).all() + + # Combining historical metrics response as dictionary + historical_metrics_response = { + "dag_run_types": { + **{dag_run_type.value: 0 for dag_run_type in DagRunType}, + **dict(dag_run_types), + }, + "dag_run_states": { + **{dag_run_state.value: 0 for dag_run_state in DagRunState}, + **dict(dag_run_states), + }, + "task_instance_states": { + "no_status": 0, + **{ti_state.value: 0 for ti_state in TaskInstanceState}, + **{ti_state or "no_status": sum_value for ti_state, sum_value in task_instance_states}, + }, + } + + return HistoricalMetricDataResponse.model_validate(historical_metrics_response, from_attributes=True) diff --git a/airflow/assets/__init__.py b/airflow/assets/__init__.py index deb9aa593ded5..e11b9c49df36d 100644 --- a/airflow/assets/__init__.py +++ b/airflow/assets/__init__.py @@ -17,6 +17,7 @@ from __future__ import annotations +import logging import os import urllib.parse import warnings @@ -41,6 +42,9 @@ __all__ = ["Asset", "AssetAll", "AssetAny"] +log = logging.getLogger(__name__) + + def normalize_noop(parts: SplitResult) -> SplitResult: """ Place-hold a :class:`~urllib.parse.SplitResult`` normalizer. @@ -109,14 +113,16 @@ def _sanitize_uri(uri: str) -> str: try: parsed = normalizer(parsed) except ValueError as exception: - if conf.getboolean("core", "strict_asset_uri_validation", fallback=False): + if conf.getboolean("core", "strict_asset_uri_validation", fallback=True): + log.error( + ( + "The Asset URI %s is not AIP-60 compliant: %s. " + "Please check https://airflow.apache.org/docs/apache-airflow/stable/authoring-and-scheduling/assets.html" + ), + uri, + exception, + ) raise - warnings.warn( - f"The Asset URI {uri} is not AIP-60 compliant: {exception}. 
" - f"In Airflow 3, this will raise an exception.", - UserWarning, - stacklevel=3, - ) return urllib.parse.urlunsplit(parsed) diff --git a/airflow/assets/manager.py b/airflow/assets/manager.py index d68a0efc87d12..cd4d72e633a8e 100644 --- a/airflow/assets/manager.py +++ b/airflow/assets/manager.py @@ -266,14 +266,15 @@ def _send_dag_priority_parsing_request_if_needed(fileloc: str) -> str | None: return None return req.fileloc - (_send_dag_priority_parsing_request_if_needed(fileloc) for fileloc in file_locs) + for fileloc in file_locs: + _send_dag_priority_parsing_request_if_needed(fileloc) @classmethod def _postgres_send_dag_priority_parsing_request(cls, file_locs: Iterable[str], session: Session) -> None: from sqlalchemy.dialects.postgresql import insert stmt = insert(DagPriorityParsingRequest).on_conflict_do_nothing() - session.execute(stmt, {"fileloc": fileloc for fileloc in file_locs}) + session.execute(stmt, [{"fileloc": fileloc} for fileloc in file_locs]) def resolve_asset_manager() -> AssetManager: diff --git a/airflow/auth/managers/simple/simple_auth_manager.py b/airflow/auth/managers/simple/simple_auth_manager.py index 4a9639a998c46..78dccf7c2a980 100644 --- a/airflow/auth/managers/simple/simple_auth_manager.py +++ b/airflow/auth/managers/simple/simple_auth_manager.py @@ -30,7 +30,7 @@ from airflow.auth.managers.base_auth_manager import BaseAuthManager, ResourceMethod from airflow.auth.managers.simple.user import SimpleAuthManagerUser from airflow.auth.managers.simple.views.auth import SimpleAuthManagerAuthenticationViews -from hatch_build import AIRFLOW_ROOT_PATH +from airflow.configuration import AIRFLOW_HOME if TYPE_CHECKING: from airflow.auth.managers.models.base_user import BaseUser @@ -78,20 +78,22 @@ class SimpleAuthManager(BaseAuthManager): :param appbuilder: the flask app builder """ - # File that contains the generated passwords - GENERATED_PASSWORDS_FILE = ( - AIRFLOW_ROOT_PATH / "generated" / "simple_auth_manager_passwords.json.generated" - ) - # Cache containing the password associated to a username passwords: dict[str, str] = {} + @staticmethod + def get_generated_password_file() -> str: + return os.path.join( + os.getenv("AIRFLOW_AUTH_MANAGER_CREDENTIAL_DIRECTORY", AIRFLOW_HOME), + "simple_auth_manager_passwords.json.generated", + ) + def init(self) -> None: user_passwords_from_file = {} # Read passwords from file - if os.path.isfile(self.GENERATED_PASSWORDS_FILE): - with open(self.GENERATED_PASSWORDS_FILE) as file: + if os.path.isfile(self.get_generated_password_file()): + with open(self.get_generated_password_file()) as file: passwords_str = file.read().strip() user_passwords_from_file = json.loads(passwords_str) @@ -109,7 +111,7 @@ def init(self) -> None: self._print_output(f"Password for user '{user['username']}': {self.passwords[user['username']]}") - with open(self.GENERATED_PASSWORDS_FILE, "w") as file: + with open(self.get_generated_password_file(), "w") as file: file.write(json.dumps(self.passwords)) def is_logged_in(self) -> bool: diff --git a/airflow/cli/cli_config.py b/airflow/cli/cli_config.py index 4b42f2b96fb2c..8fe239c27ba40 100644 --- a/airflow/cli/cli_config.py +++ b/airflow/cli/cli_config.py @@ -264,7 +264,7 @@ def string_lower_type(val): help="The number of next execution datetimes to show", ) -# backfill +# misc ARG_MARK_SUCCESS = Arg( ("-m", "--mark-success"), help="Mark jobs as succeeded without running them", action="store_true" ) @@ -300,78 +300,34 @@ def string_lower_type(val): ) ARG_VERBOSE = Arg(("-v", "--verbose"), help="Make logging 
output more verbose", action="store_true") ARG_LOCAL = Arg(("-l", "--local"), help="Run the task using the LocalExecutor", action="store_true") -ARG_DONOT_PICKLE = Arg( - ("-x", "--donot-pickle"), - help=( - "Do not attempt to pickle the DAG object to send over " - "to the workers, just tell the workers to run their version " - "of the code" - ), - action="store_true", -) -ARG_BF_IGNORE_DEPENDENCIES = Arg( - ("-i", "--ignore-dependencies"), - help=( - "Skip upstream tasks, run only the tasks " - "matching the regexp. Only works in conjunction " - "with task_regex" - ), - action="store_true", -) ARG_POOL = Arg(("--pool",), "Resource pool to use") -ARG_DELAY_ON_LIMIT = Arg( - ("--delay-on-limit",), - help=( - "Amount of time in seconds to wait when the limit " - "on maximum active dag runs (max_active_runs) has " - "been reached before trying to execute a dag run " - "again" - ), - type=float, - default=1.0, -) -ARG_RESET_DAG_RUN = Arg( - ("--reset-dagruns",), - help=( - "if set, the backfill will delete existing " - "backfill-related DAG runs and start " - "anew with fresh, running DAG runs" - ), - action="store_true", -) -ARG_RERUN_FAILED_TASKS = Arg( - ("--rerun-failed-tasks",), - help=( - "if set, the backfill will auto-rerun " - "all the failed tasks for the backfill date range " - "instead of throwing exceptions" - ), - action="store_true", -) -ARG_CONTINUE_ON_FAILURES = Arg( - ("--continue-on-failures",), - help=("if set, the backfill will keep going even if some of the tasks failed"), - action="store_true", + + +# backfill +ARG_BACKFILL_DAG = Arg(flags=("--dag",), help="The dag to backfill.", required=True) +ARG_BACKFILL_FROM_DATE = Arg( + ("--from-date",), help="Earliest logical date to backfill.", type=parsedate, required=True ) -ARG_DISABLE_RETRY = Arg( - ("--disable-retry",), - help=("if set, the backfill will set tasks as failed without retrying."), - action="store_true", +ARG_BACKFILL_TO_DATE = Arg( + ("--to-date",), help="Latest logical date to backfill", type=parsedate, required=True ) +ARG_DAG_RUN_CONF = Arg(flags=("--dag-run-conf",), help="JSON dag run configuration.") ARG_RUN_BACKWARDS = Arg( - ( - "-B", - "--run-backwards", - ), + flags=("--run-backwards",), help=( - "if set, the backfill will run tasks from the most " - "recent day first. if there are tasks that depend_on_past " - "this option will throw an exception" + "If set, the backfill will run tasks from the most recent logical date first. " + "Not supported if there are tasks that depend_on_past." 
), action="store_true", ) +ARG_MAX_ACTIVE_RUNS = Arg( + ("--max-active-runs",), + type=positive_int(allow_zero=False), + help="Max active runs for this backfill.", +) +# misc ARG_TREAT_DAG_ID_AS_REGEX = Arg( ("--treat-dag-id-as-regex",), help=("if set, dag_id will be treated as regex instead of an exact string"), @@ -968,7 +924,7 @@ def string_lower_type(val): # jobs check ARG_JOB_TYPE_FILTER = Arg( ("--job-type",), - choices=("BackfillJob", "LocalTaskJob", "SchedulerJob", "TriggererJob", "DagProcessorJob"), + choices=("LocalTaskJob", "SchedulerJob", "TriggererJob", "DagProcessorJob"), action="store", help="The type of job(s) that will be checked.", ) @@ -1056,6 +1012,22 @@ class GroupCommand(NamedTuple): CLICommand = Union[ActionCommand, GroupCommand] +BACKFILL_COMMANDS = ( + ActionCommand( + name="create", + help="Create a backfill for a dag.", + description="Run subsections of a DAG for a specified date range.", + func=lazy_load_command("airflow.cli.commands.backfill_command.create_backfill"), + args=( + ARG_BACKFILL_DAG, + ARG_BACKFILL_FROM_DATE, + ARG_BACKFILL_TO_DATE, + ARG_DAG_RUN_CONF, + ARG_RUN_BACKWARDS, + ARG_MAX_ACTIVE_RUNS, + ), + ), +) DAGS_COMMANDS = ( ActionCommand( name="details", @@ -1227,40 +1199,6 @@ class GroupCommand(NamedTuple): ARG_VERBOSE, ), ), - ActionCommand( - name="backfill", - help="Run subsections of a DAG for a specified date range", - description=( - "Run subsections of a DAG for a specified date range. If reset_dag_run option is used, " - "backfill will first prompt users whether airflow should clear all the previous dag_run and " - "task_instances within the backfill date range. If rerun_failed_tasks is used, backfill " - "will auto re-run the previous failed task instances within the backfill date range" - ), - func=lazy_load_command("airflow.cli.commands.dag_command.dag_backfill"), - args=( - ARG_DAG_ID, - ARG_TASK_REGEX, - ARG_START_DATE, - ARG_END_DATE, - ARG_MARK_SUCCESS, - ARG_LOCAL, - ARG_DONOT_PICKLE, - ARG_YES, - ARG_CONTINUE_ON_FAILURES, - ARG_DISABLE_RETRY, - ARG_BF_IGNORE_DEPENDENCIES, - ARG_SUBDIR, - ARG_POOL, - ARG_DELAY_ON_LIMIT, - ARG_DRY_RUN, - ARG_VERBOSE, - ARG_CONF, - ARG_RESET_DAG_RUN, - ARG_RERUN_FAILED_TASKS, - ARG_RUN_BACKWARDS, - ARG_TREAT_DAG_ID_AS_REGEX, - ), - ), ActionCommand( name="test", help="Execute one single DagRun", @@ -1913,6 +1851,11 @@ class GroupCommand(NamedTuple): help="Manage DAGs", subcommands=DAGS_COMMANDS, ), + GroupCommand( + name="backfill", + help="Manage backfills", + subcommands=BACKFILL_COMMANDS, + ), GroupCommand( name="tasks", help="Manage tasks", diff --git a/airflow/cli/commands/backfill_command.py b/airflow/cli/commands/backfill_command.py new file mode 100644 index 0000000000000..8714ed5585004 --- /dev/null +++ b/airflow/cli/commands/backfill_command.py @@ -0,0 +1,44 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +import logging +import signal + +from airflow import settings +from airflow.models.backfill import _create_backfill +from airflow.utils import cli as cli_utils +from airflow.utils.cli import sigint_handler +from airflow.utils.providers_configuration_loader import providers_configuration_loaded + + +@cli_utils.action_cli +@providers_configuration_loaded +def create_backfill(args) -> None: + """Create a backfill for the given DAG and date range.""" + logging.basicConfig(level=settings.LOGGING_LEVEL, format=settings.SIMPLE_LOG_FORMAT) + signal.signal(signal.SIGTERM, sigint_handler) + + _create_backfill( + dag_id=args.dag, + from_date=args.from_date, + to_date=args.to_date, + max_active_runs=args.max_active_runs, + reverse=args.run_backwards, + dag_run_conf=args.dag_run_conf, + ) diff --git a/airflow/cli/commands/connection_command.py b/airflow/cli/commands/connection_command.py index f68830c490bc8..aace3f9c9aede 100644 --- a/airflow/cli/commands/connection_command.py +++ b/airflow/cli/commands/connection_command.py @@ -21,6 +21,7 @@ import json import os import warnings +from functools import cache from pathlib import Path from typing import Any from urllib.parse import urlsplit, urlunsplit @@ -30,7 +31,6 @@ from airflow.cli.simple_table import AirflowConsole from airflow.cli.utils import is_stdout, print_export_output -from airflow.compat.functools import cache from airflow.configuration import conf from airflow.exceptions import AirflowNotFoundException from airflow.hooks.base import BaseHook diff --git a/airflow/cli/commands/dag_command.py b/airflow/cli/commands/dag_command.py index 0697709754765..83d0430a717bd 100644 --- a/airflow/cli/commands/dag_command.py +++ b/airflow/cli/commands/dag_command.py @@ -23,7 +23,6 @@ import json import logging import operator -import signal import subprocess import sys from typing import TYPE_CHECKING @@ -31,128 +30,31 @@ import re2 from sqlalchemy import delete, select -from airflow import settings from airflow.api.client import get_current_api_client from airflow.api_connexion.schemas.dag_schema import dag_schema from airflow.cli.simple_table import AirflowConsole -from airflow.configuration import conf from airflow.exceptions import AirflowException from airflow.jobs.job import Job from airflow.models import DagBag, DagModel, DagRun, TaskInstance -from airflow.models.dag import DAG from airflow.models.serialized_dag import SerializedDagModel from airflow.utils import cli as cli_utils, timezone -from airflow.utils.cli import get_dag, get_dags, process_subdir, sigint_handler, suppress_logs_and_warning +from airflow.utils.cli import get_dag, process_subdir, suppress_logs_and_warning from airflow.utils.dag_parsing_context import _airflow_parsing_context_manager from airflow.utils.dot_renderer import render_dag, render_dag_dependencies from airflow.utils.helpers import ask_yesno from airflow.utils.providers_configuration_loader import providers_configuration_loaded from airflow.utils.session import NEW_SESSION, create_session, provide_session from airflow.utils.state import DagRunState -from airflow.utils.types import DagRunTriggeredByType if TYPE_CHECKING: from graphviz.dot import Dot from sqlalchemy.orm import Session + from airflow.models.dag import DAG from airflow.timetables.base import DataInterval - log = logging.getLogger(__name__) -def _run_dag_backfill(dags: list[DAG], args) -> None: -
# If only one date is passed, using same as start and end - args.end_date = args.end_date or args.start_date - args.start_date = args.start_date or args.end_date - - run_conf = None - if args.conf: - run_conf = json.loads(args.conf) - - for dag in dags: - if args.task_regex: - dag = dag.partial_subset( - task_ids_or_regex=args.task_regex, include_upstream=not args.ignore_dependencies - ) - if not dag.task_dict: - raise AirflowException( - f"There are no tasks that match '{args.task_regex}' regex. Nothing to run, exiting..." - ) - - if args.dry_run: - print(f"Dry run of DAG {dag.dag_id} on {args.start_date}") - dagrun_infos = dag.iter_dagrun_infos_between(earliest=args.start_date, latest=args.end_date) - for dagrun_info in dagrun_infos: - dr = DagRun( - dag.dag_id, - execution_date=dagrun_info.logical_date, - data_interval=dagrun_info.data_interval, - triggered_by=DagRunTriggeredByType.CLI, - ) - - for task in dag.tasks: - print(f"Task {task.task_id} located in DAG {dag.dag_id}") - ti = TaskInstance(task, run_id=None) - ti.dag_run = dr - ti.dry_run() - else: - if args.reset_dagruns: - DAG.clear_dags( - [dag], - start_date=args.start_date, - end_date=args.end_date, - confirm_prompt=not args.yes, - dag_run_state=DagRunState.QUEUED, - ) - - try: - dag.run( - start_date=args.start_date, - end_date=args.end_date, - mark_success=args.mark_success, - local=args.local, - donot_pickle=(args.donot_pickle or conf.getboolean("core", "donot_pickle")), - ignore_first_depends_on_past=args.ignore_first_depends_on_past, - ignore_task_deps=args.ignore_dependencies, - pool=args.pool, - delay_on_limit_secs=args.delay_on_limit, - verbose=args.verbose, - conf=run_conf, - rerun_failed_tasks=args.rerun_failed_tasks, - run_backwards=args.run_backwards, - continue_on_failures=args.continue_on_failures, - disable_retry=args.disable_retry, - ) - except ValueError as vr: - print(str(vr)) - sys.exit(1) - - -@cli_utils.action_cli -@providers_configuration_loaded -def dag_backfill(args, dag: list[DAG] | DAG | None = None) -> None: - """Create backfill job or dry run for a DAG or list of DAGs using regex.""" - logging.basicConfig(level=settings.LOGGING_LEVEL, format=settings.SIMPLE_LOG_FORMAT) - signal.signal(signal.SIGTERM, sigint_handler) - args.ignore_first_depends_on_past = True - - if not args.start_date and not args.end_date: - raise AirflowException("Provide a start_date and/or end_date") - - if not dag: - dags = get_dags(args.subdir, dag_id=args.dag_id, use_regex=args.treat_dag_id_as_regex) - elif isinstance(dag, list): - dags = dag - else: - dags = [dag] - del dag - - dags.sort(key=lambda d: d.dag_id) - _run_dag_backfill(dags, args) - if len(dags) > 1: - log.info("All of the backfills are done.") - - @cli_utils.action_cli @providers_configuration_loaded def dag_trigger(args) -> None: diff --git a/airflow/config_templates/config.yml b/airflow/config_templates/config.yml index b96c07f237376..0be77a3b6829a 100644 --- a/airflow/config_templates/config.yml +++ b/airflow/config_templates/config.yml @@ -486,9 +486,7 @@ core: strict_asset_uri_validation: description: | Asset URI validation should raise an exception if it is not compliant with AIP-60. - By default this configuration is false, meaning that Airflow 2.x only warns the user. - In Airflow 3, this configuration will be enabled by default. 
- default: "False" + default: "True" example: ~ version_added: 2.9.2 type: boolean diff --git a/airflow/configuration.py b/airflow/configuration.py index f50e19268380b..81dc18365392e 100644 --- a/airflow/configuration.py +++ b/airflow/configuration.py @@ -1790,9 +1790,7 @@ def load_providers_configuration(self): ) self._default_values = create_default_config_parser(self.configuration_description) # sensitive_config_values needs to be refreshed here. This is a cached_property, so we can delete - # the cached values, and it will be refreshed on next access. This has been an implementation - # detail in Python 3.8 but as of Python 3.9 it is documented behaviour. - # See https://docs.python.org/3/library/functools.html#functools.cached_property + # the cached values, and it will be refreshed on next access. try: del self.sensitive_config_values except AttributeError: diff --git a/airflow/dag_processing/collection.py b/airflow/dag_processing/collection.py index c8ce5dc873afa..f68ffbf331dd2 100644 --- a/airflow/dag_processing/collection.py +++ b/airflow/dag_processing/collection.py @@ -31,13 +31,13 @@ import logging from typing import TYPE_CHECKING, NamedTuple -from sqlalchemy import func, select +from sqlalchemy import func, select, tuple_ from sqlalchemy.orm import joinedload, load_only -from sqlalchemy.sql import expression from airflow.assets import Asset, AssetAlias from airflow.assets.manager import asset_manager from airflow.models.asset import ( + AssetActive, AssetAliasModel, AssetModel, DagScheduleAssetAliasReference, @@ -298,8 +298,6 @@ def add_assets(self, *, session: Session) -> dict[str, AssetModel]: orm_assets: dict[str, AssetModel] = { am.uri: am for am in session.scalars(select(AssetModel).where(AssetModel.uri.in_(self.assets))) } - for model in orm_assets.values(): - model.is_orphaned = expression.false() orm_assets.update( (model.uri, model) for model in asset_manager.create_assets( @@ -328,6 +326,20 @@ def add_asset_aliases(self, *, session: Session) -> dict[str, AssetAliasModel]: ) return orm_aliases + def add_asset_active_references(self, assets: Collection[AssetModel], *, session: Session) -> None: + existing_entries = set( + session.execute( + select(AssetActive.name, AssetActive.uri).where( + tuple_(AssetActive.name, AssetActive.uri).in_((asset.name, asset.uri) for asset in assets) + ) + ) + ) + session.add_all( + AssetActive.for_asset(asset) + for asset in assets + if (asset.name, asset.uri) not in existing_entries + ) + def add_dag_asset_references( self, dags: dict[str, DagModel], diff --git a/airflow/decorators/bash.py b/airflow/decorators/bash.py index 39d3131d28c7b..44738492da098 100644 --- a/airflow/decorators/bash.py +++ b/airflow/decorators/bash.py @@ -21,7 +21,7 @@ from typing import Any, Callable, Collection, Mapping, Sequence from airflow.decorators.base import DecoratedOperator, TaskDecorator, task_decorator_factory -from airflow.operators.bash import BashOperator +from airflow.providers.standard.operators.bash import BashOperator from airflow.utils.context import Context, context_merge from airflow.utils.operator_helpers import determine_kwargs from airflow.utils.types import NOTSET diff --git a/airflow/example_dags/example_assets.py b/airflow/example_dags/example_assets.py index 66369794ed999..451f17a3a3abd 100644 --- a/airflow/example_dags/example_assets.py +++ b/airflow/example_dags/example_assets.py @@ -56,7 +56,7 @@ from airflow.assets import Asset from airflow.models.dag import DAG -from airflow.operators.bash import BashOperator +from 
airflow.providers.standard.operators.bash import BashOperator from airflow.timetables.assets import AssetOrTimeSchedule from airflow.timetables.trigger import CronTriggerTimetable diff --git a/airflow/example_dags/example_bash_operator.py b/airflow/example_dags/example_bash_operator.py index b08d31c9930c7..27702d4cb5f10 100644 --- a/airflow/example_dags/example_bash_operator.py +++ b/airflow/example_dags/example_bash_operator.py @@ -24,8 +24,8 @@ import pendulum from airflow.models.dag import DAG -from airflow.operators.bash import BashOperator from airflow.operators.empty import EmptyOperator +from airflow.providers.standard.operators.bash import BashOperator with DAG( dag_id="example_bash_operator", diff --git a/airflow/example_dags/example_complex.py b/airflow/example_dags/example_complex.py index e7eba78eae815..6d7d504f13d51 100644 --- a/airflow/example_dags/example_complex.py +++ b/airflow/example_dags/example_complex.py @@ -25,7 +25,7 @@ from airflow.models.baseoperator import chain from airflow.models.dag import DAG -from airflow.operators.bash import BashOperator +from airflow.providers.standard.operators.bash import BashOperator with DAG( dag_id="example_complex", diff --git a/airflow/example_dags/example_inlet_event_extra.py b/airflow/example_dags/example_inlet_event_extra.py index 974534c295b79..9773df7a3f913 100644 --- a/airflow/example_dags/example_inlet_event_extra.py +++ b/airflow/example_dags/example_inlet_event_extra.py @@ -28,7 +28,7 @@ from airflow.assets import Asset from airflow.decorators import task from airflow.models.dag import DAG -from airflow.operators.bash import BashOperator +from airflow.providers.standard.operators.bash import BashOperator asset = Asset("s3://output/1.txt") diff --git a/airflow/example_dags/example_outlet_event_extra.py b/airflow/example_dags/example_outlet_event_extra.py index 893090460b538..0d097eab0ac27 100644 --- a/airflow/example_dags/example_outlet_event_extra.py +++ b/airflow/example_dags/example_outlet_event_extra.py @@ -29,7 +29,7 @@ from airflow.assets.metadata import Metadata from airflow.decorators import task from airflow.models.dag import DAG -from airflow.operators.bash import BashOperator +from airflow.providers.standard.operators.bash import BashOperator ds = Asset("s3://output/1.txt") diff --git a/airflow/example_dags/example_passing_params_via_test_command.py b/airflow/example_dags/example_passing_params_via_test_command.py index 2fcb8e4edab7b..7dcd963c09681 100644 --- a/airflow/example_dags/example_passing_params_via_test_command.py +++ b/airflow/example_dags/example_passing_params_via_test_command.py @@ -27,7 +27,7 @@ from airflow.decorators import task from airflow.models.dag import DAG -from airflow.operators.bash import BashOperator +from airflow.providers.standard.operators.bash import BashOperator @task(task_id="run_this") diff --git a/airflow/example_dags/example_sensors.py b/airflow/example_dags/example_sensors.py index 6fb564e63ae43..f639083858101 100644 --- a/airflow/example_dags/example_sensors.py +++ b/airflow/example_dags/example_sensors.py @@ -22,11 +22,11 @@ import pendulum from airflow.models.dag import DAG -from airflow.operators.bash import BashOperator +from airflow.providers.standard.operators.bash import BashOperator +from airflow.providers.standard.sensors.bash import BashSensor from airflow.providers.standard.sensors.time import TimeSensor, TimeSensorAsync from airflow.providers.standard.sensors.time_delta import TimeDeltaSensor, TimeDeltaSensorAsync from airflow.providers.standard.sensors.weekday 
import DayOfWeekSensor -from airflow.sensors.bash import BashSensor from airflow.sensors.filesystem import FileSensor from airflow.sensors.python import PythonSensor from airflow.utils.trigger_rule import TriggerRule diff --git a/airflow/example_dags/example_setup_teardown.py b/airflow/example_dags/example_setup_teardown.py index 9fab87df7568b..81994fabc202d 100644 --- a/airflow/example_dags/example_setup_teardown.py +++ b/airflow/example_dags/example_setup_teardown.py @@ -22,7 +22,7 @@ import pendulum from airflow.models.dag import DAG -from airflow.operators.bash import BashOperator +from airflow.providers.standard.operators.bash import BashOperator from airflow.utils.task_group import TaskGroup with DAG( diff --git a/airflow/example_dags/example_task_group.py b/airflow/example_dags/example_task_group.py index 6435a912cc419..5129ad3cc61e1 100644 --- a/airflow/example_dags/example_task_group.py +++ b/airflow/example_dags/example_task_group.py @@ -22,8 +22,8 @@ import pendulum from airflow.models.dag import DAG -from airflow.operators.bash import BashOperator from airflow.operators.empty import EmptyOperator +from airflow.providers.standard.operators.bash import BashOperator from airflow.utils.task_group import TaskGroup # [START howto_task_group] diff --git a/airflow/example_dags/example_trigger_target_dag.py b/airflow/example_dags/example_trigger_target_dag.py index 7a009b8dcc6d1..3af68a25607a4 100644 --- a/airflow/example_dags/example_trigger_target_dag.py +++ b/airflow/example_dags/example_trigger_target_dag.py @@ -27,7 +27,7 @@ from airflow.decorators import task from airflow.models.dag import DAG -from airflow.operators.bash import BashOperator +from airflow.providers.standard.operators.bash import BashOperator @task(task_id="run_this") diff --git a/airflow/example_dags/example_xcom.py b/airflow/example_dags/example_xcom.py index fa99b91834658..2563eda77ee19 100644 --- a/airflow/example_dags/example_xcom.py +++ b/airflow/example_dags/example_xcom.py @@ -24,7 +24,7 @@ from airflow.decorators import task from airflow.models.dag import DAG from airflow.models.xcom_arg import XComArg -from airflow.operators.bash import BashOperator +from airflow.providers.standard.operators.bash import BashOperator value_1 = [1, 2, 3] value_2 = {"a": "b"} diff --git a/airflow/example_dags/example_xcomargs.py b/airflow/example_dags/example_xcomargs.py index d9d0c94f4ea01..a7103dc191135 100644 --- a/airflow/example_dags/example_xcomargs.py +++ b/airflow/example_dags/example_xcomargs.py @@ -25,7 +25,7 @@ from airflow.decorators import task from airflow.models.dag import DAG -from airflow.operators.bash import BashOperator +from airflow.providers.standard.operators.bash import BashOperator log = logging.getLogger(__name__) diff --git a/airflow/example_dags/sql/tutorial_taskflow_template.sql b/airflow/example_dags/sql/tutorial_taskflow_template.sql new file mode 100644 index 0000000000000..375c39eac610b --- /dev/null +++ b/airflow/example_dags/sql/tutorial_taskflow_template.sql @@ -0,0 +1,23 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + +select * from test_data +where 1=1 + and run_id = '{{ run_id }}' + and something_else = '{{ params.foobar }}' diff --git a/airflow/example_dags/tutorial.py b/airflow/example_dags/tutorial.py index 0e31775c7a9a7..6e27bbcd2e5fe 100644 --- a/airflow/example_dags/tutorial.py +++ b/airflow/example_dags/tutorial.py @@ -32,7 +32,7 @@ from airflow.models.dag import DAG # Operators; we need this to operate! -from airflow.operators.bash import BashOperator +from airflow.providers.standard.operators.bash import BashOperator # [END import_module] diff --git a/airflow/example_dags/tutorial_taskflow_templates.py b/airflow/example_dags/tutorial_taskflow_templates.py new file mode 100644 index 0000000000000..925f60524b5ea --- /dev/null +++ b/airflow/example_dags/tutorial_taskflow_templates.py @@ -0,0 +1,107 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +# [START tutorial] +# [START import_module] +import pendulum + +from airflow.decorators import dag, task +from airflow.operators.python import get_current_context + +# [END import_module] + + +# [START instantiate_dag] +@dag( + schedule="@daily", + start_date=pendulum.datetime(2021, 1, 1, tz="UTC"), + catchup=False, + tags=["example"], + params={"foobar": "param_from_dag", "other_param": "from_dag"}, +) +def tutorial_taskflow_templates(): + """ + ### TaskFlow API Tutorial Documentation + This is a simple data pipeline example which demonstrates the use of + the templates in the TaskFlow API. + Documentation that goes along with the Airflow TaskFlow API tutorial is + located + [here](https://airflow.apache.org/docs/apache-airflow/stable/tutorial_taskflow_api.html) + """ + # [END instantiate_dag] + + # [START template_test] + @task( + # Causes variables that end with `.sql` to be read and templates + # within to be rendered. + templates_exts=[".sql"], + ) + def template_test(sql, test_var, data_interval_end): + context = get_current_context() + + # Will print... + # select * from test_data + # where 1=1 + # and run_id = 'scheduled__2024-10-09T00:00:00+00:00' + # and something_else = 'param_from_task' + print(f"sql: {sql}") + + # Will print `scheduled__2024-10-09T00:00:00+00:00` + print(f"test_var: {test_var}") + + # Will print `2024-10-10 00:00:00+00:00`. + # Note how we didn't pass this value when calling the task. 
Instead + # it was passed by the decorator from the context + print(f"data_interval_end: {data_interval_end}") + + # Will print... + # run_id: scheduled__2024-10-09T00:00:00+00:00; params.other_param: from_dag + template_str = "run_id: {{ run_id }}; params.other_param: {{ params.other_param }}" + rendered_template = context["task"].render_template( + template_str, + context, + ) + print(f"rendered template: {rendered_template}") + + # Will print the full context dict + print(f"context: {context}") + + # [END template_test] + + # [START main_flow] + template_test.override( + # Will be merged with the dict defined in the dag + # and override existing parameters. + # + # Must be passed into the decorator's parameters + # through `.override()` not into the actual task + # function + params={"foobar": "param_from_task"}, + )( + sql="sql/test.sql", + test_var="{{ run_id }}", + ) + # [END main_flow] + + +# [START dag_invocation] +tutorial_taskflow_templates() +# [END dag_invocation] + +# [END tutorial] diff --git a/airflow/exceptions.py b/airflow/exceptions.py index 55dd02fdae313..ccf62ca5e8178 100644 --- a/airflow/exceptions.py +++ b/airflow/exceptions.py @@ -327,31 +327,6 @@ class PoolNotFound(AirflowNotFoundException): """Raise when a Pool is not available in the system.""" -class NoAvailablePoolSlot(AirflowException): - """Raise when there is not enough slots in pool.""" - - -class DagConcurrencyLimitReached(AirflowException): - """Raise when DAG max_active_tasks limit is reached.""" - - -class TaskConcurrencyLimitReached(AirflowException): - """Raise when task max_active_tasks limit is reached.""" - - -class BackfillUnfinished(AirflowException): - """ - Raises when not all tasks succeed in backfill. - - :param message: The human-readable description of the exception - :param ti_status: The information about all task statuses - """ - - def __init__(self, message, ti_status): - super().__init__(message) - self.ti_status = ti_status - - class FileSyntaxError(NamedTuple): """Information about a single error in a file.""" diff --git a/airflow/executors/executor_loader.py b/airflow/executors/executor_loader.py index 1eeee1ff68a9f..4a940793df27f 100644 --- a/airflow/executors/executor_loader.py +++ b/airflow/executors/executor_loader.py @@ -171,7 +171,7 @@ def set_default_executor(cls, executor: BaseExecutor) -> None: """ Externally set an executor to be the default. 
- This is used in rare cases such as dag.run which allows, as a user convenience, to provide + This is used in rare cases such as dag.test which allows, as a user convenience, to provide the executor by cli/argument instead of Airflow configuration """ exec_class_name = executor.__class__.__qualname__ diff --git a/airflow/io/__init__.py b/airflow/io/__init__.py index 9996a77717ae0..49f2711c3c6cd 100644 --- a/airflow/io/__init__.py +++ b/airflow/io/__init__.py @@ -18,6 +18,7 @@ import inspect import logging +from functools import cache from typing import ( TYPE_CHECKING, Callable, @@ -26,7 +27,6 @@ from fsspec.implementations.local import LocalFileSystem -from airflow.compat.functools import cache from airflow.providers_manager import ProvidersManager from airflow.stats import Stats from airflow.utils.module_loading import import_string diff --git a/airflow/jobs/backfill_job_runner.py b/airflow/jobs/backfill_job_runner.py deleted file mode 100644 index 19dda4d698221..0000000000000 --- a/airflow/jobs/backfill_job_runner.py +++ /dev/null @@ -1,1106 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-from __future__ import annotations - -import time -from typing import TYPE_CHECKING, Any, Iterable, Iterator, Mapping, Sequence - -import attr -import pendulum -from sqlalchemy import case, or_, select, tuple_, update -from sqlalchemy.exc import OperationalError -from sqlalchemy.orm.session import make_transient -from tabulate import tabulate - -from airflow import models -from airflow.exceptions import ( - AirflowException, - BackfillUnfinished, - DagConcurrencyLimitReached, - NoAvailablePoolSlot, - PoolNotFound, - TaskConcurrencyLimitReached, - UnknownExecutorException, -) -from airflow.executors.executor_loader import ExecutorLoader -from airflow.jobs.base_job_runner import BaseJobRunner -from airflow.jobs.job import Job, perform_heartbeat -from airflow.models import DAG, DagPickle -from airflow.models.dagrun import DagRun -from airflow.models.taskinstance import TaskInstance -from airflow.ti_deps.dep_context import DepContext -from airflow.ti_deps.dependencies_deps import BACKFILL_QUEUED_DEPS -from airflow.timetables.base import DagRunInfo -from airflow.utils import helpers, timezone -from airflow.utils.configuration import tmp_configuration_copy -from airflow.utils.log.logging_mixin import LoggingMixin -from airflow.utils.session import NEW_SESSION, provide_session -from airflow.utils.state import DagRunState, State, TaskInstanceState -from airflow.utils.types import DagRunTriggeredByType, DagRunType - -if TYPE_CHECKING: - import datetime - - from sqlalchemy.orm.session import Session - - from airflow.executors.base_executor import BaseExecutor - from airflow.models.abstractoperator import AbstractOperator - from airflow.models.taskinstance import TaskInstanceKey - - -class BackfillJobRunner(BaseJobRunner, LoggingMixin): - """ - A backfill job runner consists of a dag for a specific time range. - - It triggers a set of task instance runs, in the right order and lasts for - as long as it takes for the set of task instance to be completed. - """ - - job_type = "BackfillJob" - - STATES_COUNT_AS_RUNNING = (TaskInstanceState.RUNNING, TaskInstanceState.QUEUED) - - @attr.define - class _DagRunTaskStatus: - """ - Internal status of the backfill job. - - This class is intended to be instantiated only within a BackfillJobRunner - instance and will track the execution of tasks, e.g. running, skipped, - succeeded, failed, etc. Information about the dag runs related to the - backfill job are also being tracked in this structure, e.g. finished runs, etc. - Any other status related information related to the execution of dag runs / tasks - can be included in this structure since it makes it easier to pass it around. 
- - :param to_run: Tasks to run in the backfill - :param running: Maps running task instance key to task instance object - :param skipped: Tasks that have been skipped - :param succeeded: Tasks that have succeeded so far - :param failed: Tasks that have failed - :param not_ready: Tasks not ready for execution - :param deadlocked: Deadlocked tasks - :param active_runs: Active dag runs at a certain point in time - :param executed_dag_run_dates: Datetime objects for the executed dag runs - :param finished_runs: Number of finished runs so far - :param total_runs: Number of total dag runs able to run - """ - - to_run: dict[TaskInstanceKey, TaskInstance] = attr.ib(factory=dict) - running: dict[TaskInstanceKey, TaskInstance] = attr.ib(factory=dict) - skipped: set[TaskInstanceKey] = attr.ib(factory=set) - succeeded: set[TaskInstanceKey] = attr.ib(factory=set) - failed: set[TaskInstanceKey] = attr.ib(factory=set) - not_ready: set[TaskInstanceKey] = attr.ib(factory=set) - deadlocked: set[TaskInstance] = attr.ib(factory=set) - active_runs: set[DagRun] = attr.ib(factory=set) - executed_dag_run_dates: set[pendulum.DateTime] = attr.ib(factory=set) - finished_runs: int = 0 - total_runs: int = 0 - - def __init__( - self, - job: Job, - dag: DAG, - start_date=None, - end_date=None, - mark_success=False, - donot_pickle=False, - ignore_first_depends_on_past=False, - ignore_task_deps=False, - pool=None, - delay_on_limit_secs=1.0, - verbose=False, - conf=None, - rerun_failed_tasks=False, - run_backwards=False, - run_at_least_once=False, - continue_on_failures=False, - disable_retry=False, - ) -> None: - """ - Create a BackfillJobRunner. - - :param dag: DAG object. - :param start_date: start date for the backfill date range. - :param end_date: end date for the backfill date range. - :param mark_success: flag whether to mark the task auto success. - :param donot_pickle: whether pickle - :param ignore_first_depends_on_past: whether to ignore depend on past - :param ignore_task_deps: whether to ignore the task dependency - :param pool: pool to backfill - :param delay_on_limit_secs: - :param verbose: - :param conf: a dictionary which user could pass k-v pairs for backfill - :param rerun_failed_tasks: flag to whether to - auto rerun the failed task in backfill - :param run_backwards: Whether to process the dates from most to least recent - :param run_at_least_once: If true, always run the DAG at least once even - if no logical run exists within the time range. - :param args: - :param kwargs: - """ - super().__init__(job) - self.dag = dag - self.dag_id = dag.dag_id - self.bf_start_date = start_date - self.bf_end_date = end_date - self.mark_success = mark_success - self.donot_pickle = donot_pickle - self.ignore_first_depends_on_past = ignore_first_depends_on_past - self.ignore_task_deps = ignore_task_deps - self.pool = pool - self.delay_on_limit_secs = delay_on_limit_secs - self.verbose = verbose - self.conf = conf - self.rerun_failed_tasks = rerun_failed_tasks - self.run_backwards = run_backwards - self.run_at_least_once = run_at_least_once - self.continue_on_failures = continue_on_failures - self.disable_retry = disable_retry - - def _update_counters(self, ti_status: _DagRunTaskStatus, session: Session) -> None: - """ - Update the counters per state of the tasks that were running. - - Can re-add to tasks to run when required. 
- - :param ti_status: the internal status of the backfill job tasks - """ - tis_to_be_scheduled = [] - refreshed_tis = [] - TI = TaskInstance - - ti_primary_key_to_ti_key = {ti_key.primary: ti_key for ti_key in ti_status.running.keys()} - - filter_for_tis = TI.filter_for_tis(list(ti_status.running.values())) - if filter_for_tis is not None: - refreshed_tis = session.scalars(select(TI).where(filter_for_tis)).all() - - for ti in refreshed_tis: - # Use primary key to match in memory information - ti_key = ti_primary_key_to_ti_key[ti.key.primary] - if ti.state == TaskInstanceState.SUCCESS: - ti_status.succeeded.add(ti_key) - self.log.debug("Task instance %s succeeded. Don't rerun.", ti) - ti_status.running.pop(ti_key) - continue - if ti.state == TaskInstanceState.SKIPPED: - ti_status.skipped.add(ti_key) - self.log.debug("Task instance %s skipped. Don't rerun.", ti) - ti_status.running.pop(ti_key) - continue - if ti.state == TaskInstanceState.FAILED: - self.log.error("Task instance %s failed", ti) - ti_status.failed.add(ti_key) - ti_status.running.pop(ti_key) - continue - # special case: if the task needs to run again put it back - if ti.state == TaskInstanceState.UP_FOR_RETRY: - self.log.warning("Task instance %s is up for retry", ti) - ti_status.running.pop(ti_key) - ti_status.to_run[ti.key] = ti - # special case: if the task needs to be rescheduled put it back - elif ti.state == TaskInstanceState.UP_FOR_RESCHEDULE: - self.log.warning("Task instance %s is up for reschedule", ti) - ti_status.running.pop(ti_key) - ti_status.to_run[ti.key] = ti - # special case: The state of the task can be set to NONE by the task itself - # when it reaches concurrency limits. It could also happen when the state - # is changed externally, e.g. by clearing tasks from the ui. We need to cover - # for that as otherwise those tasks would fall outside the scope of - # the backfill suddenly. - elif ti.state is None: - self.log.warning( - "FIXME: task instance %s state was set to none externally or " - "reaching concurrency limits. Re-adding task to queue.", - ti, - ) - tis_to_be_scheduled.append(ti) - ti_status.running.pop(ti_key) - ti_status.to_run[ti.key] = ti - # special case: Deferrable task can go from DEFERRED to SCHEDULED; - # when that happens, we need to put it back as in UP_FOR_RESCHEDULE - elif ti.state == TaskInstanceState.SCHEDULED: - self.log.debug("Task instance %s is resumed from deferred state", ti) - ti_status.running.pop(ti_key) - ti_status.to_run[ti.key] = ti - - # Batch schedule of task instances - if tis_to_be_scheduled: - filter_for_tis = TI.filter_for_tis(tis_to_be_scheduled) - session.execute( - update(TI) - .where(filter_for_tis) - .values( - state=TaskInstanceState.SCHEDULED, - try_number=case( - ( - or_(TI.state.is_(None), TI.state != TaskInstanceState.UP_FOR_RESCHEDULE), - TI.try_number + 1, - ), - else_=TI.try_number, - ), - ) - .execution_options(synchronize_session=False) - ) - session.flush() - - def _manage_executor_state( - self, - running: Mapping[TaskInstanceKey, TaskInstance], - executor: BaseExecutor, - session: Session, - ) -> Iterator[tuple[AbstractOperator, str, Sequence[TaskInstance], int]]: - """ - Compare task instances' states with that of the executor. - - Expands downstream mapped tasks when necessary. 
- - :param running: dict of key, task to verify - :return: An iterable of expanded TaskInstance per MappedTask - """ - # list of tuples (dag_id, task_id, execution_date, map_index) of running tasks in executor - buffered_events = list(executor.get_event_buffer().items()) - running_tis_ids = [ - (key.dag_id, key.task_id, key.run_id, key.map_index) - for key, _ in buffered_events - if key in running - ] - # list of TaskInstance of running tasks in executor (refreshed from db in batch) - refreshed_running_tis = session.scalars( - select(TaskInstance).where( - tuple_( - TaskInstance.dag_id, - TaskInstance.task_id, - TaskInstance.run_id, - TaskInstance.map_index, - ).in_(running_tis_ids) - ) - ).all() - # dict of refreshed TaskInstance by key to easily find them - running_dict = {(ti.dag_id, ti.task_id, ti.run_id, ti.map_index): ti for ti in refreshed_running_tis} - need_refresh = False - - for key, value in buffered_events: - state, info = value - ti_key = (key.dag_id, key.task_id, key.run_id, key.map_index) - if ti_key not in running_dict: - self.log.warning("%s state %s not in running=%s", key, state, running.values()) - continue - - ti = running_dict[ti_key] - if need_refresh: - ti.refresh_from_db(session=session) - - self.log.debug("Executor state: %s task %s", state, ti) - - if ( - state in (TaskInstanceState.FAILED, TaskInstanceState.SUCCESS) - and ti.state in self.STATES_COUNT_AS_RUNNING - ): - msg = ( - f"The executor reported that the task instance {ti} finished with state {state}, " - f"but the task instance's state attribute is {ti.state}. " - "Learn more: https://airflow.apache.org/docs/apache-airflow/stable/troubleshooting.html#task-state-changed-externally" - ) - if info is not None: - msg += f" Extra info: {info}" - self.log.error(msg) - ti.handle_failure(error=msg) - continue - - def _iter_task_needing_expansion() -> Iterator[AbstractOperator]: - from airflow.models.mappedoperator import AbstractOperator - - for node in self.dag.get_task(ti.task_id).iter_mapped_dependants(): - if isinstance(node, AbstractOperator): - yield node - else: # A (mapped) task group. All its children need expansion. - yield from node.iter_tasks() - - if ti.state not in self.STATES_COUNT_AS_RUNNING: - # Don't use ti.task; if this task is mapped, that attribute - # would hold the unmapped task. We need to original task here. - for node in _iter_task_needing_expansion(): - new_tis, num_mapped_tis = node.expand_mapped_task(ti.run_id, session=session) - yield node, ti.run_id, new_tis, num_mapped_tis - - @provide_session - def _get_dag_run( - self, - dagrun_info: DagRunInfo, - dag: DAG, - session: Session = NEW_SESSION, - ) -> DagRun | None: - """ - Return an existing dag run for the given run date or create one. - - If the max_active_runs limit is reached, this function will return None. 
- - :param dagrun_info: Schedule information for the dag run - :param dag: DAG - :param session: the database session object - :return: a DagRun in state RUNNING or None - """ - run_date = dagrun_info.logical_date - - respect_dag_max_active_limit = bool(dag.timetable.can_be_scheduled) - - current_active_dag_count = dag.get_num_active_runs(external_trigger=False) - - # check if we are scheduling on top of an already existing DAG run - # we could find a "scheduled" run instead of a "backfill" - runs = DagRun.find(dag_id=dag.dag_id, execution_date=run_date, session=session) - run: DagRun | None - if runs: - run = runs[0] - if run.state == DagRunState.RUNNING: - respect_dag_max_active_limit = False - # Fixes --conf overwrite for backfills with already existing DagRuns - run.conf = self.conf or {} - # start_date is cleared for existing DagRuns - run.start_date = timezone.utcnow() - else: - run = None - - # enforce max_active_runs limit for dag, special cases already - # handled by respect_dag_max_active_limit - if respect_dag_max_active_limit and current_active_dag_count >= dag.max_active_runs: - return None - - run = run or dag.create_dagrun( - execution_date=run_date, - data_interval=dagrun_info.data_interval, - start_date=timezone.utcnow(), - state=DagRunState.RUNNING, - external_trigger=False, - session=session, - conf=self.conf, - run_type=DagRunType.BACKFILL_JOB, - creating_job_id=self.job.id, - triggered_by=DagRunTriggeredByType.TIMETABLE, - ) - - # set required transient field - run.dag = dag - - # explicitly mark as backfill and running - run.state = DagRunState.RUNNING - run.run_type = DagRunType.BACKFILL_JOB - run.verify_integrity(session=session) - - run.notify_dagrun_state_changed(msg="started") - return run - - @provide_session - def _task_instances_for_dag_run( - self, - dag: DAG, - dag_run: DagRun, - session: Session = NEW_SESSION, - ) -> dict[TaskInstanceKey, TaskInstance]: - """ - Return a map of task instance keys to task instance objects for the given dag run. - - :param dag_run: the dag run to get the tasks from - :param session: the database session object - """ - tasks_to_run = {} - - if dag_run is None: - return tasks_to_run - - # check if we have orphaned tasks - self.reset_state_for_orphaned_tasks(filter_by_dag_run=dag_run, session=session) - - # for some reason if we don't refresh the reference to run is lost - dag_run.refresh_from_db(session=session) - make_transient(dag_run) - - dag_run.dag = dag - info = dag_run.task_instance_scheduling_decisions(session=session) - schedulable_tis = info.schedulable_tis - try: - for ti in dag_run.get_task_instances(session=session): - if ti in schedulable_tis: - if ti.state != TaskInstanceState.UP_FOR_RESCHEDULE: - ti.try_number += 1 - ti.set_state(TaskInstanceState.SCHEDULED) - if ti.state != TaskInstanceState.REMOVED: - tasks_to_run[ti.key] = ti - session.commit() - except Exception: - session.rollback() - raise - return tasks_to_run - - def _log_progress(self, ti_status: _DagRunTaskStatus) -> None: - self.log.info( - "[backfill progress] | finished run %s of %s | tasks waiting: %s | succeeded: %s | " - "running: %s | failed: %s | skipped: %s | deadlocked: %s | not ready: %s", - ti_status.finished_runs, - ti_status.total_runs, - len(ti_status.to_run), - len(ti_status.succeeded), - len(ti_status.running), - len(ti_status.failed), - len(ti_status.skipped), - len(ti_status.deadlocked), - len(ti_status.not_ready), - ) - - self.log.debug("Finished dag run loop iteration. 
Remaining tasks %s", ti_status.to_run.values()) - - def _process_backfill_task_instances( - self, - ti_status: _DagRunTaskStatus, - pickle_id: int | None, - start_date: datetime.datetime | None = None, - *, - session: Session, - ) -> list: - """ - Process a set of task instances from a set of DAG runs. - - Special handling is done to account for different task instance states - that could be present when running them in a backfill process. - - :param ti_status: the internal status of the job - :param executor: the executor to run the task instances - :param pickle_id: the pickle_id if dag is pickled, None otherwise - :param start_date: the start date of the backfill job - :param session: the current session object - :return: the list of execution_dates for the finished dag runs - """ - executed_run_dates = [] - - while (ti_status.to_run or ti_status.running) and not ti_status.deadlocked: - self.log.debug("Clearing out not_ready list") - ti_status.not_ready.clear() - - # we need to execute the tasks bottom to top - # or leaf to root, as otherwise tasks might be - # determined deadlocked while they are actually - # waiting for their upstream to finish - def _per_task_process(key, ti: TaskInstance, session): - ti.refresh_from_db(lock_for_update=True, session=session) - - task = self.dag.get_task(ti.task_id) - ti.task = task - - self.log.debug("Task instance to run %s state %s", ti, ti.state) - - # The task was already marked successful or skipped by a - # different Job. Don't rerun it. - if ti.state == TaskInstanceState.SUCCESS: - ti_status.succeeded.add(key) - self.log.debug("Task instance %s succeeded. Don't rerun.", ti) - ti_status.to_run.pop(key) - if key in ti_status.running: - ti_status.running.pop(key) - return - elif ti.state == TaskInstanceState.SKIPPED: - ti_status.skipped.add(key) - self.log.debug("Task instance %s skipped. Don't rerun.", ti) - ti_status.to_run.pop(key) - if key in ti_status.running: - ti_status.running.pop(key) - return - - if self.rerun_failed_tasks: - # Rerun failed tasks or upstreamed failed tasks - if ti.state in (TaskInstanceState.FAILED, TaskInstanceState.UPSTREAM_FAILED): - self.log.error("Task instance %s with state %s", ti, ti.state) - if key in ti_status.running: - ti_status.running.pop(key) - # Reset the failed task in backfill to scheduled state - ti.try_number += 1 - ti.set_state(TaskInstanceState.SCHEDULED, session=session) - if ti.dag_run not in ti_status.active_runs: - ti_status.active_runs.add(ti.dag_run) - else: - # Default behaviour which works for subdag. - if ti.state in (TaskInstanceState.FAILED, TaskInstanceState.UPSTREAM_FAILED): - self.log.error("Task instance %s with state %s", ti, ti.state) - ti_status.failed.add(key) - ti_status.to_run.pop(key) - if key in ti_status.running: - ti_status.running.pop(key) - return - - if self.ignore_first_depends_on_past: - dagrun = ti.get_dagrun(session=session) - ignore_depends_on_past = dagrun.execution_date == (start_date or ti.start_date) - else: - ignore_depends_on_past = False - - backfill_context = DepContext( - deps=BACKFILL_QUEUED_DEPS, - ignore_depends_on_past=ignore_depends_on_past, - ignore_task_deps=self.ignore_task_deps, - wait_for_past_depends_before_skipping=False, - flag_upstream_failed=True, - ) - - executor = ExecutorLoader.load_executor(str(ti.executor) if ti.executor else None) - # Is the task runnable? 
-- then run it - # the dependency checker can change states of tis - if ti.are_dependencies_met( - dep_context=backfill_context, session=session, verbose=self.verbose - ): - if executor.has_task(ti): - self.log.debug("Task Instance %s already in executor waiting for queue to clear", ti) - else: - self.log.debug("Sending %s to executor", ti) - # Skip scheduled state, we are executing immediately - if ti.state in (TaskInstanceState.UP_FOR_RETRY, None): - # i am not sure why this is necessary. - # seemingly a quirk of backfill runner. - # it should be handled elsewhere i think. - # seems the leaf tasks are set SCHEDULED but others not. - # but i am not going to look too closely since we need - # to nuke the current backfill approach anyway. - ti.try_number += 1 - ti.state = TaskInstanceState.QUEUED - ti.queued_by_job_id = self.job.id - ti.queued_dttm = timezone.utcnow() - session.merge(ti) - try: - session.commit() - except OperationalError: - self.log.exception("Failed to commit task state change due to operational error") - session.rollback() - # early exit so the outer loop can retry - return - - cfg_path = None - - if executor.is_local: - cfg_path = tmp_configuration_copy() - - executor.queue_task_instance( - ti, - mark_success=self.mark_success, - pickle_id=pickle_id, - ignore_task_deps=self.ignore_task_deps, - ignore_depends_on_past=ignore_depends_on_past, - wait_for_past_depends_before_skipping=False, - pool=self.pool, - cfg_path=cfg_path, - ) - ti_status.running[key] = ti - ti_status.to_run.pop(key) - return - - if ti.state == TaskInstanceState.UPSTREAM_FAILED: - self.log.error("Task instance %s upstream failed", ti) - ti_status.failed.add(key) - ti_status.to_run.pop(key) - if key in ti_status.running: - ti_status.running.pop(key) - return - - # special case - if ti.state == TaskInstanceState.UP_FOR_RETRY: - self.log.debug("Task instance %s retry period not expired yet", ti) - if key in ti_status.running: - ti_status.running.pop(key) - ti_status.to_run[key] = ti - return - - # special case - if ti.state == TaskInstanceState.UP_FOR_RESCHEDULE: - self.log.debug("Task instance %s reschedule period not expired yet", ti) - if key in ti_status.running: - ti_status.running.pop(key) - ti_status.to_run[key] = ti - return - - # all remaining tasks - self.log.debug("Adding %s to not_ready", ti) - ti_status.not_ready.add(key) - - try: - for task in self.dag.topological_sort(): - for key, ti in list(ti_status.to_run.items()): - # Attempt to workaround deadlock on backfill by attempting to commit the transaction - # state update few times before giving up - max_attempts = 5 - for i in range(max_attempts): - if task.task_id != ti.task_id: - continue - - pool = session.scalar( - select(models.Pool).where(models.Pool.pool == task.pool).limit(1) - ) - if not pool: - raise PoolNotFound(f"Unknown pool: {task.pool}") - - open_slots = pool.open_slots(session=session) - if open_slots <= 0: - raise NoAvailablePoolSlot( - f"Not scheduling since there are {open_slots} " - f"open slots in pool {task.pool}" - ) - - num_running_task_instances_in_dag = DAG.get_num_task_instances( - self.dag_id, - states=self.STATES_COUNT_AS_RUNNING, - session=session, - ) - - if num_running_task_instances_in_dag >= self.dag.max_active_tasks: - raise DagConcurrencyLimitReached( - "Not scheduling since DAG max_active_tasks limit is reached." 
- ) - - if task.max_active_tis_per_dag is not None: - num_running_task_instances_in_task = DAG.get_num_task_instances( - dag_id=self.dag_id, - task_ids=[task.task_id], - states=self.STATES_COUNT_AS_RUNNING, - session=session, - ) - - if num_running_task_instances_in_task >= task.max_active_tis_per_dag: - raise TaskConcurrencyLimitReached( - "Not scheduling since Task concurrency limit is reached." - ) - - if task.max_active_tis_per_dagrun is not None: - num_running_task_instances_in_task_dagrun = DAG.get_num_task_instances( - dag_id=self.dag_id, - run_id=ti.run_id, - task_ids=[task.task_id], - states=self.STATES_COUNT_AS_RUNNING, - session=session, - ) - - if ( - num_running_task_instances_in_task_dagrun - >= task.max_active_tis_per_dagrun - ): - raise TaskConcurrencyLimitReached( - "Not scheduling since Task concurrency per DAG run limit is reached." - ) - - _per_task_process(key, ti, session) - try: - session.commit() - except OperationalError: - self.log.exception( - "Failed to commit task state due to operational error. " - "The job will retry this operation so if your backfill succeeds, " - "you can safely ignore this message.", - ) - session.rollback() - if i == max_attempts - 1: - raise - # retry the loop - else: - # break the retry loop - break - except (NoAvailablePoolSlot, DagConcurrencyLimitReached, TaskConcurrencyLimitReached) as e: - self.log.debug(e) - - perform_heartbeat( - job=self.job, - heartbeat_callback=self.heartbeat_callback, - only_if_necessary=True, - ) - # execute the tasks in the queue - for executor in self.job.executors: - executor.heartbeat() - - # If the set of tasks that aren't ready ever equals the set of - # tasks to run and there are no running tasks then the backfill - # is deadlocked - if ti_status.not_ready and ti_status.not_ready == set(ti_status.to_run) and not ti_status.running: - self.log.warning("Deadlock discovered for ti_status.to_run=%s", ti_status.to_run.values()) - ti_status.deadlocked.update(ti_status.to_run.values()) - ti_status.to_run.clear() - - for executor in self.job.executors: - # check executor state -- and expand any mapped TIs - for node, run_id, new_mapped_tis, max_map_index in self._manage_executor_state( - ti_status.running, executor, session - ): - - def to_keep(key: TaskInstanceKey) -> bool: - if key.dag_id != node.dag_id or key.task_id != node.task_id or key.run_id != run_id: - # For another Dag/Task/Run -- don't remove - return True - return 0 <= key.map_index <= max_map_index - - # remove the old unmapped TIs for node -- they have been replaced with the mapped TIs - ti_status.to_run = {key: ti for (key, ti) in ti_status.to_run.items() if to_keep(key)} - - ti_status.to_run.update({ti.key: ti for ti in new_mapped_tis}) - - for new_ti in new_mapped_tis: - new_ti.try_number += 1 - new_ti.set_state(TaskInstanceState.SCHEDULED, session=session) - - # Set state to failed for running TIs that are set up for retry if disable-retry flag is set - for ti in ti_status.running.values(): - if self.disable_retry and ti.state == TaskInstanceState.UP_FOR_RETRY: - ti.set_state(TaskInstanceState.FAILED, session=session) - - # update the task counters - self._update_counters(ti_status=ti_status, session=session) - session.commit() - - # update dag run state - _dag_runs = ti_status.active_runs.copy() - for run in _dag_runs: - run.update_state(session=session) - if run.state in State.finished_dr_states: - ti_status.finished_runs += 1 - ti_status.active_runs.remove(run) - executed_run_dates.append(run.execution_date) - - 
self._log_progress(ti_status) - session.commit() - time.sleep(1) - - # return updated status - return executed_run_dates - - @provide_session - def _collect_errors(self, ti_status: _DagRunTaskStatus, session: Session = NEW_SESSION) -> Iterator[str]: - def tabulate_ti_keys_set(ti_keys: Iterable[TaskInstanceKey]) -> str: - # Sorting by execution date first - sorted_ti_keys: Any = sorted( - ti_keys, - key=lambda ti_key: ( - ti_key.run_id, - ti_key.dag_id, - ti_key.task_id, - ti_key.map_index, - ti_key.try_number, - ), - ) - - if all(key.map_index == -1 for key in ti_keys): - headers = ["DAG ID", "Task ID", "Run ID", "Try number"] - sorted_ti_keys = (k[0:4] for k in sorted_ti_keys) - else: - headers = ["DAG ID", "Task ID", "Run ID", "Map Index", "Try number"] - - return tabulate(sorted_ti_keys, headers=headers) - - if ti_status.failed: - yield "Some task instances failed:\n" - yield tabulate_ti_keys_set(ti_status.failed) - if ti_status.deadlocked: - yield "BackfillJob is deadlocked." - deadlocked_depends_on_past = any( - t.are_dependencies_met( - dep_context=DepContext(ignore_depends_on_past=False), - session=session, - verbose=self.verbose, - ) - != t.are_dependencies_met( - dep_context=DepContext(ignore_depends_on_past=True), session=session, verbose=self.verbose - ) - for t in ti_status.deadlocked - ) - if deadlocked_depends_on_past: - yield ( - "Some of the deadlocked tasks were unable to run because " - 'of "depends_on_past" relationships. Try running the ' - "backfill with the option " - '"ignore_first_depends_on_past=True" or passing "-I" at ' - "the command line." - ) - yield "\nThese tasks have succeeded:\n" - yield tabulate_ti_keys_set(ti_status.succeeded) - yield "\n\nThese tasks are running:\n" - yield tabulate_ti_keys_set(ti_status.running) - yield "\n\nThese tasks have failed:\n" - yield tabulate_ti_keys_set(ti_status.failed) - yield "\n\nThese tasks are skipped:\n" - yield tabulate_ti_keys_set(ti_status.skipped) - yield "\n\nThese tasks are deadlocked:\n" - yield tabulate_ti_keys_set([ti.key for ti in ti_status.deadlocked]) - - @provide_session - def _execute_dagruns( - self, - dagrun_infos: Iterable[DagRunInfo], - ti_status: _DagRunTaskStatus, - pickle_id: int | None, - start_date: datetime.datetime | None, - session: Session = NEW_SESSION, - ) -> None: - """ - Compute and execute dag runs and their respective task instances for the given dates. - - Returns a list of execution dates of the dag runs that were executed. - - :param dagrun_infos: Schedule information for dag runs - :param ti_status: internal BackfillJobRunner status structure to tis track progress - :param pickle_id: numeric id of the pickled dag, None if not pickled - :param start_date: backfill start date - :param session: the current session object - """ - for dagrun_info in dagrun_infos: - dag_run = self._get_dag_run(dagrun_info, self.dag, session=session) - if dag_run is not None: - tis_map = self._task_instances_for_dag_run(self.dag, dag_run, session=session) - ti_status.active_runs.add(dag_run) - ti_status.to_run.update(tis_map or {}) - - tis_missing_executor = [] - for ti in ti_status.to_run.values(): - if ti.executor: - try: - ExecutorLoader.lookup_executor_name_by_str(ti.executor) - except UnknownExecutorException: - tis_missing_executor.append(ti) - - if tis_missing_executor: - raise UnknownExecutorException( - "The following task instances are configured to use an executor that is not present. 
" - "Review the core.executors Airflow configuration to add it or clear the task instance to " - "clear the executor configuration for this task.\n" - + "\n".join( - [f" {ti.task_id}: {ti.run_id} (executor: {ti.executor})" for ti in tis_missing_executor] - ) - ) - processed_dag_run_dates = self._process_backfill_task_instances( - ti_status=ti_status, - pickle_id=pickle_id, - start_date=start_date, - session=session, - ) - - ti_status.executed_dag_run_dates.update(processed_dag_run_dates) - - @provide_session - def _set_unfinished_dag_runs_to_failed( - self, - dag_runs: Iterable[DagRun], - session: Session = NEW_SESSION, - ) -> None: - """ - Update the state of each dagrun based on the task_instance state and set unfinished runs to failed. - - :param dag_runs: DAG runs - :param session: session - :return: None - """ - for dag_run in dag_runs: - dag_run.update_state() - if dag_run.state not in State.finished_dr_states: - dag_run.set_state(DagRunState.FAILED) - session.merge(dag_run) - - @provide_session - def _execute(self, session: Session = NEW_SESSION) -> None: - """ - Initialize all required components of a dag for a specified date range and execute the tasks. - - :meta private: - """ - ti_status = BackfillJobRunner._DagRunTaskStatus() - - start_date = self.bf_start_date - - # Get DagRun schedule between the start/end dates, which will turn into dag runs. - dagrun_start_date = timezone.coerce_datetime(start_date) - if self.bf_end_date is None: - dagrun_end_date = pendulum.now(timezone.utc) - else: - dagrun_end_date = pendulum.instance(self.bf_end_date) - dagrun_infos = list(self.dag.iter_dagrun_infos_between(dagrun_start_date, dagrun_end_date)) - if self.run_backwards: - tasks_that_depend_on_past = [t.task_id for t in self.dag.task_dict.values() if t.depends_on_past] - if tasks_that_depend_on_past: - raise AirflowException( - f"You cannot backfill backwards because one or more " - f'tasks depend_on_past: {",".join(tasks_that_depend_on_past)}' - ) - dagrun_infos = dagrun_infos[::-1] - - if not dagrun_infos: - if not self.run_at_least_once: - self.log.info("No run dates were found for the given dates and dag interval.") - return - dagrun_infos = [DagRunInfo.interval(dagrun_start_date, dagrun_end_date)] - - running_dagruns = DagRun.find( - dag_id=self.dag.dag_id, - execution_start_date=self.bf_start_date, - execution_end_date=self.bf_end_date, - no_backfills=True, - state=DagRunState.RUNNING, - ) - - if running_dagruns: - for run in running_dagruns: - self.log.error( - "Backfill cannot be created for DagRun %s in %s, as there's already %s in a RUNNING " - "state.", - run.run_id, - run.execution_date.strftime("%Y-%m-%dT%H:%M:%S"), - run.run_type, - ) - self.log.error( - "Changing DagRun into BACKFILL would cause scheduler to lose track of executing " - "tasks. Not changing DagRun type into BACKFILL, and trying insert another DagRun into " - "database would cause database constraint violation for dag_id + execution_date " - "combination. 
Please adjust backfill dates or wait for this DagRun to finish.", - ) - return - pickle_id = None - - _support_pickling = [] - - for executor in self.job.executors: - _support_pickling.append(executor.supports_pickling) - - executor.job_id = self.job.id - executor.start() - - if not self.donot_pickle and all(_support_pickling): - pickle = DagPickle(self.dag) - session.add(pickle) - session.commit() - pickle_id = pickle.id - - ti_status.total_runs = len(dagrun_infos) # total dag runs in backfill - - try: - remaining_dates = ti_status.total_runs - while remaining_dates > 0: - dagrun_infos_to_process = [ - dagrun_info - for dagrun_info in dagrun_infos - if dagrun_info.logical_date not in ti_status.executed_dag_run_dates - ] - self._execute_dagruns( - dagrun_infos=dagrun_infos_to_process, - ti_status=ti_status, - pickle_id=pickle_id, - start_date=start_date, - session=session, - ) - - remaining_dates = ti_status.total_runs - len(ti_status.executed_dag_run_dates) - err = "".join(self._collect_errors(ti_status=ti_status, session=session)) - if err: - if not self.continue_on_failures or ti_status.deadlocked: - raise BackfillUnfinished(err, ti_status) - - if remaining_dates > 0: - self.log.info( - "max_active_runs limit for dag %s has been reached " - " - waiting for other dag runs to finish", - self.dag_id, - ) - time.sleep(self.delay_on_limit_secs) - except (KeyboardInterrupt, SystemExit): - self.log.warning("Backfill terminated by user.") - - # TODO: we will need to terminate running task instances and set the - # state to failed. - self._set_unfinished_dag_runs_to_failed(ti_status.active_runs) - except OperationalError: - self.log.exception( - "Backfill job dead-locked. The job will retry the job so it is likely " - "to heal itself. If your backfill succeeds you can ignore this exception.", - ) - raise - finally: - session.commit() - for executor in self.job.executors: - executor.end() - - self.log.info("Backfill done for DAG %s. Exiting.", self.dag) - - @provide_session - def reset_state_for_orphaned_tasks( - self, - filter_by_dag_run: DagRun | None = None, - session: Session = NEW_SESSION, - ) -> int | None: - """ - Reset state of orphaned tasks. - - This function checks if there are any tasks in the dagrun (or all) that - have a schedule or queued states but are not known by the executor. If - it finds those it will reset the state to None so they will get picked - up again. The batch option is for performance reasons as the queries - are made in sequence. - - :param filter_by_dag_run: the dag_run we want to process, None if all - :return: the number of TIs reset - """ - queued_tis = [] - running_tis = [] - for executor in self.job.executors: - queued_tis.append(executor.queued_tasks) - # also consider running as the state might not have changed in the db yet - running_tis.append(executor.running) - - # Can't use an update here since it doesn't support joins. 
- resettable_states = [TaskInstanceState.SCHEDULED, TaskInstanceState.QUEUED] - if filter_by_dag_run is None: - resettable_tis = ( - session.scalars( - select(TaskInstance) - .join(TaskInstance.dag_run) - .where( - DagRun.state == DagRunState.RUNNING, - DagRun.run_type != DagRunType.BACKFILL_JOB, - TaskInstance.state.in_(resettable_states), - ) - ) - ).all() - else: - resettable_tis = filter_by_dag_run.get_task_instances(state=resettable_states, session=session) - - tis_to_reset = [ti for ti in resettable_tis if ti.key not in queued_tis and ti.key not in running_tis] - if not tis_to_reset: - return 0 - - def query(result, items): - if not items: - return result - - filter_for_tis = TaskInstance.filter_for_tis(items) - reset_tis = session.scalars( - select(TaskInstance) - .where(filter_for_tis, TaskInstance.state.in_(resettable_states)) - .with_for_update() - ).all() - - for ti in reset_tis: - ti.state = None - session.merge(ti) - - return result + reset_tis - - reset_tis = helpers.reduce_in_chunks(query, tis_to_reset, [], self.job.max_tis_per_query) - - task_instance_str = "\n".join(f"\t{x!r}" for x in reset_tis) - session.flush() - - self.log.info("Reset the following %s TaskInstances:\n%s", len(reset_tis), task_instance_str) - return len(reset_tis) diff --git a/airflow/jobs/job.py b/airflow/jobs/job.py index 03bf92d4e3d53..0c2db219ef957 100644 --- a/airflow/jobs/job.py +++ b/airflow/jobs/job.py @@ -77,8 +77,6 @@ class Job(Base, LoggingMixin): The ORM class representing Job stored in the database. Jobs are processing items with state and duration that aren't task instances. - For instance a BackfillJob is a collection of task instance runs, - but should have its own state, start and end time. """ __tablename__ = "job" @@ -117,7 +115,7 @@ class Job(Base, LoggingMixin): """ TaskInstances which have been enqueued by this Job. - Only makes sense for SchedulerJob and BackfillJob instances. + Only makes sense for SchedulerJob. 
""" def __init__(self, executor: BaseExecutor | None = None, heartrate=None, **kwargs): diff --git a/airflow/jobs/scheduler_job_runner.py b/airflow/jobs/scheduler_job_runner.py index 2999ed391bc94..30f58885a9aa2 100644 --- a/airflow/jobs/scheduler_job_runner.py +++ b/airflow/jobs/scheduler_job_runner.py @@ -30,7 +30,7 @@ from pathlib import Path from typing import TYPE_CHECKING, Any, Callable, Collection, Iterable, Iterator -from sqlalchemy import and_, delete, func, not_, or_, select, text, update +from sqlalchemy import and_, delete, exists, func, not_, or_, select, text, update from sqlalchemy.exc import OperationalError from sqlalchemy.orm import lazyload, load_only, make_transient, selectinload from sqlalchemy.sql import expression @@ -45,12 +45,14 @@ from airflow.jobs.job import Job, perform_heartbeat from airflow.models import Log from airflow.models.asset import ( + AssetActive, AssetDagRunQueue, AssetEvent, AssetModel, DagScheduleAssetReference, TaskOutletAssetReference, ) +from airflow.models.backfill import Backfill from airflow.models.dag import DAG, DagModel from airflow.models.dagbag import DagBag from airflow.models.dagrun import DagRun @@ -1063,6 +1065,11 @@ def _run_scheduler_loop(self) -> None: self.check_trigger_timeouts, ) + timers.call_regular_interval( + 30, + self._mark_backfills_complete, + ) + timers.call_regular_interval( conf.getfloat("scheduler", "pool_metrics_interval", fallback=5.0), self._emit_pool_metrics, @@ -1288,6 +1295,28 @@ def _create_dagruns_for_dags(self, guard: CommitProhibitorGuard, session: Sessio guard.commit() # END: create dagruns + @provide_session + def _mark_backfills_complete(self, session: Session = NEW_SESSION) -> None: + """Mark completed backfills as completed.""" + self.log.debug("checking for completed backfills.") + unfinished_states = (DagRunState.RUNNING, DagRunState.QUEUED) + now = timezone.utcnow() + # todo: AIP-78 simplify this function to an update statement + query = select(Backfill).where( + Backfill.completed_at.is_(None), + ~exists( + select(DagRun.id).where( + and_(DagRun.backfill_id == Backfill.id, DagRun.state.in_(unfinished_states)) + ) + ), + ) + backfills = session.scalars(query).all() + if not backfills: + return + self.log.info("marking %s backfills as complete", len(backfills)) + for b in backfills: + b.completed_at = now + @add_span def _create_dag_runs(self, dag_models: Collection[DagModel], session: Session) -> None: """Create a DAG run and update the dag_model to control if/when the next DAGRun should be created.""" @@ -2034,15 +2063,14 @@ def _cleanup_stale_dags(self, session: Session = NEW_SESSION) -> None: SerializedDagModel.remove_dag(dag_id=dag.dag_id, session=session) session.flush() - def _set_orphaned(self, asset: AssetModel) -> int: - self.log.info("Orphaning unreferenced asset '%s'", asset.uri) - asset.is_orphaned = expression.true() - return 1 + def _get_orphaning_identifier(self, asset: AssetModel) -> tuple[str, str]: + self.log.info("Orphaning unreferenced %s", asset) + return asset.name, asset.uri @provide_session def _orphan_unreferenced_assets(self, session: Session = NEW_SESSION) -> None: """ - Detect orphaned assets and set is_orphaned flag to True. + Detect orphaned assets and remove their active entry. An orphaned asset is no longer referenced in any DAG schedule parameters or task outlets. 
""" @@ -2057,7 +2085,7 @@ def _orphan_unreferenced_assets(self, session: Session = NEW_SESSION) -> None: isouter=True, ) .group_by(AssetModel.id) - .where(~AssetModel.is_orphaned) + .where(AssetModel.active.has()) .having( and_( func.count(DagScheduleAssetReference.dag_id) == 0, @@ -2066,8 +2094,13 @@ def _orphan_unreferenced_assets(self, session: Session = NEW_SESSION) -> None: ) ) - updated_count = sum(self._set_orphaned(asset) for asset in orphaned_asset_query) - Stats.gauge("asset.orphaned", updated_count) + orphaning_identifiers = [self._get_orphaning_identifier(asset) for asset in orphaned_asset_query] + session.execute( + delete(AssetActive).where( + tuple_in_condition((AssetActive.name, AssetActive.uri), orphaning_identifiers) + ) + ) + Stats.gauge("asset.orphaned", len(orphaning_identifiers)) def _executor_to_tis(self, tis: list[TaskInstance]) -> dict[BaseExecutor, list[TaskInstance]]: """Organize TIs into lists per their respective executor.""" diff --git a/airflow/migrations/versions/0035_3_0_0_add_name_field_to_dataset_model.py b/airflow/migrations/versions/0035_3_0_0_add_name_field_to_dataset_model.py index 6016dd9658908..2460b6956cff6 100644 --- a/airflow/migrations/versions/0035_3_0_0_add_name_field_to_dataset_model.py +++ b/airflow/migrations/versions/0035_3_0_0_add_name_field_to_dataset_model.py @@ -63,7 +63,9 @@ def upgrade(): batch_op.add_column(sa.Column("name", _STRING_COLUMN_TYPE)) batch_op.add_column(sa.Column("group", _STRING_COLUMN_TYPE, default=str, nullable=False)) # Fill name from uri column. - Session(bind=op.get_bind()).execute(sa.text("update dataset set name=uri")) + with Session(bind=op.get_bind()) as session: + session.execute(sa.text("update dataset set name=uri")) + session.commit() # Set the name column non-nullable. # Now with values in there, we can create the new unique constraint and index. # Due to MySQL restrictions, we are also reducing the length on uri. diff --git a/airflow/migrations/versions/0037_3_0_0_add_asset_active.py b/airflow/migrations/versions/0037_3_0_0_add_asset_active.py new file mode 100644 index 0000000000000..422bc440dba85 --- /dev/null +++ b/airflow/migrations/versions/0037_3_0_0_add_asset_active.py @@ -0,0 +1,88 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Add AssetActive to track orphaning instead of a flag. + +Revision ID: 5a5d66100783 +Revises: c3389cd7793f +Create Date: 2024-10-01 08:39:48.997198 + +""" + +from __future__ import annotations + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.orm import Session + +# revision identifiers, used by Alembic. 
+revision = "5a5d66100783" +down_revision = "c3389cd7793f" +branch_labels = None +depends_on = None +airflow_version = "3.0.0" + +_STRING_COLUMN_TYPE = sa.String(length=1500).with_variant( + sa.String(length=1500, collation="latin1_general_cs"), + dialect_name="mysql", +) + + +def upgrade(): + op.create_table( + "asset_active", + sa.Column("name", _STRING_COLUMN_TYPE, nullable=False), + sa.Column("uri", _STRING_COLUMN_TYPE, nullable=False), + sa.PrimaryKeyConstraint("name", "uri", name="asset_active_pkey"), + sa.ForeignKeyConstraint( + columns=["name", "uri"], + refcolumns=["dataset.name", "dataset.uri"], + name="asset_active_asset_name_uri_fkey", + ondelete="CASCADE", + ), + sa.Index("idx_asset_active_name_unique", "name", unique=True), + sa.Index("idx_asset_active_uri_unique", "uri", unique=True), + ) + with Session(bind=op.get_bind()) as session: + session.execute( + sa.text( + "insert into asset_active (name, uri) " + "select name, uri from dataset where is_orphaned = false" + ) + ) + session.commit() + with op.batch_alter_table("dataset", schema=None) as batch_op: + batch_op.drop_column("is_orphaned") + + +def downgrade(): + with op.batch_alter_table("dataset", schema=None) as batch_op: + batch_op.add_column( + sa.Column("is_orphaned", sa.Boolean, default=False, nullable=False, server_default="0") + ) + with Session(bind=op.get_bind()) as session: + session.execute( + sa.text( + "update dataset set is_orphaned = true " + "where exists (select 1 from asset_active " + "where dataset.name = asset_active.name and dataset.uri = asset_active.uri)" + ) + ) + session.commit() + op.drop_table("asset_active") diff --git a/airflow/models/asset.py b/airflow/models/asset.py index fb56bc4bf1ecf..d5ca0ea513f12 100644 --- a/airflow/models/asset.py +++ b/airflow/models/asset.py @@ -21,7 +21,6 @@ import sqlalchemy_jsonfield from sqlalchemy import ( - Boolean, Column, ForeignKey, ForeignKeyConstraint, @@ -192,7 +191,8 @@ class AssetModel(Base): created_at = Column(UtcDateTime, default=timezone.utcnow, nullable=False) updated_at = Column(UtcDateTime, default=timezone.utcnow, onupdate=timezone.utcnow, nullable=False) - is_orphaned = Column(Boolean, default=False, nullable=False, server_default="0") + + active = relationship("AssetActive", uselist=False, viewonly=True) consuming_dags = relationship("DagScheduleAssetReference", back_populates="dataset") producing_tasks = relationship("TaskOutletAssetReference", back_populates="dataset") @@ -232,6 +232,61 @@ def to_public(self) -> Asset: return Asset(uri=self.uri, extra=self.extra) +class AssetActive(Base): + """ + Collection of active assets. + + An asset is considered active if it is declared by the user in any DAG files. + AssetModel entries that are not active (also called orphaned in some parts + of the code base) are still kept in the database, but have their corresponding + entries in this table removed. This ensures we keep all possible history on + distinct assets (those with non-matching name-URI pairs), but still ensure + *name and URI are each unique* within active assets. 
+ """ + + name = Column( + String(length=1500).with_variant( + String( + length=1500, + # latin1 allows for more indexed length in mysql + # and this field should only be ascii chars + collation="latin1_general_cs", + ), + "mysql", + ), + nullable=False, + ) + uri = Column( + String(length=1500).with_variant( + String( + length=1500, + # latin1 allows for more indexed length in mysql + # and this field should only be ascii chars + collation="latin1_general_cs", + ), + "mysql", + ), + nullable=False, + ) + + __tablename__ = "asset_active" + __table_args__ = ( + PrimaryKeyConstraint(name, uri, name="asset_active_pkey"), + ForeignKeyConstraint( + columns=[name, uri], + refcolumns=["dataset.name", "dataset.uri"], + name="asset_active_asset_name_uri_fkey", + ondelete="CASCADE", + ), + Index("idx_asset_active_name_unique", name, unique=True), + Index("idx_asset_active_uri_unique", uri, unique=True), + ) + + @classmethod + def for_asset(cls, asset: AssetModel) -> AssetActive: + return cls(name=asset.name, uri=asset.uri) + + class DagScheduleAssetAliasReference(Base): """References from a DAG to an asset alias of which it is a consumer.""" diff --git a/airflow/models/backfill.py b/airflow/models/backfill.py index e8c31015c27b6..aa9cb695b7579 100644 --- a/airflow/models/backfill.py +++ b/airflow/models/backfill.py @@ -32,9 +32,7 @@ from airflow.api_connexion.exceptions import Conflict, NotFound from airflow.exceptions import AirflowException -from airflow.models import DagRun from airflow.models.base import Base, StringID -from airflow.models.serialized_dag import SerializedDagModel from airflow.settings import json from airflow.utils import timezone from airflow.utils.session import create_session @@ -43,7 +41,8 @@ from airflow.utils.types import DagRunTriggeredByType, DagRunType if TYPE_CHECKING: - from pendulum import DateTime + from datetime import datetime + log = logging.getLogger(__name__) @@ -123,12 +122,14 @@ def validate_sort_ordinal(self, key, val): def _create_backfill( *, dag_id: str, - from_date: DateTime, - to_date: DateTime, + from_date: datetime, + to_date: datetime, max_active_runs: int, reverse: bool, dag_run_conf: dict | None, ) -> Backfill | None: + from airflow.models.serialized_dag import SerializedDagModel + with create_session() as session: serdag = session.get(SerializedDagModel, dag_id) if not serdag: @@ -215,6 +216,8 @@ def _cancel_backfill(backfill_id) -> Backfill: session.commit() + from airflow.models import DagRun + # now, let's mark all queued dag runs as failed query = ( update(DagRun) diff --git a/airflow/models/baseoperator.py b/airflow/models/baseoperator.py index 9e0c8e1e69b61..514553e05a2dd 100644 --- a/airflow/models/baseoperator.py +++ b/airflow/models/baseoperator.py @@ -34,6 +34,7 @@ import warnings from datetime import datetime, timedelta from functools import total_ordering, wraps +from threading import local from types import FunctionType from typing import ( TYPE_CHECKING, @@ -44,7 +45,6 @@ NoReturn, Sequence, TypeVar, - Union, cast, ) @@ -127,7 +127,9 @@ def parse_retries(retries: Any) -> int | None: - if retries is None or type(retries) == int: # noqa: E721 + if retries is None: + return 0 + elif type(retries) == int: # noqa: E721 return retries try: parsed_retries = int(retries) @@ -390,6 +392,8 @@ class ExecutorSafeguard: """ test_mode = conf.getboolean("core", "unit_test_mode") + _sentinel = local() + _sentinel.callers = {} @classmethod def decorator(cls, func): @@ -397,7 +401,13 @@ def decorator(cls, func): def wrapper(self, *args, **kwargs): 
from airflow.decorators.base import DecoratedOperator - sentinel = kwargs.pop(f"{self.__class__.__name__}__sentinel", None) + sentinel_key = f"{self.__class__.__name__}__sentinel" + sentinel = kwargs.pop(sentinel_key, None) + + if sentinel: + cls._sentinel.callers[sentinel_key] = sentinel + else: + sentinel = cls._sentinel.callers.pop(f"{func.__qualname__.split('.')[0]}__sentinel", None) if not cls.test_mode and not sentinel == _sentinel and not isinstance(self, DecoratedOperator): message = f"{self.__class__.__name__}.{func.__name__} cannot be called outside TaskInstance!" @@ -1778,10 +1788,6 @@ def expand_start_trigger_args(self, *, context: Context, session: Session) -> St return self.start_trigger_args -# TODO: Deprecate for Airflow 3.0 -Chainable = Union[DependencyMixin, Sequence[DependencyMixin]] - - def chain(*tasks: DependencyMixin | Sequence[DependencyMixin]) -> None: r""" Given a number of tasks, builds a dependency chain. diff --git a/airflow/models/dag.py b/airflow/models/dag.py index 246e0e95ec617..f8d9f55e56e18 100644 --- a/airflow/models/dag.py +++ b/airflow/models/dag.py @@ -28,7 +28,6 @@ import sys import time import traceback -import warnings import weakref from collections import abc, defaultdict, deque from contextlib import ExitStack @@ -88,13 +87,11 @@ DuplicateTaskIdFound, FailStopDagInvalidTriggerRule, ParamValidationError, - RemovedInAirflow3Warning, TaskDeferred, TaskNotFound, UnknownExecutorException, ) from airflow.executors.executor_loader import ExecutorLoader -from airflow.jobs.job import run_job from airflow.models.abstractoperator import AbstractOperator, TaskStateChangeCallback from airflow.models.asset import ( AssetDagRunQueue, @@ -113,7 +110,6 @@ clear_task_instances, ) from airflow.models.tasklog import LogTemplate -from airflow.providers.fab import __version__ as FAB_VERSION from airflow.secrets.local_filesystem import LocalFilesystemBackend from airflow.security import permissions from airflow.settings import json @@ -796,6 +792,9 @@ def _upgrade_outdated_dag_access_control(access_control=None): """Look for outdated dag level actions in DAG access_controls and replace them with updated actions.""" if access_control is None: return None + + from airflow.providers.fab import __version__ as FAB_VERSION + updated_access_control = {} for role, perms in access_control.items(): if packaging_version.parse(FAB_VERSION) >= packaging_version.parse("1.3.0"): @@ -2294,85 +2293,6 @@ def _remove_task(self, task_id: str) -> None: self.task_count = len(self.task_dict) - def run( - self, - start_date=None, - end_date=None, - mark_success=False, - local=False, - donot_pickle=airflow_conf.getboolean("core", "donot_pickle"), - ignore_task_deps=False, - ignore_first_depends_on_past=True, - pool=None, - delay_on_limit_secs=1.0, - verbose=False, - conf=None, - rerun_failed_tasks=False, - run_backwards=False, - run_at_least_once=False, - continue_on_failures=False, - disable_retry=False, - ): - """ - Run the DAG. 
- - :param start_date: the start date of the range to run - :param end_date: the end date of the range to run - :param mark_success: True to mark jobs as succeeded without running them - :param local: True to run the tasks using the LocalExecutor - :param donot_pickle: True to avoid pickling DAG object and send to workers - :param ignore_task_deps: True to skip upstream tasks - :param ignore_first_depends_on_past: True to ignore depends_on_past - dependencies for the first set of tasks only - :param pool: Resource pool to use - :param delay_on_limit_secs: Time in seconds to wait before next attempt to run - dag run when max_active_runs limit has been reached - :param verbose: Make logging output more verbose - :param conf: user defined dictionary passed from CLI - :param rerun_failed_tasks: - :param run_backwards: - :param run_at_least_once: If true, always run the DAG at least once even - if no logical run exists within the time range. - """ - warnings.warn( - "`DAG.run()` is deprecated and will be removed in Airflow 3.0. Consider " - "using `DAG.test()` instead, or trigger your dag via API.", - RemovedInAirflow3Warning, - stacklevel=2, - ) - - from airflow.executors.executor_loader import ExecutorLoader - from airflow.jobs.backfill_job_runner import BackfillJobRunner - - if local: - from airflow.executors.local_executor import LocalExecutor - - ExecutorLoader.set_default_executor(LocalExecutor()) - - from airflow.jobs.job import Job - - job = Job() - job_runner = BackfillJobRunner( - job=job, - dag=self, - start_date=start_date, - end_date=end_date, - mark_success=mark_success, - donot_pickle=donot_pickle, - ignore_task_deps=ignore_task_deps, - ignore_first_depends_on_past=ignore_first_depends_on_past, - pool=pool, - delay_on_limit_secs=delay_on_limit_secs, - verbose=verbose, - conf=conf, - rerun_failed_tasks=rerun_failed_tasks, - run_backwards=run_backwards, - run_at_least_once=run_at_least_once, - continue_on_failures=continue_on_failures, - disable_retry=disable_retry, - ) - run_job(job=job, execute_callable=job_runner._execute) - def cli(self): """Exposes a CLI specific to this DAG.""" check_cycle(self) @@ -2461,8 +2381,7 @@ def add_logger_if_needed(ti: TaskInstance): tasks = self.task_dict self.log.debug("starting dagrun") # Instead of starting a scheduler, we run the minimal loop possible to check - # for task readiness and dependency management. This is notably faster - # than creating a BackfillJob and allows us to surface logs to the user + # for task readiness and dependency management. # ``Dag.test()`` works in two different modes depending on ``use_executor``: # - if ``use_executor`` is False, runs the task locally with no executor using ``_run_task`` @@ -2674,6 +2593,7 @@ def bulk_write_to_db( orm_asset_aliases = asset_op.add_asset_aliases(session=session) session.flush() # This populates id so we can create fks in later calls. + asset_op.add_asset_active_references(orm_assets.values(), session=session) asset_op.add_dag_asset_references(orm_dags, orm_assets, session=session) asset_op.add_dag_asset_alias_references(orm_dags, orm_asset_aliases, session=session) asset_op.add_task_asset_references(orm_dags, orm_assets, session=session) @@ -3123,6 +3043,18 @@ def set_is_paused(self, is_paused: bool, session=NEW_SESSION) -> None: def dag_display_name(self) -> str: return self._dag_display_property_value or self.dag_id + @dag_display_name.expression # type: ignore[no-redef] + def dag_display_name(self) -> str: + """ + Expression part of the ``dag_display`` name hybrid property. 
+ + :meta private: + """ + return case( + (self._dag_display_property_value.isnot(None), self._dag_display_property_value), + else_=self.dag_id, + ) + @classmethod @internal_api_call @provide_session diff --git a/airflow/models/dagpickle.py b/airflow/models/dagpickle.py index e6f4561d8e1bf..c06ef09709f1c 100644 --- a/airflow/models/dagpickle.py +++ b/airflow/models/dagpickle.py @@ -32,7 +32,7 @@ class DagPickle(Base): """ - Represents a version of a DAG and becomes a source of truth for a BackfillJob execution. + Represents a version of a DAG and becomes a source of truth for an execution. Dags can originate from different places (user repos, main repo, ...) and also get executed in different places (different executors). A pickle is a native python serialized object, diff --git a/airflow/models/dagrun.py b/airflow/models/dagrun.py index 3abf16b7f9ee3..cad82e72b8b2d 100644 --- a/airflow/models/dagrun.py +++ b/airflow/models/dagrun.py @@ -57,6 +57,7 @@ from airflow.listeners.listener import get_listener_manager from airflow.models import Log from airflow.models.abstractoperator import NotMapped +from airflow.models.backfill import Backfill from airflow.models.base import Base, StringID from airflow.models.expandinput import NotFullyPopulated from airflow.models.taskinstance import TaskInstance as TI @@ -207,7 +208,7 @@ class DagRun(Base, LoggingMixin): uselist=False, cascade="all, delete, delete-orphan", ) - backfill = relationship("Backfill", uselist=False) + backfill = relationship(Backfill, uselist=False) backfill_max_active_runs = association_proxy("backfill", "max_active_runs") max_active_runs = association_proxy("dag_model", "max_active_runs") @@ -1110,7 +1111,7 @@ def notify_dagrun_state_changed(self, msg: str = ""): elif self.state == DagRunState.FAILED: get_listener_manager().hook.on_dag_run_failed(dag_run=self, msg=msg) # deliberately not notifying on QUEUED - # we can't get all the state changes on SchedulerJob, BackfillJob + # we can't get all the state changes on SchedulerJob, # or LocalTaskJob, so we don't want to "falsely advertise" we notify about that def _get_ready_tis( diff --git a/airflow/models/taskinstance.py b/airflow/models/taskinstance.py index 333a4cad91cbe..c1373e5d6a12d 100644 --- a/airflow/models/taskinstance.py +++ b/airflow/models/taskinstance.py @@ -31,6 +31,7 @@ from contextlib import nullcontext from datetime import timedelta from enum import Enum +from functools import cache from typing import TYPE_CHECKING, Any, Callable, Collection, Generator, Iterable, Mapping, Tuple from urllib.parse import quote @@ -69,7 +70,6 @@ from airflow.api_internal.internal_api_call import InternalApiConfig, internal_api_call from airflow.assets import Asset, AssetAlias from airflow.assets.manager import asset_manager -from airflow.compat.functools import cache from airflow.configuration import conf from airflow.exceptions import ( AirflowException, @@ -1632,11 +1632,12 @@ def _get_previous_ti( @internal_api_call @provide_session -def _update_rtif(ti, rendered_fields, session: Session | None = None): +def _update_rtif(ti, rendered_fields, session: Session = NEW_SESSION): from airflow.models.renderedtifields import RenderedTaskInstanceFields rtif = RenderedTaskInstanceFields(ti=ti, render_templates=False, rendered_fields=rendered_fields) RenderedTaskInstanceFields.write(rtif, session=session) + session.flush() RenderedTaskInstanceFields.delete_old_records(ti.task_id, ti.dag_id, session=session) @@ -2640,7 +2641,7 @@ def _check_and_change_state_before_execution( :param 
mark_success: Don't run the task, mark its state as success :param test_mode: Doesn't record success or failure in the DB :param hostname: The hostname of the worker running the task instance. - :param job_id: Job (BackfillJob / LocalTaskJob / SchedulerJob) ID + :param job_id: Job (LocalTaskJob / SchedulerJob) ID :param pool: specifies the pool to use to run the task instance :param external_executor_id: The identifier of the celery executor :param session: SQLAlchemy ORM Session @@ -3405,53 +3406,6 @@ def render_templates( return original_task - def render_k8s_pod_yaml(self) -> dict | None: - """Render the k8s pod yaml.""" - try: - from airflow.providers.cncf.kubernetes.template_rendering import ( - render_k8s_pod_yaml as render_k8s_pod_yaml_from_provider, - ) - except ImportError: - raise RuntimeError( - "You need to have the `cncf.kubernetes` provider installed to use this feature. " - "Also rather than calling it directly you should import " - "render_k8s_pod_yaml from airflow.providers.cncf.kubernetes.template_rendering " - "and call it with TaskInstance as the first argument." - ) - warnings.warn( - "You should not call `task_instance.render_k8s_pod_yaml` directly. This method will be removed" - "in Airflow 3. Rather than calling it directly you should import " - "`render_k8s_pod_yaml` from `airflow.providers.cncf.kubernetes.template_rendering` " - "and call it with `TaskInstance` as the first argument.", - DeprecationWarning, - stacklevel=2, - ) - return render_k8s_pod_yaml_from_provider(self) - - @provide_session - def get_rendered_k8s_spec(self, session: Session = NEW_SESSION): - """Render the k8s pod yaml.""" - try: - from airflow.providers.cncf.kubernetes.template_rendering import ( - get_rendered_k8s_spec as get_rendered_k8s_spec_from_provider, - ) - except ImportError: - raise RuntimeError( - "You need to have the `cncf.kubernetes` provider installed to use this feature. " - "Also rather than calling it directly you should import " - "`get_rendered_k8s_spec` from `airflow.providers.cncf.kubernetes.template_rendering` " - "and call it with `TaskInstance` as the first argument." - ) - warnings.warn( - "You should not call `task_instance.render_k8s_pod_yaml` directly. This method will be removed" - "in Airflow 3. 
Rather than calling it directly you should import " - "`get_rendered_k8s_spec` from `airflow.providers.cncf.kubernetes.template_rendering` " - "and call it with `TaskInstance` as the first argument.", - DeprecationWarning, - stacklevel=2, - ) - return get_rendered_k8s_spec_from_provider(self, session=session) - def get_email_subject_content( self, exception: BaseException, task: BaseOperator | None = None ) -> tuple[str, str, str]: diff --git a/airflow/operators/python.py b/airflow/operators/python.py index a4788caedf438..b032b45ed3e6e 100644 --- a/airflow/operators/python.py +++ b/airflow/operators/python.py @@ -30,13 +30,13 @@ import warnings from abc import ABCMeta, abstractmethod from collections.abc import Container +from functools import cache from pathlib import Path from tempfile import TemporaryDirectory from typing import TYPE_CHECKING, Any, Callable, Collection, Iterable, Mapping, NamedTuple, Sequence import lazy_object_proxy -from airflow.compat.functools import cache from airflow.exceptions import ( AirflowConfigException, AirflowException, diff --git a/airflow/providers/.gitignore b/airflow/providers/.gitignore deleted file mode 100644 index 9b4a1a9d8f3ed..0000000000000 --- a/airflow/providers/.gitignore +++ /dev/null @@ -1 +0,0 @@ -get_provider_info.py diff --git a/airflow/providers_manager.py b/airflow/providers_manager.py index 2c673063cb23e..573d256d6e59a 100644 --- a/airflow/providers_manager.py +++ b/airflow/providers_manager.py @@ -31,13 +31,13 @@ from dataclasses import dataclass from functools import wraps from time import perf_counter -from typing import TYPE_CHECKING, Any, Callable, MutableMapping, NamedTuple, NoReturn, TypeVar +from typing import TYPE_CHECKING, Any, Callable, MutableMapping, NamedTuple, TypeVar from packaging.utils import canonicalize_name from airflow.exceptions import AirflowOptionalProviderFeatureException -from airflow.hooks.filesystem import FSHook -from airflow.hooks.package_index import PackageIndexHook +from airflow.providers.standard.hooks.filesystem import FSHook +from airflow.providers.standard.hooks.package_index import PackageIndexHook from airflow.typing_compat import ParamSpec from airflow.utils import yaml from airflow.utils.entry_points import entry_points_with_dist @@ -362,7 +362,7 @@ def _correctness_check(provider_package: str, class_name: str, provider_info: Pr # We want to have better control over initialization of parameters and be able to debug and test it # So we add our own decorator -def provider_info_cache(cache_name: str) -> Callable[[Callable[PS, NoReturn]], Callable[PS, None]]: +def provider_info_cache(cache_name: str) -> Callable[[Callable[PS, None]], Callable[PS, None]]: """ Decorate and cache provider info. 
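The providers_manager.py hunks here retype ``provider_info_cache`` from ``Callable[PS, NoReturn]`` to ``Callable[PS, None]``: the wrapped initializers do return (with ``None``), so ``NoReturn`` was inaccurate. As a minimal sketch of this decorator shape only (not the Airflow implementation; ``example_info_cache`` and ``_initialized_caches`` are made-up names, and ``typing.ParamSpec`` assumes Python 3.10+):

from __future__ import annotations

from functools import wraps
from typing import Callable, ParamSpec

PS = ParamSpec("PS")

# Hypothetical registry of caches that have already been initialized.
_initialized_caches: set[str] = set()


def example_info_cache(cache_name: str) -> Callable[[Callable[PS, None]], Callable[PS, None]]:
    """Run the wrapped initializer at most once per ``cache_name``."""

    def decorator(func: Callable[PS, None]) -> Callable[PS, None]:
        @wraps(func)
        def wrapped(*args: PS.args, **kwargs: PS.kwargs) -> None:
            if cache_name in _initialized_caches:
                return  # already initialized; skip the expensive work
            func(*args, **kwargs)  # initializer returns None, hence Callable[PS, None]
            _initialized_caches.add(cache_name)

        return wrapped

    return decorator


@example_info_cache("hooks")
def initialize_hooks(verbose: bool = False) -> None:
    if verbose:
        print("initializing hooks cache")


initialize_hooks(verbose=True)  # runs the initializer
initialize_hooks(verbose=True)  # no-op: cache name already recorded

ParamSpec keeps the decorated function's argument signature intact for type checkers, which is why the hunk only needed to correct the return annotation.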
@@ -370,7 +370,7 @@ def provider_info_cache(cache_name: str) -> Callable[[Callable[PS, NoReturn]], C :param cache_name: Name of the cache """ - def provider_info_cache_decorator(func: Callable[PS, NoReturn]) -> Callable[PS, None]: + def provider_info_cache_decorator(func: Callable[PS, None]) -> Callable[PS, None]: @wraps(func) def wrapped_function(*args: PS.args, **kwargs: PS.kwargs) -> None: providers_manager_instance = args[0] diff --git a/airflow/reproducible_build.yaml b/airflow/reproducible_build.yaml index 8a35282492059..bbf930c9d5fa2 100644 --- a/airflow/reproducible_build.yaml +++ b/airflow/reproducible_build.yaml @@ -1,2 +1,2 @@ -release-notes-hash: cc9c5c2ea1cade5d714aa4832587e13a -source-date-epoch: 1727595745 +release-notes-hash: 48b5906017b111b01aeb940a1ec904bd +source-date-epoch: 1728409953 diff --git a/airflow/sensors/filesystem.py b/airflow/sensors/filesystem.py index 5d32ab07ad4e7..4496f5d6abfa4 100644 --- a/airflow/sensors/filesystem.py +++ b/airflow/sensors/filesystem.py @@ -25,7 +25,7 @@ from airflow.configuration import conf from airflow.exceptions import AirflowException -from airflow.hooks.filesystem import FSHook +from airflow.providers.standard.hooks.filesystem import FSHook from airflow.sensors.base import BaseSensorOperator from airflow.triggers.base import StartTriggerArgs from airflow.triggers.file import FileTrigger diff --git a/airflow/serialization/pydantic/asset.py b/airflow/serialization/pydantic/asset.py index 29806d3bdf911..4cd264902091a 100644 --- a/airflow/serialization/pydantic/asset.py +++ b/airflow/serialization/pydantic/asset.py @@ -51,7 +51,6 @@ class AssetPydantic(BaseModelPydantic): extra: Optional[dict] created_at: datetime updated_at: datetime - is_orphaned: bool consuming_dags: List[DagScheduleAssetReferencePydantic] producing_tasks: List[TaskOutletAssetReferencePydantic] diff --git a/airflow/serialization/serialized_objects.py b/airflow/serialization/serialized_objects.py index a4801b767acc5..9f180c2a5deac 100644 --- a/airflow/serialization/serialized_objects.py +++ b/airflow/serialization/serialized_objects.py @@ -24,6 +24,7 @@ import inspect import logging import weakref +from functools import cache from inspect import signature from textwrap import dedent from typing import TYPE_CHECKING, Any, Collection, Iterable, Mapping, NamedTuple, Union, cast @@ -43,7 +44,6 @@ _AssetAliasCondition, ) from airflow.callbacks.callback_requests import DagCallbackRequest, TaskCallbackRequest -from airflow.compat.functools import cache from airflow.exceptions import AirflowException, SerializationError, TaskDeferred from airflow.jobs.job import Job from airflow.models import Trigger diff --git a/airflow/serialization/serializers/timezone.py b/airflow/serialization/serializers/timezone.py index a1f40e67c6972..3d2a29ea08bc4 100644 --- a/airflow/serialization/serializers/timezone.py +++ b/airflow/serialization/serializers/timezone.py @@ -18,7 +18,6 @@ from __future__ import annotations import datetime -import sys from typing import TYPE_CHECKING, Any, cast from airflow.utils.module_loading import qualname @@ -30,15 +29,9 @@ serializers = [ "pendulum.tz.timezone.FixedTimezone", "pendulum.tz.timezone.Timezone", + "zoneinfo.ZoneInfo", ] -PY39 = sys.version_info >= (3, 9) - -if PY39: - serializers.append("zoneinfo.ZoneInfo") -else: - serializers.append("backports.zoneinfo.ZoneInfo") - deserializers = serializers __version__ = 1 @@ -83,11 +76,8 @@ def deserialize(classname: str, version: int, data: object) -> Any: if version > __version__: raise 
TypeError(f"serialized {version} of {classname} > {__version__}") - if "zoneinfo.ZoneInfo" in classname: - try: - from zoneinfo import ZoneInfo - except ImportError: - from backports.zoneinfo import ZoneInfo + if classname == "backports.zoneinfo.ZoneInfo" and isinstance(data, str): + from zoneinfo import ZoneInfo return ZoneInfo(data) diff --git a/airflow/settings.py b/airflow/settings.py index 7a805f64a29c7..a6adbbcf9ff77 100644 --- a/airflow/settings.py +++ b/airflow/settings.py @@ -377,7 +377,8 @@ def is_called_from_test_code(self) -> tuple[bool, traceback.FrameSummary | None] and not tb.filename == AIRFLOW_UTILS_SESSION_PATH ] if any( - filename.endswith("conftest.py") or filename.endswith("tests/test_utils/db.py") + filename.endswith("conftest.py") + or filename.endswith("dev/airflow_common_pytest/test_utils/db.py") for filename, _, _, _ in airflow_frames ): # This is a fixture call or testing utilities diff --git a/airflow/ti_deps/dependencies_deps.py b/airflow/ti_deps/dependencies_deps.py index 44d6bfc5c7db7..c167cdb346325 100644 --- a/airflow/ti_deps/dependencies_deps.py +++ b/airflow/ti_deps/dependencies_deps.py @@ -17,7 +17,6 @@ from __future__ import annotations from airflow.ti_deps.dependencies_states import ( - BACKFILL_QUEUEABLE_STATES, QUEUEABLE_STATES, RUNNABLE_STATES, ) @@ -48,13 +47,6 @@ TaskNotRunningDep(), } -BACKFILL_QUEUED_DEPS = { - RunnableExecDateDep(), - ValidStateDep(BACKFILL_QUEUEABLE_STATES), - DagrunRunningDep(), - TaskNotRunningDep(), -} - # TODO(aoen): SCHEDULER_QUEUED_DEPS is not coupled to actual scheduling/execution # in any way and could easily be modified or removed from the scheduler causing # this dependency to become outdated and incorrect. This coupling should be created diff --git a/airflow/ti_deps/dependencies_states.py b/airflow/ti_deps/dependencies_states.py index fd25d62f6d75e..ebf581ab48e18 100644 --- a/airflow/ti_deps/dependencies_states.py +++ b/airflow/ti_deps/dependencies_states.py @@ -42,12 +42,3 @@ QUEUEABLE_STATES = { TaskInstanceState.SCHEDULED, } - -BACKFILL_QUEUEABLE_STATES = { - # For cases like unit tests and run manually - None, - TaskInstanceState.UP_FOR_RESCHEDULE, - TaskInstanceState.UP_FOR_RETRY, - # For normal backfill cases - TaskInstanceState.SCHEDULED, -} diff --git a/airflow/ui/.prettierignore b/airflow/ui/.prettierignore index a2bcd8157d69c..49a8631b874a0 100644 --- a/airflow/ui/.prettierignore +++ b/airflow/ui/.prettierignore @@ -3,3 +3,5 @@ templates/**/*.html dist/ *.md *.yaml +coverage/* +.pnpm-store diff --git a/airflow/ui/eslint.config.js b/airflow/ui/eslint.config.js index fcce70dff619a..31467415319b3 100644 --- a/airflow/ui/eslint.config.js +++ b/airflow/ui/eslint.config.js @@ -34,7 +34,7 @@ import { unicornRules } from "./rules/unicorn.js"; */ export default /** @type {const} @satisfies {ReadonlyArray} */ ([ // Global ignore of dist directory - { ignores: ["**/dist/"] }, + { ignores: ["**/dist/", "**coverage/"] }, // Base rules coreRules, typescriptRules, diff --git a/airflow/ui/openapi-gen/queries/common.ts b/airflow/ui/openapi-gen/queries/common.ts index ff7bb3995cb49..2f1c6a78d92f0 100644 --- a/airflow/ui/openapi-gen/queries/common.ts +++ b/airflow/ui/openapi-gen/queries/common.ts @@ -4,7 +4,10 @@ import { UseQueryResult } from "@tanstack/react-query"; import { AssetService, ConnectionService, + DagRunService, DagService, + DashboardService, + VariableService, } from "../requests/services.gen"; import { DagRunState } from "../requests/types.gen"; @@ -24,6 +27,28 @@ export const 
UseAssetServiceNextRunAssetsKeyFn = ( }, queryKey?: Array, ) => [useAssetServiceNextRunAssetsKey, ...(queryKey ?? [{ dagId }])]; +export type DashboardServiceHistoricalMetricsDefaultResponse = Awaited< + ReturnType +>; +export type DashboardServiceHistoricalMetricsQueryResult< + TData = DashboardServiceHistoricalMetricsDefaultResponse, + TError = unknown, +> = UseQueryResult; +export const useDashboardServiceHistoricalMetricsKey = + "DashboardServiceHistoricalMetrics"; +export const UseDashboardServiceHistoricalMetricsKeyFn = ( + { + endDate, + startDate, + }: { + endDate: string; + startDate: string; + }, + queryKey?: Array, +) => [ + useDashboardServiceHistoricalMetricsKey, + ...(queryKey ?? [{ endDate, startDate }]), +]; export type DagServiceGetDagsDefaultResponse = Awaited< ReturnType >; @@ -74,6 +99,22 @@ export const UseDagServiceGetDagsKeyFn = ( }, ]), ]; +export type DagServiceGetDagDefaultResponse = Awaited< + ReturnType +>; +export type DagServiceGetDagQueryResult< + TData = DagServiceGetDagDefaultResponse, + TError = unknown, +> = UseQueryResult; +export const useDagServiceGetDagKey = "DagServiceGetDag"; +export const UseDagServiceGetDagKeyFn = ( + { + dagId, + }: { + dagId: string; + }, + queryKey?: Array, +) => [useDagServiceGetDagKey, ...(queryKey ?? [{ dagId }])]; export type DagServiceGetDagDetailsDefaultResponse = Awaited< ReturnType >; @@ -110,12 +151,61 @@ export const UseConnectionServiceGetConnectionKeyFn = ( useConnectionServiceGetConnectionKey, ...(queryKey ?? [{ connectionId }]), ]; +export type VariableServiceGetVariableDefaultResponse = Awaited< + ReturnType +>; +export type VariableServiceGetVariableQueryResult< + TData = VariableServiceGetVariableDefaultResponse, + TError = unknown, +> = UseQueryResult; +export const useVariableServiceGetVariableKey = "VariableServiceGetVariable"; +export const UseVariableServiceGetVariableKeyFn = ( + { + variableKey, + }: { + variableKey: string; + }, + queryKey?: Array, +) => [useVariableServiceGetVariableKey, ...(queryKey ?? [{ variableKey }])]; +export type DagRunServiceGetDagRunDefaultResponse = Awaited< + ReturnType +>; +export type DagRunServiceGetDagRunQueryResult< + TData = DagRunServiceGetDagRunDefaultResponse, + TError = unknown, +> = UseQueryResult; +export const useDagRunServiceGetDagRunKey = "DagRunServiceGetDagRun"; +export const UseDagRunServiceGetDagRunKeyFn = ( + { + dagId, + dagRunId, + }: { + dagId: string; + dagRunId: string; + }, + queryKey?: Array, +) => [useDagRunServiceGetDagRunKey, ...(queryKey ?? 
[{ dagId, dagRunId }])]; +export type VariableServicePostVariableMutationResult = Awaited< + ReturnType +>; export type DagServicePatchDagsMutationResult = Awaited< ReturnType >; export type DagServicePatchDagMutationResult = Awaited< ReturnType >; +export type VariableServicePatchVariableMutationResult = Awaited< + ReturnType +>; +export type DagServiceDeleteDagMutationResult = Awaited< + ReturnType +>; export type ConnectionServiceDeleteConnectionMutationResult = Awaited< ReturnType >; +export type VariableServiceDeleteVariableMutationResult = Awaited< + ReturnType +>; +export type DagRunServiceDeleteDagRunMutationResult = Awaited< + ReturnType +>; diff --git a/airflow/ui/openapi-gen/queries/prefetch.ts b/airflow/ui/openapi-gen/queries/prefetch.ts index cbb43cca3abb1..3e194302f4be0 100644 --- a/airflow/ui/openapi-gen/queries/prefetch.ts +++ b/airflow/ui/openapi-gen/queries/prefetch.ts @@ -4,7 +4,10 @@ import { type QueryClient } from "@tanstack/react-query"; import { AssetService, ConnectionService, + DagRunService, DagService, + DashboardService, + VariableService, } from "../requests/services.gen"; import { DagRunState } from "../requests/types.gen"; import * as Common from "./common"; @@ -28,6 +31,32 @@ export const prefetchUseAssetServiceNextRunAssets = ( queryKey: Common.UseAssetServiceNextRunAssetsKeyFn({ dagId }), queryFn: () => AssetService.nextRunAssets({ dagId }), }); +/** + * Historical Metrics + * Return cluster activity historical metrics. + * @param data The data for the request. + * @param data.startDate + * @param data.endDate + * @returns HistoricalMetricDataResponse Successful Response + * @throws ApiError + */ +export const prefetchUseDashboardServiceHistoricalMetrics = ( + queryClient: QueryClient, + { + endDate, + startDate, + }: { + endDate: string; + startDate: string; + }, +) => + queryClient.prefetchQuery({ + queryKey: Common.UseDashboardServiceHistoricalMetricsKeyFn({ + endDate, + startDate, + }), + queryFn: () => DashboardService.historicalMetrics({ endDate, startDate }), + }); /** * Get Dags * Get all DAGs. @@ -98,6 +127,26 @@ export const prefetchUseDagServiceGetDags = ( tags, }), }); +/** + * Get Dag + * Get basic information about a DAG. + * @param data The data for the request. + * @param data.dagId + * @returns DAGResponse Successful Response + * @throws ApiError + */ +export const prefetchUseDagServiceGetDag = ( + queryClient: QueryClient, + { + dagId, + }: { + dagId: string; + }, +) => + queryClient.prefetchQuery({ + queryKey: Common.UseDagServiceGetDagKeyFn({ dagId }), + queryFn: () => DagService.getDag({ dagId }), + }); /** * Get Dag Details * Get details of DAG. @@ -138,3 +187,45 @@ export const prefetchUseConnectionServiceGetConnection = ( queryKey: Common.UseConnectionServiceGetConnectionKeyFn({ connectionId }), queryFn: () => ConnectionService.getConnection({ connectionId }), }); +/** + * Get Variable + * Get a variable entry. + * @param data The data for the request. + * @param data.variableKey + * @returns VariableResponse Successful Response + * @throws ApiError + */ +export const prefetchUseVariableServiceGetVariable = ( + queryClient: QueryClient, + { + variableKey, + }: { + variableKey: string; + }, +) => + queryClient.prefetchQuery({ + queryKey: Common.UseVariableServiceGetVariableKeyFn({ variableKey }), + queryFn: () => VariableService.getVariable({ variableKey }), + }); +/** + * Get Dag Run + * @param data The data for the request. 
+ * @param data.dagId + * @param data.dagRunId + * @returns DAGRunResponse Successful Response + * @throws ApiError + */ +export const prefetchUseDagRunServiceGetDagRun = ( + queryClient: QueryClient, + { + dagId, + dagRunId, + }: { + dagId: string; + dagRunId: string; + }, +) => + queryClient.prefetchQuery({ + queryKey: Common.UseDagRunServiceGetDagRunKeyFn({ dagId, dagRunId }), + queryFn: () => DagRunService.getDagRun({ dagId, dagRunId }), + }); diff --git a/airflow/ui/openapi-gen/queries/queries.ts b/airflow/ui/openapi-gen/queries/queries.ts index 4aa627d74fd0c..a16bdf165b182 100644 --- a/airflow/ui/openapi-gen/queries/queries.ts +++ b/airflow/ui/openapi-gen/queries/queries.ts @@ -1,17 +1,20 @@ // generated with @7nohe/openapi-react-query-codegen@1.6.0 import { - useMutation, UseMutationOptions, - useQuery, UseQueryOptions, + useMutation, + useQuery, } from "@tanstack/react-query"; import { AssetService, ConnectionService, + DagRunService, DagService, + DashboardService, + VariableService, } from "../requests/services.gen"; -import { DAGPatchBody, DagRunState } from "../requests/types.gen"; +import { DAGPatchBody, DagRunState, VariableBody } from "../requests/types.gen"; import * as Common from "./common"; /** @@ -39,6 +42,39 @@ export const useAssetServiceNextRunAssets = < queryFn: () => AssetService.nextRunAssets({ dagId }) as TData, ...options, }); +/** + * Historical Metrics + * Return cluster activity historical metrics. + * @param data The data for the request. + * @param data.startDate + * @param data.endDate + * @returns HistoricalMetricDataResponse Successful Response + * @throws ApiError + */ +export const useDashboardServiceHistoricalMetrics = < + TData = Common.DashboardServiceHistoricalMetricsDefaultResponse, + TError = unknown, + TQueryKey extends Array = unknown[], +>( + { + endDate, + startDate, + }: { + endDate: string; + startDate: string; + }, + queryKey?: TQueryKey, + options?: Omit, "queryKey" | "queryFn">, +) => + useQuery({ + queryKey: Common.UseDashboardServiceHistoricalMetricsKeyFn( + { endDate, startDate }, + queryKey, + ), + queryFn: () => + DashboardService.historicalMetrics({ endDate, startDate }) as TData, + ...options, + }); /** * Get Dags * Get all DAGs. @@ -118,6 +154,32 @@ export const useDagServiceGetDags = < }) as TData, ...options, }); +/** + * Get Dag + * Get basic information about a DAG. + * @param data The data for the request. + * @param data.dagId + * @returns DAGResponse Successful Response + * @throws ApiError + */ +export const useDagServiceGetDag = < + TData = Common.DagServiceGetDagDefaultResponse, + TError = unknown, + TQueryKey extends Array = unknown[], +>( + { + dagId, + }: { + dagId: string; + }, + queryKey?: TQueryKey, + options?: Omit, "queryKey" | "queryFn">, +) => + useQuery({ + queryKey: Common.UseDagServiceGetDagKeyFn({ dagId }, queryKey), + queryFn: () => DagService.getDag({ dagId }) as TData, + ...options, + }); /** * Get Dag Details * Get details of DAG. @@ -173,6 +235,105 @@ export const useConnectionServiceGetConnection = < queryFn: () => ConnectionService.getConnection({ connectionId }) as TData, ...options, }); +/** + * Get Variable + * Get a variable entry. + * @param data The data for the request. 
+ * @param data.variableKey + * @returns VariableResponse Successful Response + * @throws ApiError + */ +export const useVariableServiceGetVariable = < + TData = Common.VariableServiceGetVariableDefaultResponse, + TError = unknown, + TQueryKey extends Array = unknown[], +>( + { + variableKey, + }: { + variableKey: string; + }, + queryKey?: TQueryKey, + options?: Omit, "queryKey" | "queryFn">, +) => + useQuery({ + queryKey: Common.UseVariableServiceGetVariableKeyFn( + { variableKey }, + queryKey, + ), + queryFn: () => VariableService.getVariable({ variableKey }) as TData, + ...options, + }); +/** + * Get Dag Run + * @param data The data for the request. + * @param data.dagId + * @param data.dagRunId + * @returns DAGRunResponse Successful Response + * @throws ApiError + */ +export const useDagRunServiceGetDagRun = < + TData = Common.DagRunServiceGetDagRunDefaultResponse, + TError = unknown, + TQueryKey extends Array = unknown[], +>( + { + dagId, + dagRunId, + }: { + dagId: string; + dagRunId: string; + }, + queryKey?: TQueryKey, + options?: Omit, "queryKey" | "queryFn">, +) => + useQuery({ + queryKey: Common.UseDagRunServiceGetDagRunKeyFn( + { dagId, dagRunId }, + queryKey, + ), + queryFn: () => DagRunService.getDagRun({ dagId, dagRunId }) as TData, + ...options, + }); +/** + * Post Variable + * Create a variable. + * @param data The data for the request. + * @param data.requestBody + * @returns VariableResponse Successful Response + * @throws ApiError + */ +export const useVariableServicePostVariable = < + TData = Common.VariableServicePostVariableMutationResult, + TError = unknown, + TContext = unknown, +>( + options?: Omit< + UseMutationOptions< + TData, + TError, + { + requestBody: VariableBody; + }, + TContext + >, + "mutationFn" + >, +) => + useMutation< + TData, + TError, + { + requestBody: VariableBody; + }, + TContext + >({ + mutationFn: ({ requestBody }) => + VariableService.postVariable({ + requestBody, + }) as unknown as Promise, + ...options, + }); /** * Patch Dags * Patch multiple DAGs. @@ -306,6 +467,90 @@ export const useDagServicePatchDag = < }) as unknown as Promise, ...options, }); +/** + * Patch Variable + * Update a variable by key. + * @param data The data for the request. + * @param data.variableKey + * @param data.requestBody + * @param data.updateMask + * @returns VariableResponse Successful Response + * @throws ApiError + */ +export const useVariableServicePatchVariable = < + TData = Common.VariableServicePatchVariableMutationResult, + TError = unknown, + TContext = unknown, +>( + options?: Omit< + UseMutationOptions< + TData, + TError, + { + requestBody: VariableBody; + updateMask?: string[]; + variableKey: string; + }, + TContext + >, + "mutationFn" + >, +) => + useMutation< + TData, + TError, + { + requestBody: VariableBody; + updateMask?: string[]; + variableKey: string; + }, + TContext + >({ + mutationFn: ({ requestBody, updateMask, variableKey }) => + VariableService.patchVariable({ + requestBody, + updateMask, + variableKey, + }) as unknown as Promise, + ...options, + }); +/** + * Delete Dag + * Delete the specific DAG. + * @param data The data for the request. 
+ * @param data.dagId + * @returns unknown Successful Response + * @throws ApiError + */ +export const useDagServiceDeleteDag = < + TData = Common.DagServiceDeleteDagMutationResult, + TError = unknown, + TContext = unknown, +>( + options?: Omit< + UseMutationOptions< + TData, + TError, + { + dagId: string; + }, + TContext + >, + "mutationFn" + >, +) => + useMutation< + TData, + TError, + { + dagId: string; + }, + TContext + >({ + mutationFn: ({ dagId }) => + DagService.deleteDag({ dagId }) as unknown as Promise, + ...options, + }); /** * Delete Connection * Delete a connection entry. @@ -345,3 +590,85 @@ export const useConnectionServiceDeleteConnection = < }) as unknown as Promise, ...options, }); +/** + * Delete Variable + * Delete a variable entry. + * @param data The data for the request. + * @param data.variableKey + * @returns void Successful Response + * @throws ApiError + */ +export const useVariableServiceDeleteVariable = < + TData = Common.VariableServiceDeleteVariableMutationResult, + TError = unknown, + TContext = unknown, +>( + options?: Omit< + UseMutationOptions< + TData, + TError, + { + variableKey: string; + }, + TContext + >, + "mutationFn" + >, +) => + useMutation< + TData, + TError, + { + variableKey: string; + }, + TContext + >({ + mutationFn: ({ variableKey }) => + VariableService.deleteVariable({ + variableKey, + }) as unknown as Promise, + ...options, + }); +/** + * Delete Dag Run + * Delete a DAG Run entry. + * @param data The data for the request. + * @param data.dagId + * @param data.dagRunId + * @returns void Successful Response + * @throws ApiError + */ +export const useDagRunServiceDeleteDagRun = < + TData = Common.DagRunServiceDeleteDagRunMutationResult, + TError = unknown, + TContext = unknown, +>( + options?: Omit< + UseMutationOptions< + TData, + TError, + { + dagId: string; + dagRunId: string; + }, + TContext + >, + "mutationFn" + >, +) => + useMutation< + TData, + TError, + { + dagId: string; + dagRunId: string; + }, + TContext + >({ + mutationFn: ({ dagId, dagRunId }) => + DagRunService.deleteDagRun({ + dagId, + dagRunId, + }) as unknown as Promise, + ...options, + }); diff --git a/airflow/ui/openapi-gen/queries/suspense.ts b/airflow/ui/openapi-gen/queries/suspense.ts index 04d7eb94b3208..79ad479f0a42f 100644 --- a/airflow/ui/openapi-gen/queries/suspense.ts +++ b/airflow/ui/openapi-gen/queries/suspense.ts @@ -4,7 +4,10 @@ import { UseQueryOptions, useSuspenseQuery } from "@tanstack/react-query"; import { AssetService, ConnectionService, + DagRunService, DagService, + DashboardService, + VariableService, } from "../requests/services.gen"; import { DagRunState } from "../requests/types.gen"; import * as Common from "./common"; @@ -34,6 +37,39 @@ export const useAssetServiceNextRunAssetsSuspense = < queryFn: () => AssetService.nextRunAssets({ dagId }) as TData, ...options, }); +/** + * Historical Metrics + * Return cluster activity historical metrics. + * @param data The data for the request. 
+ * @param data.startDate + * @param data.endDate + * @returns HistoricalMetricDataResponse Successful Response + * @throws ApiError + */ +export const useDashboardServiceHistoricalMetricsSuspense = < + TData = Common.DashboardServiceHistoricalMetricsDefaultResponse, + TError = unknown, + TQueryKey extends Array = unknown[], +>( + { + endDate, + startDate, + }: { + endDate: string; + startDate: string; + }, + queryKey?: TQueryKey, + options?: Omit, "queryKey" | "queryFn">, +) => + useSuspenseQuery({ + queryKey: Common.UseDashboardServiceHistoricalMetricsKeyFn( + { endDate, startDate }, + queryKey, + ), + queryFn: () => + DashboardService.historicalMetrics({ endDate, startDate }) as TData, + ...options, + }); /** * Get Dags * Get all DAGs. @@ -113,6 +149,32 @@ export const useDagServiceGetDagsSuspense = < }) as TData, ...options, }); +/** + * Get Dag + * Get basic information about a DAG. + * @param data The data for the request. + * @param data.dagId + * @returns DAGResponse Successful Response + * @throws ApiError + */ +export const useDagServiceGetDagSuspense = < + TData = Common.DagServiceGetDagDefaultResponse, + TError = unknown, + TQueryKey extends Array = unknown[], +>( + { + dagId, + }: { + dagId: string; + }, + queryKey?: TQueryKey, + options?: Omit, "queryKey" | "queryFn">, +) => + useSuspenseQuery({ + queryKey: Common.UseDagServiceGetDagKeyFn({ dagId }, queryKey), + queryFn: () => DagService.getDag({ dagId }) as TData, + ...options, + }); /** * Get Dag Details * Get details of DAG. @@ -168,3 +230,63 @@ export const useConnectionServiceGetConnectionSuspense = < queryFn: () => ConnectionService.getConnection({ connectionId }) as TData, ...options, }); +/** + * Get Variable + * Get a variable entry. + * @param data The data for the request. + * @param data.variableKey + * @returns VariableResponse Successful Response + * @throws ApiError + */ +export const useVariableServiceGetVariableSuspense = < + TData = Common.VariableServiceGetVariableDefaultResponse, + TError = unknown, + TQueryKey extends Array = unknown[], +>( + { + variableKey, + }: { + variableKey: string; + }, + queryKey?: TQueryKey, + options?: Omit, "queryKey" | "queryFn">, +) => + useSuspenseQuery({ + queryKey: Common.UseVariableServiceGetVariableKeyFn( + { variableKey }, + queryKey, + ), + queryFn: () => VariableService.getVariable({ variableKey }) as TData, + ...options, + }); +/** + * Get Dag Run + * @param data The data for the request. 
+ * @param data.dagId + * @param data.dagRunId + * @returns DAGRunResponse Successful Response + * @throws ApiError + */ +export const useDagRunServiceGetDagRunSuspense = < + TData = Common.DagRunServiceGetDagRunDefaultResponse, + TError = unknown, + TQueryKey extends Array = unknown[], +>( + { + dagId, + dagRunId, + }: { + dagId: string; + dagRunId: string; + }, + queryKey?: TQueryKey, + options?: Omit, "queryKey" | "queryFn">, +) => + useSuspenseQuery({ + queryKey: Common.UseDagRunServiceGetDagRunKeyFn( + { dagId, dagRunId }, + queryKey, + ), + queryFn: () => DagRunService.getDagRun({ dagId, dagRunId }) as TData, + ...options, + }); diff --git a/airflow/ui/openapi-gen/requests/schemas.gen.ts b/airflow/ui/openapi-gen/requests/schemas.gen.ts index 910354423bac3..e42a3f6572ca9 100644 --- a/airflow/ui/openapi-gen/requests/schemas.gen.ts +++ b/airflow/ui/openapi-gen/requests/schemas.gen.ts @@ -526,7 +526,7 @@ export const $DAGPatchBody = { type: "object", required: ["is_paused"], title: "DAGPatchBody", - description: "Dag Serializer for updatable body.", + description: "Dag Serializer for updatable bodies.", } as const; export const $DAGResponse = { @@ -784,6 +784,195 @@ export const $DAGResponse = { description: "DAG serializer for responses.", } as const; +export const $DAGRunResponse = { + properties: { + run_id: { + anyOf: [ + { + type: "string", + }, + { + type: "null", + }, + ], + title: "Run Id", + }, + dag_id: { + type: "string", + title: "Dag Id", + }, + logical_date: { + anyOf: [ + { + type: "string", + format: "date-time", + }, + { + type: "null", + }, + ], + title: "Logical Date", + }, + start_date: { + anyOf: [ + { + type: "string", + format: "date-time", + }, + { + type: "null", + }, + ], + title: "Start Date", + }, + end_date: { + anyOf: [ + { + type: "string", + format: "date-time", + }, + { + type: "null", + }, + ], + title: "End Date", + }, + data_interval_start: { + anyOf: [ + { + type: "string", + format: "date-time", + }, + { + type: "null", + }, + ], + title: "Data Interval Start", + }, + data_interval_end: { + anyOf: [ + { + type: "string", + format: "date-time", + }, + { + type: "null", + }, + ], + title: "Data Interval End", + }, + last_scheduling_decision: { + anyOf: [ + { + type: "string", + format: "date-time", + }, + { + type: "null", + }, + ], + title: "Last Scheduling Decision", + }, + run_type: { + $ref: "#/components/schemas/DagRunType", + }, + state: { + $ref: "#/components/schemas/DagRunState", + }, + external_trigger: { + type: "boolean", + title: "External Trigger", + }, + triggered_by: { + $ref: "#/components/schemas/DagRunTriggeredByType", + }, + conf: { + type: "object", + title: "Conf", + }, + note: { + anyOf: [ + { + type: "string", + }, + { + type: "null", + }, + ], + title: "Note", + }, + }, + type: "object", + required: [ + "run_id", + "dag_id", + "logical_date", + "start_date", + "end_date", + "data_interval_start", + "data_interval_end", + "last_scheduling_decision", + "run_type", + "state", + "external_trigger", + "triggered_by", + "conf", + "note", + ], + title: "DAGRunResponse", + description: "DAG Run serializer for responses.", +} as const; + +export const $DAGRunStates = { + properties: { + queued: { + type: "integer", + title: "Queued", + }, + running: { + type: "integer", + title: "Running", + }, + success: { + type: "integer", + title: "Success", + }, + failed: { + type: "integer", + title: "Failed", + }, + }, + type: "object", + required: ["queued", "running", "success", "failed"], + title: "DAGRunStates", + description: "DAG Run States 
for responses.", +} as const; + +export const $DAGRunTypes = { + properties: { + backfill: { + type: "integer", + title: "Backfill", + }, + scheduled: { + type: "integer", + title: "Scheduled", + }, + manual: { + type: "integer", + title: "Manual", + }, + dataset_triggered: { + type: "integer", + title: "Dataset Triggered", + }, + }, + type: "object", + required: ["backfill", "scheduled", "manual", "dataset_triggered"], + title: "DAGRunTypes", + description: "DAG Run Types for responses.", +} as const; + export const $DagRunState = { type: "string", enum: ["queued", "running", "success", "failed"], @@ -795,6 +984,29 @@ so please ensure that their values always match the ones with the same name in TaskInstanceState.`, } as const; +export const $DagRunTriggeredByType = { + type: "string", + enum: [ + "cli", + "operator", + "rest_api", + "ui", + "test", + "timetable", + "dataset", + "backfill", + ], + title: "DagRunTriggeredByType", + description: "Class with TriggeredBy types for DagRun.", +} as const; + +export const $DagRunType = { + type: "string", + enum: ["backfill", "scheduled", "manual", "dataset_triggered"], + title: "DagRunType", + description: "Class with DagRun types.", +} as const; + export const $DagTagPydantic = { properties: { name: { @@ -847,6 +1059,99 @@ export const $HTTPValidationError = { title: "HTTPValidationError", } as const; +export const $HistoricalMetricDataResponse = { + properties: { + dag_run_types: { + $ref: "#/components/schemas/DAGRunTypes", + }, + dag_run_states: { + $ref: "#/components/schemas/DAGRunStates", + }, + task_instance_states: { + $ref: "#/components/schemas/TaskInstantState", + }, + }, + type: "object", + required: ["dag_run_types", "dag_run_states", "task_instance_states"], + title: "HistoricalMetricDataResponse", + description: "Historical Metric Data serializer for responses.", +} as const; + +export const $TaskInstantState = { + properties: { + no_status: { + type: "integer", + title: "No Status", + }, + removed: { + type: "integer", + title: "Removed", + }, + scheduled: { + type: "integer", + title: "Scheduled", + }, + queued: { + type: "integer", + title: "Queued", + }, + running: { + type: "integer", + title: "Running", + }, + success: { + type: "integer", + title: "Success", + }, + restarting: { + type: "integer", + title: "Restarting", + }, + failed: { + type: "integer", + title: "Failed", + }, + up_for_retry: { + type: "integer", + title: "Up For Retry", + }, + up_for_reschedule: { + type: "integer", + title: "Up For Reschedule", + }, + upstream_failed: { + type: "integer", + title: "Upstream Failed", + }, + skipped: { + type: "integer", + title: "Skipped", + }, + deferred: { + type: "integer", + title: "Deferred", + }, + }, + type: "object", + required: [ + "no_status", + "removed", + "scheduled", + "queued", + "running", + "success", + "restarting", + "failed", + "up_for_retry", + "up_for_reschedule", + "upstream_failed", + "skipped", + "deferred", + ], + title: "TaskInstantState", + description: "TaskInstance serializer for responses.", +} as const; + export const $ValidationError = { properties: { loc: { @@ -876,3 +1181,73 @@ export const $ValidationError = { required: ["loc", "msg", "type"], title: "ValidationError", } as const; + +export const $VariableBody = { + properties: { + key: { + type: "string", + title: "Key", + }, + description: { + anyOf: [ + { + type: "string", + }, + { + type: "null", + }, + ], + title: "Description", + }, + value: { + anyOf: [ + { + type: "string", + }, + { + type: "null", + }, + ], + title: "Value", 
+ }, + }, + type: "object", + required: ["key", "description", "value"], + title: "VariableBody", + description: "Variable serializer for bodies.", +} as const; + +export const $VariableResponse = { + properties: { + key: { + type: "string", + title: "Key", + }, + description: { + anyOf: [ + { + type: "string", + }, + { + type: "null", + }, + ], + title: "Description", + }, + value: { + anyOf: [ + { + type: "string", + }, + { + type: "null", + }, + ], + title: "Value", + }, + }, + type: "object", + required: ["key", "description", "value"], + title: "VariableResponse", + description: "Variable serializer for responses.", +} as const; diff --git a/airflow/ui/openapi-gen/requests/services.gen.ts b/airflow/ui/openapi-gen/requests/services.gen.ts index 023a2a458dd7b..8d7f0cee2b295 100644 --- a/airflow/ui/openapi-gen/requests/services.gen.ts +++ b/airflow/ui/openapi-gen/requests/services.gen.ts @@ -5,18 +5,36 @@ import { request as __request } from "./core/request"; import type { NextRunAssetsData, NextRunAssetsResponse, + HistoricalMetricsData, + HistoricalMetricsResponse, GetDagsData, GetDagsResponse, PatchDagsData, PatchDagsResponse, - GetDagDetailsData, - GetDagDetailsResponse, + GetDagData, + GetDagResponse, PatchDagData, PatchDagResponse, + DeleteDagData, + DeleteDagResponse, + GetDagDetailsData, + GetDagDetailsResponse, DeleteConnectionData, DeleteConnectionResponse, GetConnectionData, GetConnectionResponse, + DeleteVariableData, + DeleteVariableResponse, + GetVariableData, + GetVariableResponse, + PatchVariableData, + PatchVariableResponse, + PostVariableData, + PostVariableResponse, + GetDagRunData, + GetDagRunResponse, + DeleteDagRunData, + DeleteDagRunResponse, } from "./types.gen"; export class AssetService { @@ -43,6 +61,34 @@ export class AssetService { } } +export class DashboardService { + /** + * Historical Metrics + * Return cluster activity historical metrics. + * @param data The data for the request. + * @param data.startDate + * @param data.endDate + * @returns HistoricalMetricDataResponse Successful Response + * @throws ApiError + */ + public static historicalMetrics( + data: HistoricalMetricsData, + ): CancelablePromise { + return __request(OpenAPI, { + method: "GET", + url: "/ui/dashboard/historical_metrics_data", + query: { + start_date: data.startDate, + end_date: data.endDate, + }, + errors: { + 400: "Bad Request", + 422: "Validation Error", + }, + }); + } +} + export class DagService { /** * Get Dags @@ -66,7 +112,7 @@ export class DagService { ): CancelablePromise { return __request(OpenAPI, { method: "GET", - url: "/public/dags", + url: "/public/dags/", query: { limit: data.limit, offset: data.offset, @@ -107,7 +153,7 @@ export class DagService { ): CancelablePromise { return __request(OpenAPI, { method: "PATCH", - url: "/public/dags", + url: "/public/dags/", query: { update_mask: data.updateMask, limit: data.limit, @@ -132,19 +178,17 @@ export class DagService { } /** - * Get Dag Details - * Get details of DAG. + * Get Dag + * Get basic information about a DAG. * @param data The data for the request. 
* @param data.dagId - * @returns DAGDetailsResponse Successful Response + * @returns DAGResponse Successful Response * @throws ApiError */ - public static getDagDetails( - data: GetDagDetailsData, - ): CancelablePromise { + public static getDag(data: GetDagData): CancelablePromise { return __request(OpenAPI, { method: "GET", - url: "/public/dags/{dag_id}/details", + url: "/public/dags/{dag_id}", path: { dag_id: data.dagId, }, @@ -191,6 +235,60 @@ export class DagService { }, }); } + + /** + * Delete Dag + * Delete the specific DAG. + * @param data The data for the request. + * @param data.dagId + * @returns unknown Successful Response + * @throws ApiError + */ + public static deleteDag( + data: DeleteDagData, + ): CancelablePromise { + return __request(OpenAPI, { + method: "DELETE", + url: "/public/dags/{dag_id}", + path: { + dag_id: data.dagId, + }, + errors: { + 400: "Bad Request", + 401: "Unauthorized", + 403: "Forbidden", + 404: "Not Found", + 422: "Unprocessable Entity", + }, + }); + } + + /** + * Get Dag Details + * Get details of DAG. + * @param data The data for the request. + * @param data.dagId + * @returns DAGDetailsResponse Successful Response + * @throws ApiError + */ + public static getDagDetails( + data: GetDagDetailsData, + ): CancelablePromise { + return __request(OpenAPI, { + method: "GET", + url: "/public/dags/{dag_id}/details", + path: { + dag_id: data.dagId, + }, + errors: { + 400: "Bad Request", + 401: "Unauthorized", + 403: "Forbidden", + 404: "Not Found", + 422: "Unprocessable Entity", + }, + }); + } } export class ConnectionService { @@ -246,3 +344,173 @@ export class ConnectionService { }); } } + +export class VariableService { + /** + * Delete Variable + * Delete a variable entry. + * @param data The data for the request. + * @param data.variableKey + * @returns void Successful Response + * @throws ApiError + */ + public static deleteVariable( + data: DeleteVariableData, + ): CancelablePromise { + return __request(OpenAPI, { + method: "DELETE", + url: "/public/variables/{variable_key}", + path: { + variable_key: data.variableKey, + }, + errors: { + 401: "Unauthorized", + 403: "Forbidden", + 404: "Not Found", + 422: "Validation Error", + }, + }); + } + + /** + * Get Variable + * Get a variable entry. + * @param data The data for the request. + * @param data.variableKey + * @returns VariableResponse Successful Response + * @throws ApiError + */ + public static getVariable( + data: GetVariableData, + ): CancelablePromise { + return __request(OpenAPI, { + method: "GET", + url: "/public/variables/{variable_key}", + path: { + variable_key: data.variableKey, + }, + errors: { + 401: "Unauthorized", + 403: "Forbidden", + 404: "Not Found", + 422: "Validation Error", + }, + }); + } + + /** + * Patch Variable + * Update a variable by key. + * @param data The data for the request. + * @param data.variableKey + * @param data.requestBody + * @param data.updateMask + * @returns VariableResponse Successful Response + * @throws ApiError + */ + public static patchVariable( + data: PatchVariableData, + ): CancelablePromise { + return __request(OpenAPI, { + method: "PATCH", + url: "/public/variables/{variable_key}", + path: { + variable_key: data.variableKey, + }, + query: { + update_mask: data.updateMask, + }, + body: data.requestBody, + mediaType: "application/json", + errors: { + 400: "Bad Request", + 401: "Unauthorized", + 403: "Forbidden", + 404: "Not Found", + 422: "Validation Error", + }, + }); + } + + /** + * Post Variable + * Create a variable. 
+ * @param data The data for the request. + * @param data.requestBody + * @returns VariableResponse Successful Response + * @throws ApiError + */ + public static postVariable( + data: PostVariableData, + ): CancelablePromise { + return __request(OpenAPI, { + method: "POST", + url: "/public/variables/", + body: data.requestBody, + mediaType: "application/json", + errors: { + 401: "Unauthorized", + 403: "Forbidden", + 422: "Validation Error", + }, + }); + } +} + +export class DagRunService { + /** + * Get Dag Run + * @param data The data for the request. + * @param data.dagId + * @param data.dagRunId + * @returns DAGRunResponse Successful Response + * @throws ApiError + */ + public static getDagRun( + data: GetDagRunData, + ): CancelablePromise { + return __request(OpenAPI, { + method: "GET", + url: "/public/dags/{dag_id}/dagRuns/{dag_run_id}", + path: { + dag_id: data.dagId, + dag_run_id: data.dagRunId, + }, + errors: { + 401: "Unauthorized", + 403: "Forbidden", + 404: "Not Found", + 422: "Validation Error", + }, + }); + } + + /** + * Delete Dag Run + * Delete a DAG Run entry. + * @param data The data for the request. + * @param data.dagId + * @param data.dagRunId + * @returns void Successful Response + * @throws ApiError + */ + public static deleteDagRun( + data: DeleteDagRunData, + ): CancelablePromise { + return __request(OpenAPI, { + method: "DELETE", + url: "/public/dags/{dag_id}/dagRuns/{dag_run_id}", + path: { + dag_id: data.dagId, + dag_run_id: data.dagRunId, + }, + errors: { + 400: "Bad Request", + 401: "Unauthorized", + 403: "Forbidden", + 404: "Not Found", + 422: "Validation Error", + }, + }); + } +} diff --git a/airflow/ui/openapi-gen/requests/types.gen.ts b/airflow/ui/openapi-gen/requests/types.gen.ts index 65a2db8926510..7f603a1adb4d3 100644 --- a/airflow/ui/openapi-gen/requests/types.gen.ts +++ b/airflow/ui/openapi-gen/requests/types.gen.ts @@ -79,7 +79,7 @@ export type DAGDetailsResponse = { }; /** - * Dag Serializer for updatable body. + * Dag Serializer for updatable bodies. */ export type DAGPatchBody = { is_paused: boolean; @@ -120,6 +120,48 @@ export type DAGResponse = { readonly file_token: string; }; +/** + * DAG Run serializer for responses. + */ +export type DAGRunResponse = { + run_id: string | null; + dag_id: string; + logical_date: string | null; + start_date: string | null; + end_date: string | null; + data_interval_start: string | null; + data_interval_end: string | null; + last_scheduling_decision: string | null; + run_type: DagRunType; + state: DagRunState; + external_trigger: boolean; + triggered_by: DagRunTriggeredByType; + conf: { + [key: string]: unknown; + }; + note: string | null; +}; + +/** + * DAG Run States for responses. + */ +export type DAGRunStates = { + queued: number; + running: number; + success: number; + failed: number; +}; + +/** + * DAG Run Types for responses. + */ +export type DAGRunTypes = { + backfill: number; + scheduled: number; + manual: number; + dataset_triggered: number; +}; + /** * All possible states that a DagRun can be in. * @@ -129,6 +171,28 @@ export type DAGResponse = { */ export type DagRunState = "queued" | "running" | "success" | "failed"; +/** + * Class with TriggeredBy types for DagRun. + */ +export type DagRunTriggeredByType = + | "cli" + | "operator" + | "rest_api" + | "ui" + | "test" + | "timetable" + | "dataset" + | "backfill"; + +/** + * Class with DagRun types. 
+ */ +export type DagRunType = + | "backfill" + | "scheduled" + | "manual" + | "dataset_triggered"; + /** * Serializable representation of the DagTag ORM SqlAlchemyModel used by internal API. */ @@ -152,12 +216,58 @@ export type HTTPValidationError = { detail?: Array; }; +/** + * Historical Metric Data serializer for responses. + */ +export type HistoricalMetricDataResponse = { + dag_run_types: DAGRunTypes; + dag_run_states: DAGRunStates; + task_instance_states: TaskInstantState; +}; + +/** + * TaskInstance serializer for responses. + */ +export type TaskInstantState = { + no_status: number; + removed: number; + scheduled: number; + queued: number; + running: number; + success: number; + restarting: number; + failed: number; + up_for_retry: number; + up_for_reschedule: number; + upstream_failed: number; + skipped: number; + deferred: number; +}; + export type ValidationError = { loc: Array; msg: string; type: string; }; +/** + * Variable serializer for bodies. + */ +export type VariableBody = { + key: string; + description: string | null; + value: string | null; +}; + +/** + * Variable serializer for responses. + */ +export type VariableResponse = { + key: string; + description: string | null; + value: string | null; +}; + export type NextRunAssetsData = { dagId: string; }; @@ -166,6 +276,13 @@ export type NextRunAssetsResponse = { [key: string]: unknown; }; +export type HistoricalMetricsData = { + endDate: string; + startDate: string; +}; + +export type HistoricalMetricsResponse = HistoricalMetricDataResponse; + export type GetDagsData = { dagDisplayNamePattern?: string | null; dagIdPattern?: string | null; @@ -196,11 +313,11 @@ export type PatchDagsData = { export type PatchDagsResponse = DAGCollectionResponse; -export type GetDagDetailsData = { +export type GetDagData = { dagId: string; }; -export type GetDagDetailsResponse = DAGDetailsResponse; +export type GetDagResponse = DAGResponse; export type PatchDagData = { dagId: string; @@ -210,6 +327,18 @@ export type PatchDagData = { export type PatchDagResponse = DAGResponse; +export type DeleteDagData = { + dagId: string; +}; + +export type DeleteDagResponse = unknown; + +export type GetDagDetailsData = { + dagId: string; +}; + +export type GetDagDetailsResponse = DAGDetailsResponse; + export type DeleteConnectionData = { connectionId: string; }; @@ -222,6 +351,46 @@ export type GetConnectionData = { export type GetConnectionResponse = ConnectionResponse; +export type DeleteVariableData = { + variableKey: string; +}; + +export type DeleteVariableResponse = void; + +export type GetVariableData = { + variableKey: string; +}; + +export type GetVariableResponse = VariableResponse; + +export type PatchVariableData = { + requestBody: VariableBody; + updateMask?: Array | null; + variableKey: string; +}; + +export type PatchVariableResponse = VariableResponse; + +export type PostVariableData = { + requestBody: VariableBody; +}; + +export type PostVariableResponse = VariableResponse; + +export type GetDagRunData = { + dagId: string; + dagRunId: string; +}; + +export type GetDagRunResponse = DAGRunResponse; + +export type DeleteDagRunData = { + dagId: string; + dagRunId: string; +}; + +export type DeleteDagRunResponse = void; + export type $OpenApiTs = { "/ui/next_run_assets/{dag_id}": { get: { @@ -240,7 +409,26 @@ export type $OpenApiTs = { }; }; }; - "/public/dags": { + "/ui/dashboard/historical_metrics_data": { + get: { + req: HistoricalMetricsData; + res: { + /** + * Successful Response + */ + 200: HistoricalMetricDataResponse; + /** + * Bad 
Request + */ + 400: HTTPExceptionResponse; + /** + * Validation Error + */ + 422: HTTPValidationError; + }; + }; + }; + "/public/dags/": { get: { req: GetDagsData; res: { @@ -284,14 +472,14 @@ export type $OpenApiTs = { }; }; }; - "/public/dags/{dag_id}/details": { + "/public/dags/{dag_id}": { get: { - req: GetDagDetailsData; + req: GetDagData; res: { /** * Successful Response */ - 200: DAGDetailsResponse; + 200: DAGResponse; /** * Bad Request */ @@ -314,8 +502,6 @@ export type $OpenApiTs = { 422: HTTPExceptionResponse; }; }; - }; - "/public/dags/{dag_id}": { patch: { req: PatchDagData; res: { @@ -345,6 +531,66 @@ export type $OpenApiTs = { 422: HTTPValidationError; }; }; + delete: { + req: DeleteDagData; + res: { + /** + * Successful Response + */ + 200: unknown; + /** + * Bad Request + */ + 400: HTTPExceptionResponse; + /** + * Unauthorized + */ + 401: HTTPExceptionResponse; + /** + * Forbidden + */ + 403: HTTPExceptionResponse; + /** + * Not Found + */ + 404: HTTPExceptionResponse; + /** + * Unprocessable Entity + */ + 422: HTTPExceptionResponse; + }; + }; + }; + "/public/dags/{dag_id}/details": { + get: { + req: GetDagDetailsData; + res: { + /** + * Successful Response + */ + 200: DAGDetailsResponse; + /** + * Bad Request + */ + 400: HTTPExceptionResponse; + /** + * Unauthorized + */ + 401: HTTPExceptionResponse; + /** + * Forbidden + */ + 403: HTTPExceptionResponse; + /** + * Not Found + */ + 404: HTTPExceptionResponse; + /** + * Unprocessable Entity + */ + 422: HTTPExceptionResponse; + }; + }; }; "/public/connections/{connection_id}": { delete: { @@ -398,4 +644,164 @@ export type $OpenApiTs = { }; }; }; + "/public/variables/{variable_key}": { + delete: { + req: DeleteVariableData; + res: { + /** + * Successful Response + */ + 204: void; + /** + * Unauthorized + */ + 401: HTTPExceptionResponse; + /** + * Forbidden + */ + 403: HTTPExceptionResponse; + /** + * Not Found + */ + 404: HTTPExceptionResponse; + /** + * Validation Error + */ + 422: HTTPValidationError; + }; + }; + get: { + req: GetVariableData; + res: { + /** + * Successful Response + */ + 200: VariableResponse; + /** + * Unauthorized + */ + 401: HTTPExceptionResponse; + /** + * Forbidden + */ + 403: HTTPExceptionResponse; + /** + * Not Found + */ + 404: HTTPExceptionResponse; + /** + * Validation Error + */ + 422: HTTPValidationError; + }; + }; + patch: { + req: PatchVariableData; + res: { + /** + * Successful Response + */ + 200: VariableResponse; + /** + * Bad Request + */ + 400: HTTPExceptionResponse; + /** + * Unauthorized + */ + 401: HTTPExceptionResponse; + /** + * Forbidden + */ + 403: HTTPExceptionResponse; + /** + * Not Found + */ + 404: HTTPExceptionResponse; + /** + * Validation Error + */ + 422: HTTPValidationError; + }; + }; + }; + "/public/variables/": { + post: { + req: PostVariableData; + res: { + /** + * Successful Response + */ + 201: VariableResponse; + /** + * Unauthorized + */ + 401: HTTPExceptionResponse; + /** + * Forbidden + */ + 403: HTTPExceptionResponse; + /** + * Validation Error + */ + 422: HTTPValidationError; + }; + }; + }; + "/public/dags/{dag_id}/dagRuns/{dag_run_id}": { + get: { + req: GetDagRunData; + res: { + /** + * Successful Response + */ + 200: DAGRunResponse; + /** + * Unauthorized + */ + 401: HTTPExceptionResponse; + /** + * Forbidden + */ + 403: HTTPExceptionResponse; + /** + * Not Found + */ + 404: HTTPExceptionResponse; + /** + * Validation Error + */ + 422: HTTPValidationError; + }; + }; + delete: { + req: DeleteDagRunData; + res: { + /** + * Successful Response + */ + 204: void; 
+ /** + * Bad Request + */ + 400: HTTPExceptionResponse; + /** + * Unauthorized + */ + 401: HTTPExceptionResponse; + /** + * Forbidden + */ + 403: HTTPExceptionResponse; + /** + * Not Found + */ + 404: HTTPExceptionResponse; + /** + * Validation Error + */ + 422: HTTPValidationError; + }; + }; + }; }; diff --git a/airflow/ui/package.json b/airflow/ui/package.json index 82c6370f9dcba..a7cf90bb5731b 100644 --- a/airflow/ui/package.json +++ b/airflow/ui/package.json @@ -28,7 +28,8 @@ "react": "^18.3.1", "react-dom": "^18.3.1", "react-icons": "^5.3.0", - "react-router-dom": "^6.26.2" + "react-router-dom": "^6.26.2", + "use-debounce": "^10.0.3" }, "devDependencies": { "@7nohe/openapi-react-query-codegen": "^1.6.0", @@ -58,7 +59,7 @@ "prettier": "^3.3.3", "typescript": "~5.5.4", "typescript-eslint": "^8.5.0", - "vite": "^5.4.4", + "vite": "^5.4.6", "vitest": "^2.1.1" } } diff --git a/airflow/ui/pnpm-lock.yaml b/airflow/ui/pnpm-lock.yaml index 515e7fea5279d..3b73df0fa8049 100644 --- a/airflow/ui/pnpm-lock.yaml +++ b/airflow/ui/pnpm-lock.yaml @@ -47,6 +47,9 @@ importers: react-router-dom: specifier: ^6.26.2 version: 6.26.2(react-dom@18.3.1(react@18.3.1))(react@18.3.1) + use-debounce: + specifier: ^10.0.3 + version: 10.0.3(react@18.3.1) devDependencies: '@7nohe/openapi-react-query-codegen': specifier: ^1.6.0 @@ -83,7 +86,7 @@ importers: version: 18.3.0 '@vitejs/plugin-react-swc': specifier: ^3.7.0 - version: 3.7.0(vite@5.4.4(@types/node@22.5.4)) + version: 3.7.0(vite@5.4.6(@types/node@22.5.4)) '@vitest/coverage-v8': specifier: ^2.1.1 version: 2.1.1(vitest@2.1.1(@types/node@22.5.4)(happy-dom@15.0.0)) @@ -130,8 +133,8 @@ importers: specifier: ^8.5.0 version: 8.5.0(eslint@9.10.0(jiti@1.21.6))(typescript@5.5.4) vite: - specifier: ^5.4.4 - version: 5.4.4(@types/node@22.5.4) + specifier: ^5.4.6 + version: 5.4.6(@types/node@22.5.4) vitest: specifier: ^2.1.1 version: 2.1.1(@types/node@22.5.4)(happy-dom@15.0.0) @@ -1027,83 +1030,83 @@ packages: resolution: {integrity: sha512-baiMx18+IMuD1yyvOGaHM9QrVUPGGG0jC+z+IPHnRJWUAUvaKuWKyE8gjDj2rzv3sz9zOGoRSPgeBVHRhZnBlA==} engines: {node: '>=14.0.0'} - '@rollup/rollup-android-arm-eabi@4.21.0': - resolution: {integrity: sha512-WTWD8PfoSAJ+qL87lE7votj3syLavxunWhzCnx3XFxFiI/BA/r3X7MUM8dVrH8rb2r4AiO8jJsr3ZjdaftmnfA==} + '@rollup/rollup-android-arm-eabi@4.24.0': + resolution: {integrity: sha512-Q6HJd7Y6xdB48x8ZNVDOqsbh2uByBhgK8PiQgPhwkIw/HC/YX5Ghq2mQY5sRMZWHb3VsFkWooUVOZHKr7DmDIA==} cpu: [arm] os: [android] - '@rollup/rollup-android-arm64@4.21.0': - resolution: {integrity: sha512-a1sR2zSK1B4eYkiZu17ZUZhmUQcKjk2/j9Me2IDjk1GHW7LB5Z35LEzj9iJch6gtUfsnvZs1ZNyDW2oZSThrkA==} + '@rollup/rollup-android-arm64@4.24.0': + resolution: {integrity: sha512-ijLnS1qFId8xhKjT81uBHuuJp2lU4x2yxa4ctFPtG+MqEE6+C5f/+X/bStmxapgmwLwiL3ih122xv8kVARNAZA==} cpu: [arm64] os: [android] - '@rollup/rollup-darwin-arm64@4.21.0': - resolution: {integrity: sha512-zOnKWLgDld/svhKO5PD9ozmL6roy5OQ5T4ThvdYZLpiOhEGY+dp2NwUmxK0Ld91LrbjrvtNAE0ERBwjqhZTRAA==} + '@rollup/rollup-darwin-arm64@4.24.0': + resolution: {integrity: sha512-bIv+X9xeSs1XCk6DVvkO+S/z8/2AMt/2lMqdQbMrmVpgFvXlmde9mLcbQpztXm1tajC3raFDqegsH18HQPMYtA==} cpu: [arm64] os: [darwin] - '@rollup/rollup-darwin-x64@4.21.0': - resolution: {integrity: sha512-7doS8br0xAkg48SKE2QNtMSFPFUlRdw9+votl27MvT46vo44ATBmdZdGysOevNELmZlfd+NEa0UYOA8f01WSrg==} + '@rollup/rollup-darwin-x64@4.24.0': + resolution: {integrity: sha512-X6/nOwoFN7RT2svEQWUsW/5C/fYMBe4fnLK9DQk4SX4mgVBiTA9h64kjUYPvGQ0F/9xwJ5U5UfTbl6BEjaQdBQ==} cpu: [x64] os: [darwin] - 
'@rollup/rollup-linux-arm-gnueabihf@4.21.0': - resolution: {integrity: sha512-pWJsfQjNWNGsoCq53KjMtwdJDmh/6NubwQcz52aEwLEuvx08bzcy6tOUuawAOncPnxz/3siRtd8hiQ32G1y8VA==} + '@rollup/rollup-linux-arm-gnueabihf@4.24.0': + resolution: {integrity: sha512-0KXvIJQMOImLCVCz9uvvdPgfyWo93aHHp8ui3FrtOP57svqrF/roSSR5pjqL2hcMp0ljeGlU4q9o/rQaAQ3AYA==} cpu: [arm] os: [linux] - '@rollup/rollup-linux-arm-musleabihf@4.21.0': - resolution: {integrity: sha512-efRIANsz3UHZrnZXuEvxS9LoCOWMGD1rweciD6uJQIx2myN3a8Im1FafZBzh7zk1RJ6oKcR16dU3UPldaKd83w==} + '@rollup/rollup-linux-arm-musleabihf@4.24.0': + resolution: {integrity: sha512-it2BW6kKFVh8xk/BnHfakEeoLPv8STIISekpoF+nBgWM4d55CZKc7T4Dx1pEbTnYm/xEKMgy1MNtYuoA8RFIWw==} cpu: [arm] os: [linux] - '@rollup/rollup-linux-arm64-gnu@4.21.0': - resolution: {integrity: sha512-ZrPhydkTVhyeGTW94WJ8pnl1uroqVHM3j3hjdquwAcWnmivjAwOYjTEAuEDeJvGX7xv3Z9GAvrBkEzCgHq9U1w==} + '@rollup/rollup-linux-arm64-gnu@4.24.0': + resolution: {integrity: sha512-i0xTLXjqap2eRfulFVlSnM5dEbTVque/3Pi4g2y7cxrs7+a9De42z4XxKLYJ7+OhE3IgxvfQM7vQc43bwTgPwA==} cpu: [arm64] os: [linux] - '@rollup/rollup-linux-arm64-musl@4.21.0': - resolution: {integrity: sha512-cfaupqd+UEFeURmqNP2eEvXqgbSox/LHOyN9/d2pSdV8xTrjdg3NgOFJCtc1vQ/jEke1qD0IejbBfxleBPHnPw==} + '@rollup/rollup-linux-arm64-musl@4.24.0': + resolution: {integrity: sha512-9E6MKUJhDuDh604Qco5yP/3qn3y7SLXYuiC0Rpr89aMScS2UAmK1wHP2b7KAa1nSjWJc/f/Lc0Wl1L47qjiyQw==} cpu: [arm64] os: [linux] - '@rollup/rollup-linux-powerpc64le-gnu@4.21.0': - resolution: {integrity: sha512-ZKPan1/RvAhrUylwBXC9t7B2hXdpb/ufeu22pG2psV7RN8roOfGurEghw1ySmX/CmDDHNTDDjY3lo9hRlgtaHg==} + '@rollup/rollup-linux-powerpc64le-gnu@4.24.0': + resolution: {integrity: sha512-2XFFPJ2XMEiF5Zi2EBf4h73oR1V/lycirxZxHZNc93SqDN/IWhYYSYj8I9381ikUFXZrz2v7r2tOVk2NBwxrWw==} cpu: [ppc64] os: [linux] - '@rollup/rollup-linux-riscv64-gnu@4.21.0': - resolution: {integrity: sha512-H1eRaCwd5E8eS8leiS+o/NqMdljkcb1d6r2h4fKSsCXQilLKArq6WS7XBLDu80Yz+nMqHVFDquwcVrQmGr28rg==} + '@rollup/rollup-linux-riscv64-gnu@4.24.0': + resolution: {integrity: sha512-M3Dg4hlwuntUCdzU7KjYqbbd+BLq3JMAOhCKdBE3TcMGMZbKkDdJ5ivNdehOssMCIokNHFOsv7DO4rlEOfyKpg==} cpu: [riscv64] os: [linux] - '@rollup/rollup-linux-s390x-gnu@4.21.0': - resolution: {integrity: sha512-zJ4hA+3b5tu8u7L58CCSI0A9N1vkfwPhWd/puGXwtZlsB5bTkwDNW/+JCU84+3QYmKpLi+XvHdmrlwUwDA6kqw==} + '@rollup/rollup-linux-s390x-gnu@4.24.0': + resolution: {integrity: sha512-mjBaoo4ocxJppTorZVKWFpy1bfFj9FeCMJqzlMQGjpNPY9JwQi7OuS1axzNIk0nMX6jSgy6ZURDZ2w0QW6D56g==} cpu: [s390x] os: [linux] - '@rollup/rollup-linux-x64-gnu@4.21.0': - resolution: {integrity: sha512-e2hrvElFIh6kW/UNBQK/kzqMNY5mO+67YtEh9OA65RM5IJXYTWiXjX6fjIiPaqOkBthYF1EqgiZ6OXKcQsM0hg==} + '@rollup/rollup-linux-x64-gnu@4.24.0': + resolution: {integrity: sha512-ZXFk7M72R0YYFN5q13niV0B7G8/5dcQ9JDp8keJSfr3GoZeXEoMHP/HlvqROA3OMbMdfr19IjCeNAnPUG93b6A==} cpu: [x64] os: [linux] - '@rollup/rollup-linux-x64-musl@4.21.0': - resolution: {integrity: sha512-1vvmgDdUSebVGXWX2lIcgRebqfQSff0hMEkLJyakQ9JQUbLDkEaMsPTLOmyccyC6IJ/l3FZuJbmrBw/u0A0uCQ==} + '@rollup/rollup-linux-x64-musl@4.24.0': + resolution: {integrity: sha512-w1i+L7kAXZNdYl+vFvzSZy8Y1arS7vMgIy8wusXJzRrPyof5LAb02KGr1PD2EkRcl73kHulIID0M501lN+vobQ==} cpu: [x64] os: [linux] - '@rollup/rollup-win32-arm64-msvc@4.21.0': - resolution: {integrity: sha512-s5oFkZ/hFcrlAyBTONFY1TWndfyre1wOMwU+6KCpm/iatybvrRgmZVM+vCFwxmC5ZhdlgfE0N4XorsDpi7/4XQ==} + '@rollup/rollup-win32-arm64-msvc@4.24.0': + resolution: {integrity: 
sha512-VXBrnPWgBpVDCVY6XF3LEW0pOU51KbaHhccHw6AS6vBWIC60eqsH19DAeeObl+g8nKAz04QFdl/Cefta0xQtUQ==} cpu: [arm64] os: [win32] - '@rollup/rollup-win32-ia32-msvc@4.21.0': - resolution: {integrity: sha512-G9+TEqRnAA6nbpqyUqgTiopmnfgnMkR3kMukFBDsiyy23LZvUCpiUwjTRx6ezYCjJODXrh52rBR9oXvm+Fp5wg==} + '@rollup/rollup-win32-ia32-msvc@4.24.0': + resolution: {integrity: sha512-xrNcGDU0OxVcPTH/8n/ShH4UevZxKIO6HJFK0e15XItZP2UcaiLFd5kiX7hJnqCbSztUF8Qot+JWBC/QXRPYWQ==} cpu: [ia32] os: [win32] - '@rollup/rollup-win32-x64-msvc@4.21.0': - resolution: {integrity: sha512-2jsCDZwtQvRhejHLfZ1JY6w6kEuEtfF9nzYsZxzSlNVKDX+DpsDJ+Rbjkm74nvg2rdx0gwBS+IMdvwJuq3S9pQ==} + '@rollup/rollup-win32-x64-msvc@4.24.0': + resolution: {integrity: sha512-fbMkAF7fufku0N2dE5TBXcNlg0pt0cJue4xBRE2Qc5Vqikxr4VCgKj/ht6SMdFcOacVA9rqF70APJ8RN/4vMJw==} cpu: [x64] os: [win32] @@ -1250,9 +1253,6 @@ packages: '@types/aria-query@5.0.4': resolution: {integrity: sha512-rfT93uj5s0PRL7EzccGMs3brplhcrghnDoV26NqKhCAS1hVo+WdNsPvE/yb6ilfr5hi2MEk6d5EWJTKdxg8jVw==} - '@types/estree@1.0.5': - resolution: {integrity: sha512-/kYRxGDLWzHOB7q+wtSUQlFrtcdUccpfy+X+9iMBpHK8QLLhx2wIPYuS5DYtR9Wa/YlZAbIovy7qVdB1Aq6Lyw==} - '@types/estree@1.0.6': resolution: {integrity: sha512-AYnb1nQyY49te+VRAVgmzfcgjYS91mY5P0TKUDCLEM+gNnA+3T6rWITXRLYCpahpqSQbN5cE+gHpnPyXjHWxcw==} @@ -2052,8 +2052,8 @@ packages: resolution: {integrity: sha512-Ld2g8rrAyMYFXBhEqMz8ZAHBi4J4uS1i/CxGMDnjyFWddMXLVcDp051DZfu+t7+ab7Wv6SMqpWmyFIj5UbfFvg==} engines: {node: '>=14'} - form-data@4.0.0: - resolution: {integrity: sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==} + form-data@4.0.1: + resolution: {integrity: sha512-tzN8e4TX8+kkxGPK8D5u0FNmjPUjw3lwC9lSLxxoB/+GtsJG91CO8bSWy73APlgAZzZbXEYZJuxjkHH2w+Ezhw==} engines: {node: '>= 6'} framer-motion@11.3.29: @@ -2505,8 +2505,8 @@ packages: resolution: {integrity: sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==} engines: {node: '>= 8'} - micromatch@4.0.7: - resolution: {integrity: sha512-LPP/3KorzCwBxfeUuZmaR6bG2kdeHSbe0P2tY3FLRU4vYrjYz5hI4QZwV0njUx3jeuKe67YukQ1LSPZBKDqO/Q==} + micromatch@4.0.8: + resolution: {integrity: sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==} engines: {node: '>=8.6'} mime-db@1.52.0: @@ -2724,9 +2724,6 @@ packages: perfect-debounce@1.0.0: resolution: {integrity: sha512-xCy9V055GLEqoFaHoC1SoLIaLmWctgCUaBaWxDZ7/Zx4CTyX7cJQLJOok/orfjZAh9kEYpjJa4d0KcJmCbctZA==} - picocolors@1.0.1: - resolution: {integrity: sha512-anP1Z8qwhkbmu7MFP5iTt+wQKXgwzf7zTyGlcdzabySa9vd0Xt392U0rVmz9poOaBj0uHJKyyo9/upk0HrEQew==} - picocolors@1.1.0: resolution: {integrity: sha512-TQ92mBOW0l3LeMeyLV6mzy/kWr8lkd/hp3mTg7wYK7zJhuBStmGMBG0BdeDZS/dZx1IukaX6Bk11zcln25o1Aw==} @@ -2929,8 +2926,8 @@ packages: resolution: {integrity: sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw==} engines: {iojs: '>=1.0.0', node: '>=0.10.0'} - rollup@4.21.0: - resolution: {integrity: sha512-vo+S/lfA2lMS7rZ2Qoubi6I5hwZwzXeUIctILZLbHI+laNtvhhOIon2S1JksA5UEDQ7l3vberd0fxK44lTYjbQ==} + rollup@4.24.0: + resolution: {integrity: sha512-DOmrlGSXNk1DM0ljiQA+i+o0rSLhtii1je5wgk60j49d1jHT5YYttBv1iWOnYSTG+fZZESUOSNiAl89SIet+Cg==} engines: {node: '>=18.0.0', npm: '>=8.0.0'} hasBin: true @@ -3227,6 +3224,12 @@ packages: '@types/react': optional: true + use-debounce@10.0.3: + resolution: {integrity: sha512-DxQSI9ZKso689WM1mjgGU3ozcxU1TJElBJ3X6S4SMzMNcm2lVH0AHmyXB+K7ewjz2BSUKJTDqTcwtSMRfB89dg==} + engines: {node: '>= 16.0.0'} 
+ peerDependencies: + react: '*' + use-isomorphic-layout-effect@1.1.2: resolution: {integrity: sha512-49L8yCO3iGT/ZF9QttjwLF/ZD9Iwto5LnH5LmEdk/6cFmXddqi2ulF0edxTwjj+7mqvpVVGQWvbXZdn32wRSHA==} peerDependencies: @@ -3254,8 +3257,8 @@ packages: engines: {node: ^18.0.0 || >=20.0.0} hasBin: true - vite@5.4.4: - resolution: {integrity: sha512-RHFCkULitycHVTtelJ6jQLd+KSAAzOgEYorV32R2q++M6COBjKJR6BxqClwp5sf0XaBDjVMuJ9wnNfyAJwjMkA==} + vite@5.4.6: + resolution: {integrity: sha512-IeL5f8OO5nylsgzd9tq4qD2QqI0k2CQLGrWD0rCN0EQJZpBK5vJAx0I+GDkMOXxQX/OfFHMuLIx6ddAxGX/k+Q==} engines: {node: ^18.0.0 || >=20.0.0} hasBin: true peerDependencies: @@ -3397,7 +3400,7 @@ snapshots: '@babel/code-frame@7.24.7': dependencies: '@babel/highlight': 7.24.7 - picocolors: 1.0.1 + picocolors: 1.1.0 '@babel/generator@7.17.7': dependencies: @@ -3445,7 +3448,7 @@ snapshots: '@babel/helper-validator-identifier': 7.24.7 chalk: 2.4.2 js-tokens: 4.0.0 - picocolors: 1.0.1 + picocolors: 1.1.0 '@babel/parser@7.25.4': dependencies: @@ -4484,52 +4487,52 @@ snapshots: '@remix-run/router@1.19.2': {} - '@rollup/rollup-android-arm-eabi@4.21.0': + '@rollup/rollup-android-arm-eabi@4.24.0': optional: true - '@rollup/rollup-android-arm64@4.21.0': + '@rollup/rollup-android-arm64@4.24.0': optional: true - '@rollup/rollup-darwin-arm64@4.21.0': + '@rollup/rollup-darwin-arm64@4.24.0': optional: true - '@rollup/rollup-darwin-x64@4.21.0': + '@rollup/rollup-darwin-x64@4.24.0': optional: true - '@rollup/rollup-linux-arm-gnueabihf@4.21.0': + '@rollup/rollup-linux-arm-gnueabihf@4.24.0': optional: true - '@rollup/rollup-linux-arm-musleabihf@4.21.0': + '@rollup/rollup-linux-arm-musleabihf@4.24.0': optional: true - '@rollup/rollup-linux-arm64-gnu@4.21.0': + '@rollup/rollup-linux-arm64-gnu@4.24.0': optional: true - '@rollup/rollup-linux-arm64-musl@4.21.0': + '@rollup/rollup-linux-arm64-musl@4.24.0': optional: true - '@rollup/rollup-linux-powerpc64le-gnu@4.21.0': + '@rollup/rollup-linux-powerpc64le-gnu@4.24.0': optional: true - '@rollup/rollup-linux-riscv64-gnu@4.21.0': + '@rollup/rollup-linux-riscv64-gnu@4.24.0': optional: true - '@rollup/rollup-linux-s390x-gnu@4.21.0': + '@rollup/rollup-linux-s390x-gnu@4.24.0': optional: true - '@rollup/rollup-linux-x64-gnu@4.21.0': + '@rollup/rollup-linux-x64-gnu@4.24.0': optional: true - '@rollup/rollup-linux-x64-musl@4.21.0': + '@rollup/rollup-linux-x64-musl@4.24.0': optional: true - '@rollup/rollup-win32-arm64-msvc@4.21.0': + '@rollup/rollup-win32-arm64-msvc@4.24.0': optional: true - '@rollup/rollup-win32-ia32-msvc@4.21.0': + '@rollup/rollup-win32-ia32-msvc@4.24.0': optional: true - '@rollup/rollup-win32-x64-msvc@4.21.0': + '@rollup/rollup-win32-x64-msvc@4.24.0': optional: true '@stylistic/eslint-plugin@2.8.0(eslint@9.10.0(jiti@1.21.6))(typescript@5.5.4)': @@ -4672,8 +4675,6 @@ snapshots: '@types/aria-query@5.0.4': {} - '@types/estree@1.0.5': {} - '@types/estree@1.0.6': {} '@types/json-schema@7.0.15': {} @@ -4826,10 +4827,10 @@ snapshots: '@typescript-eslint/types': 8.5.0 eslint-visitor-keys: 3.4.3 - '@vitejs/plugin-react-swc@3.7.0(vite@5.4.4(@types/node@22.5.4))': + '@vitejs/plugin-react-swc@3.7.0(vite@5.4.6(@types/node@22.5.4))': dependencies: '@swc/core': 1.7.14 - vite: 5.4.4(@types/node@22.5.4) + vite: 5.4.6(@types/node@22.5.4) transitivePeerDependencies: - '@swc/helpers' @@ -4858,13 +4859,13 @@ snapshots: chai: 5.1.1 tinyrainbow: 1.2.0 - '@vitest/mocker@2.1.1(@vitest/spy@2.1.1)(vite@5.4.4(@types/node@22.5.4))': + '@vitest/mocker@2.1.1(@vitest/spy@2.1.1)(vite@5.4.6(@types/node@22.5.4))': dependencies: 
'@vitest/spy': 2.1.1 estree-walker: 3.0.3 magic-string: 0.30.11 optionalDependencies: - vite: 5.4.4(@types/node@22.5.4) + vite: 5.4.6(@types/node@22.5.4) '@vitest/pretty-format@2.1.1': dependencies: @@ -5020,7 +5021,7 @@ snapshots: axios@1.7.7: dependencies: follow-redirects: 1.15.9 - form-data: 4.0.0 + form-data: 4.0.1 proxy-from-env: 1.1.0 transitivePeerDependencies: - debug @@ -5653,7 +5654,7 @@ snapshots: '@nodelib/fs.walk': 1.2.8 glob-parent: 5.1.2 merge2: 1.4.1 - micromatch: 4.0.7 + micromatch: 4.0.8 fast-json-stable-stringify@2.1.0: {} @@ -5705,7 +5706,7 @@ snapshots: cross-spawn: 7.0.3 signal-exit: 4.1.0 - form-data@4.0.0: + form-data@4.0.1: dependencies: asynckit: 0.4.0 combined-stream: 1.0.8 @@ -6144,7 +6145,7 @@ snapshots: merge2@1.4.1: {} - micromatch@4.0.7: + micromatch@4.0.8: dependencies: braces: 3.0.3 picomatch: 2.3.1 @@ -6346,8 +6347,6 @@ snapshots: perfect-debounce@1.0.0: {} - picocolors@1.0.1: {} - picocolors@1.1.0: {} picomatch@2.3.1: {} @@ -6567,26 +6566,26 @@ snapshots: reusify@1.0.4: {} - rollup@4.21.0: + rollup@4.24.0: dependencies: - '@types/estree': 1.0.5 + '@types/estree': 1.0.6 optionalDependencies: - '@rollup/rollup-android-arm-eabi': 4.21.0 - '@rollup/rollup-android-arm64': 4.21.0 - '@rollup/rollup-darwin-arm64': 4.21.0 - '@rollup/rollup-darwin-x64': 4.21.0 - '@rollup/rollup-linux-arm-gnueabihf': 4.21.0 - '@rollup/rollup-linux-arm-musleabihf': 4.21.0 - '@rollup/rollup-linux-arm64-gnu': 4.21.0 - '@rollup/rollup-linux-arm64-musl': 4.21.0 - '@rollup/rollup-linux-powerpc64le-gnu': 4.21.0 - '@rollup/rollup-linux-riscv64-gnu': 4.21.0 - '@rollup/rollup-linux-s390x-gnu': 4.21.0 - '@rollup/rollup-linux-x64-gnu': 4.21.0 - '@rollup/rollup-linux-x64-musl': 4.21.0 - '@rollup/rollup-win32-arm64-msvc': 4.21.0 - '@rollup/rollup-win32-ia32-msvc': 4.21.0 - '@rollup/rollup-win32-x64-msvc': 4.21.0 + '@rollup/rollup-android-arm-eabi': 4.24.0 + '@rollup/rollup-android-arm64': 4.24.0 + '@rollup/rollup-darwin-arm64': 4.24.0 + '@rollup/rollup-darwin-x64': 4.24.0 + '@rollup/rollup-linux-arm-gnueabihf': 4.24.0 + '@rollup/rollup-linux-arm-musleabihf': 4.24.0 + '@rollup/rollup-linux-arm64-gnu': 4.24.0 + '@rollup/rollup-linux-arm64-musl': 4.24.0 + '@rollup/rollup-linux-powerpc64le-gnu': 4.24.0 + '@rollup/rollup-linux-riscv64-gnu': 4.24.0 + '@rollup/rollup-linux-s390x-gnu': 4.24.0 + '@rollup/rollup-linux-x64-gnu': 4.24.0 + '@rollup/rollup-linux-x64-musl': 4.24.0 + '@rollup/rollup-win32-arm64-msvc': 4.24.0 + '@rollup/rollup-win32-ia32-msvc': 4.24.0 + '@rollup/rollup-win32-x64-msvc': 4.24.0 fsevents: 2.3.3 run-parallel@1.2.0: @@ -6902,6 +6901,10 @@ snapshots: optionalDependencies: '@types/react': 18.3.5 + use-debounce@10.0.3(react@18.3.1): + dependencies: + react: 18.3.1 + use-isomorphic-layout-effect@1.1.2(@types/react@18.3.5)(react@18.3.1): dependencies: react: 18.3.1 @@ -6926,7 +6929,7 @@ snapshots: cac: 6.7.14 debug: 4.3.7 pathe: 1.1.2 - vite: 5.4.4(@types/node@22.5.4) + vite: 5.4.6(@types/node@22.5.4) transitivePeerDependencies: - '@types/node' - less @@ -6938,11 +6941,11 @@ snapshots: - supports-color - terser - vite@5.4.4(@types/node@22.5.4): + vite@5.4.6(@types/node@22.5.4): dependencies: esbuild: 0.21.5 postcss: 8.4.45 - rollup: 4.21.0 + rollup: 4.24.0 optionalDependencies: '@types/node': 22.5.4 fsevents: 2.3.3 @@ -6950,7 +6953,7 @@ snapshots: vitest@2.1.1(@types/node@22.5.4)(happy-dom@15.0.0): dependencies: '@vitest/expect': 2.1.1 - '@vitest/mocker': 2.1.1(@vitest/spy@2.1.1)(vite@5.4.4(@types/node@22.5.4)) + '@vitest/mocker': 2.1.1(@vitest/spy@2.1.1)(vite@5.4.6(@types/node@22.5.4)) 
'@vitest/pretty-format': 2.1.1 '@vitest/runner': 2.1.1 '@vitest/snapshot': 2.1.1 @@ -6965,7 +6968,7 @@ snapshots: tinyexec: 0.3.0 tinypool: 1.0.1 tinyrainbow: 1.2.0 - vite: 5.4.4(@types/node@22.5.4) + vite: 5.4.6(@types/node@22.5.4) vite-node: 2.1.1(@types/node@22.5.4) why-is-node-running: 2.3.0 optionalDependencies: diff --git a/airflow/ui/rules/core.js b/airflow/ui/rules/core.js index 3ec26cad6d32c..3c5133236e98a 100644 --- a/airflow/ui/rules/core.js +++ b/airflow/ui/rules/core.js @@ -96,13 +96,6 @@ export const coreRules = /** @type {const} @satisfies {FlatConfig.Config} */ ({ */ "arrow-body-style": ERROR, - /** - * Limit cyclomatic complexity to a maximum of 10. - * - * @see [complexity](https://eslint.org/docs/latest/rules/complexity) - */ - complexity: [WARN, 10], - /** * Require curly around all control statements. * @@ -291,17 +284,6 @@ export const coreRules = /** @type {const} @satisfies {FlatConfig.Config} */ ({ { max: 250, skipBlankLines: true, skipComments: true }, ], - /** - * Enforce a maximum number of 100 lines of code in a function. - * Need more? Move it to another function. - * - * @see [max-lines-per-function](https://eslint.org/docs/latest/rules/max-lines-per-function) - */ - "max-lines-per-function": [ - ERROR, - { max: 100, skipBlankLines: true, skipComments: true }, - ], - /** * Enforce a maximum depth that callbacks can be nested to 3. * @@ -309,13 +291,6 @@ export const coreRules = /** @type {const} @satisfies {FlatConfig.Config} */ ({ */ "max-nested-callbacks": [ERROR, { max: 3 }], - /** - * Enforce a maximum number of statements allowed in function blocks to 10. - * - * @see [max-statements](https://eslint.org/docs/latest/rules/max-statements) - */ - "max-statements": [WARN, { max: 10 }], - /** * Disallow use of `alert`, `confirm`, and `prompt`. * diff --git a/airflow/ui/rules/react.js b/airflow/ui/rules/react.js index 4c8d8b8ba5f09..508718659b4ad 100644 --- a/airflow/ui/rules/react.js +++ b/airflow/ui/rules/react.js @@ -478,13 +478,6 @@ export const reactRules = /** @type {const} @satisfies {FlatConfig.Config} */ ({ */ [`${reactNamespace}/jsx-max-depth`]: [ERROR, { max: 5 }], - /** - * Disallow `Function#bind` or arrow functions in JSX props. - * - * @see [react/jsx-no-bind](https://github.com/jsx-eslint/eslint-plugin-react/blob/HEAD/docs/rules/jsx-no-bind.md) - */ - [`${reactNamespace}/jsx-no-bind`]: ERROR, - /** * Disallow comments from being inserted as text nodes. * diff --git a/airflow/ui/src/App.test.tsx b/airflow/ui/src/App.test.tsx index 5efcf90f1a05d..38b90d1c4983c 100644 --- a/airflow/ui/src/App.test.tsx +++ b/airflow/ui/src/App.test.tsx @@ -26,6 +26,9 @@ import type { DAGCollectionResponse } from "openapi/requests/types.gen"; import { App } from "./App"; import { Wrapper } from "./utils/Wrapper"; +// The null fields actually have to be null instead of undefined +/* eslint-disable unicorn/no-null */ + const mockListDags: DAGCollectionResponse = { dags: [ { diff --git a/airflow/ui/src/components/DataTable/CardList.tsx b/airflow/ui/src/components/DataTable/CardList.tsx new file mode 100644 index 0000000000000..ddebff81b2495 --- /dev/null +++ b/airflow/ui/src/components/DataTable/CardList.tsx @@ -0,0 +1,70 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +import { Box, SimpleGrid, Skeleton } from "@chakra-ui/react"; +import { + type CoreRow, + flexRender, + type Table as TanStackTable, +} from "@tanstack/react-table"; +import type { SyntheticEvent } from "react"; + +import type { CardDef } from "./types"; + +type DataTableProps = { + readonly cardDef: CardDef; + readonly isLoading?: boolean; + readonly onRowClick?: (e: SyntheticEvent, row: CoreRow) => void; + readonly table: TanStackTable; +}; + +export const CardList = ({ + cardDef, + isLoading, + onRowClick, + table, +}: DataTableProps) => { + const defaultGridProps = { column: { base: 1 }, spacing: 2 }; + + return ( + + + {table.getRowModel().rows.map((row) => ( + onRowClick(event, row) : undefined} + title={onRowClick ? "View details" : undefined} + > + {Boolean(isLoading) && + (cardDef.meta?.customSkeleton ?? ( + + ))} + {!Boolean(isLoading) && + flexRender(cardDef.card, { row: row.original })} + + ))} + + + ); +}; diff --git a/airflow/ui/src/components/DataTable/DataTable.test.tsx b/airflow/ui/src/components/DataTable/DataTable.test.tsx index c83f15f8f3822..028ba27ce2a94 100644 --- a/airflow/ui/src/components/DataTable/DataTable.test.tsx +++ b/airflow/ui/src/components/DataTable/DataTable.test.tsx @@ -16,12 +16,14 @@ * specific language governing permissions and limitations * under the License. 
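CardList renders cardDef.card once per row and falls back to a skeleton (or cardDef.meta.customSkeleton) while loading. A small sketch of wiring a CardDef into DataTable's card mode, using the prop names defined above; the Item shape is made up for illustration:

import { Text } from "@chakra-ui/react";
import type { ColumnDef } from "@tanstack/react-table";

import { DataTable } from "src/components/DataTable";
import type { CardDef } from "src/components/DataTable/types";

type Item = { name: string };

// Columns still back the table view and the skeleton layout.
const columns: Array<ColumnDef<Item>> = [{ accessorKey: "name", header: "Name" }];

// One card per row; the raw row object is passed straight to `card`.
const cardDef: CardDef<Item> = {
  card: ({ row }) => <Text>{row.name}</Text>,
};

export const ItemCards = () => (
  <DataTable
    cardDef={cardDef}
    columns={columns}
    data={[{ name: "example" }]}
    displayMode="card"
    modelName="item"
    total={1}
  />
);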
*/ +import { Text } from "@chakra-ui/react"; import type { ColumnDef, PaginationState } from "@tanstack/react-table"; import "@testing-library/jest-dom"; import { render, screen } from "@testing-library/react"; import { describe, expect, it, vi } from "vitest"; import { DataTable } from "./DataTable.tsx"; +import type { CardDef } from "./types.ts"; const columns: Array> = [ { @@ -36,6 +38,10 @@ const data = [{ name: "John Doe" }, { name: "Jane Doe" }]; const pagination: PaginationState = { pageIndex: 0, pageSize: 1 }; const onStateChange = vi.fn(); +const cardDef: CardDef<{ name: string }> = { + card: ({ row }) => My name is {row.name}., +}; + describe("DataTable", () => { it("renders table with data", () => { render( @@ -84,4 +90,44 @@ describe("DataTable", () => { expect(screen.getByText(">>")).toBeDisabled(); expect(screen.getByText(">")).toBeDisabled(); }); + + it("when isLoading renders skeleton columns", () => { + render(); + + expect(screen.getAllByTestId("skeleton")).toHaveLength(10); + }); + + it("still displays table if mode is card but there is no cardDef", () => { + render(); + + expect(screen.getByText("Name")).toBeInTheDocument(); + }); + + it("displays cards if mode is card and there is cardDef", () => { + render( + , + ); + + expect(screen.getByText("My name is John Doe.")).toBeInTheDocument(); + }); + + it("displays skeleton for loading card list", () => { + render( + , + ); + + expect(screen.getAllByTestId("skeleton")).toHaveLength(5); + }); }); diff --git a/airflow/ui/src/components/DataTable/DataTable.tsx b/airflow/ui/src/components/DataTable/DataTable.tsx index 705d7883f07d2..2ed1a4f16ea3a 100644 --- a/airflow/ui/src/components/DataTable/DataTable.tsx +++ b/airflow/ui/src/components/DataTable/DataTable.tsx @@ -16,61 +16,62 @@ * specific language governing permissions and limitations * under the License. 
*/ +import { Progress, Text } from "@chakra-ui/react"; import { - Table as ChakraTable, - TableContainer, - Tbody, - Td, - Th, - Thead, - Tr, - useColorModeValue, -} from "@chakra-ui/react"; -import { - flexRender, getCoreRowModel, getExpandedRowModel, getPaginationRowModel, useReactTable, - type ColumnDef, type OnChangeFn, type TableState as ReactTableState, type Row, type Table as TanStackTable, type Updater, } from "@tanstack/react-table"; -import React, { Fragment, useCallback, useRef } from "react"; -import { - TiArrowSortedDown, - TiArrowSortedUp, - TiArrowUnsorted, -} from "react-icons/ti"; +import React, { type ReactNode, useCallback, useRef } from "react"; +import { CardList } from "./CardList"; +import { TableList } from "./TableList"; import { TablePaginator } from "./TablePaginator"; -import type { TableState } from "./types"; +import { createSkeletonMock } from "./skeleton"; +import type { CardDef, MetaColumn, TableState } from "./types"; type DataTableProps = { - readonly columns: Array>; + readonly cardDef?: CardDef; + readonly columns: Array>; readonly data: Array; + readonly displayMode?: "card" | "table"; + readonly errorMessage?: ReactNode | string; readonly getRowCanExpand?: (row: Row) => boolean; readonly initialState?: TableState; + readonly isFetching?: boolean; + readonly isLoading?: boolean; + readonly modelName?: string; + readonly noRowsMessage?: ReactNode; readonly onStateChange?: (state: TableState) => void; readonly renderSubComponent?: (props: { row: Row; }) => React.ReactElement; + readonly skeletonCount?: number; readonly total?: number; }; const defaultGetRowCanExpand = () => false; -// eslint-disable-next-line max-lines-per-function export const DataTable = ({ + cardDef, columns, data, + displayMode = "table", + errorMessage, getRowCanExpand = defaultGetRowCanExpand, initialState, + isFetching, + isLoading, + modelName, + noRowsMessage, onStateChange, - renderSubComponent, + skeletonCount = 10, total = 0, }: DataTableProps) => { const ref = useRef<{ tableRef: TanStackTable | undefined }>({ @@ -94,6 +95,10 @@ export const DataTable = ({ [onStateChange], ); + const rest = Boolean(isLoading) + ? createSkeletonMock(displayMode, skeletonCount, columns) + : {}; + const table = useReactTable({ columns, data, @@ -106,87 +111,33 @@ export const DataTable = ({ onStateChange: handleStateChange, rowCount: total, state: initialState, + ...rest, }); ref.current.tableRef = table; - const theadBg = useColorModeValue("white", "gray.800"); + const { rows } = table.getRowModel(); - return ( - - - - {table.getHeaderGroups().map((headerGroup) => ( - - {headerGroup.headers.map( - ({ colSpan, column, getContext, id, isPlaceholder }) => { - const sort = column.getIsSorted(); - const canSort = column.getCanSort(); + const display = displayMode === "card" && Boolean(cardDef) ? "card" : "table"; - return ( - - {isPlaceholder ? undefined : ( - <>{flexRender(column.columnDef.header, getContext())} - )} - {canSort && sort === false ? ( - - ) : undefined} - {canSort && sort !== false ? ( - sort === "desc" ? 
( - - ) : ( - - ) - ) : undefined} - - ); - }, - )} - - ))} - - - {table.getRowModel().rows.map((row) => ( - - - {/* first row is a normal row */} - {row.getVisibleCells().map((cell) => ( - - {flexRender(cell.column.columnDef.cell, cell.getContext())} - - ))} - - {row.getIsExpanded() && ( - - {/* 2nd row is a custom 1 cell row */} - - {renderSubComponent?.({ row })} - - - )} - - ))} - - + return ( + <> + + {errorMessage} + {!Boolean(isLoading) && !rows.length && ( + {noRowsMessage ?? `No ${modelName}s found.`} + )} + {display === "table" && } + {display === "card" && cardDef !== undefined && ( + + )} - + ); }; diff --git a/airflow/ui/src/components/DataTable/TableList.tsx b/airflow/ui/src/components/DataTable/TableList.tsx new file mode 100644 index 0000000000000..97b7fd6aed080 --- /dev/null +++ b/airflow/ui/src/components/DataTable/TableList.tsx @@ -0,0 +1,125 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +import { + Table as ChakraTable, + TableContainer, + Tbody, + Td, + Th, + Thead, + Tr, +} from "@chakra-ui/react"; +import { + flexRender, + type Row, + type Table as TanStackTable, +} from "@tanstack/react-table"; +import React, { Fragment } from "react"; +import { + TiArrowSortedDown, + TiArrowSortedUp, + TiArrowUnsorted, +} from "react-icons/ti"; + +type DataTableProps = { + readonly renderSubComponent?: (props: { + row: Row; + }) => React.ReactElement; + readonly table: TanStackTable; +}; + +export const TableList = ({ + renderSubComponent, + table, +}: DataTableProps) => ( + + + + {table.getHeaderGroups().map((headerGroup) => ( + + {headerGroup.headers.map( + ({ colSpan, column, getContext, id, isPlaceholder }) => { + const sort = column.getIsSorted(); + const canSort = column.getCanSort(); + + return ( + + {isPlaceholder ? undefined : ( + <>{flexRender(column.columnDef.header, getContext())} + )} + {canSort && sort === false ? ( + + ) : undefined} + {canSort && sort !== false ? ( + sort === "desc" ? ( + + ) : ( + + ) + ) : undefined} + + ); + }, + )} + + ))} + + + {table.getRowModel().rows.map((row) => ( + + + {/* first row is a normal row */} + {row.getVisibleCells().map((cell) => ( + + {flexRender(cell.column.columnDef.cell, cell.getContext())} + + ))} + + {row.getIsExpanded() && ( + + {/* 2nd row is a custom 1 cell row */} + + {renderSubComponent?.({ row })} + + + )} + + ))} + + + +); diff --git a/airflow/ui/src/components/DataTable/ToggleTableDisplay.tsx b/airflow/ui/src/components/DataTable/ToggleTableDisplay.tsx new file mode 100644 index 0000000000000..489fbdc1ce3b6 --- /dev/null +++ b/airflow/ui/src/components/DataTable/ToggleTableDisplay.tsx @@ -0,0 +1,54 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +import { HStack, IconButton } from "@chakra-ui/react"; +import { FiAlignJustify, FiGrid } from "react-icons/fi"; + +type Display = "card" | "table"; + +type Props = { + readonly display: Display; + readonly setDisplay: (display: Display) => void; +}; + +export const ToggleTableDisplay = ({ display, setDisplay }: Props) => ( + + } + isActive={display === "card"} + minWidth={8} + onClick={() => setDisplay("card")} + variant="outline" + width={8} + /> + } + isActive={display === "table"} + minWidth={8} + onClick={() => setDisplay("table")} + variant="outline" + width={8} + /> + +); diff --git a/airflow/ui/src/components/DataTable/searchParams.ts b/airflow/ui/src/components/DataTable/searchParams.ts index 80bb6e967afaa..39001b097f349 100644 --- a/airflow/ui/src/components/DataTable/searchParams.ts +++ b/airflow/ui/src/components/DataTable/searchParams.ts @@ -18,13 +18,19 @@ */ import type { SortingState } from "@tanstack/react-table"; +import { + SearchParamsKeys, + type SearchParamsKeysType, +} from "src/constants/searchParams"; + import type { TableState } from "./types"; -export const LIMIT_PARAM = "limit"; -export const OFFSET_PARAM = "offset"; -export const SORT_PARAM = "sort"; +const { + LIMIT: LIMIT_PARAM, + OFFSET: OFFSET_PARAM, + SORT: SORT_PARAM, +}: SearchParamsKeysType = SearchParamsKeys; -// eslint-disable-next-line max-statements export const stateToSearchParams = ( state: TableState, defaultTableState?: TableState, diff --git a/airflow/ui/src/components/DataTable/skeleton.tsx b/airflow/ui/src/components/DataTable/skeleton.tsx new file mode 100644 index 0000000000000..e237e4ad1030b --- /dev/null +++ b/airflow/ui/src/components/DataTable/skeleton.tsx @@ -0,0 +1,51 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
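ToggleTableDisplay is a controlled widget: it only renders the two icon buttons and reports clicks through setDisplay, so the parent owns the "card" | "table" value. A short sketch of that wiring (the wrapping component is illustrative; DagsList below uses the same pattern):

import { useState } from "react";

import { ToggleTableDisplay } from "src/components/DataTable/ToggleTableDisplay";

export const DisplayToggleExample = () => {
  // Parent state drives which button shows as active.
  const [display, setDisplay] = useState<"card" | "table">("card");

  return <ToggleTableDisplay display={display} setDisplay={setDisplay} />;
};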
+ */ +import { Skeleton } from "@chakra-ui/react"; + +import type { MetaColumn } from "./types"; + +export const createSkeletonMock = ( + mode: "card" | "table", + skeletonCount: number, + columnDefs: Array>, +) => { + const colDefs = columnDefs.map((colDef) => ({ + ...colDef, + cell: () => { + if (mode === "table") { + return ( + colDef.meta?.customSkeleton ?? ( + + ) + ); + } + + return undefined; + }, + })); + + const data = [...Array(skeletonCount)].map(() => ({})); + + return { columns: colDefs, data }; +}; diff --git a/airflow/ui/src/components/DataTable/types.ts b/airflow/ui/src/components/DataTable/types.ts index febf9acdb0403..4741b61ff6433 100644 --- a/airflow/ui/src/components/DataTable/types.ts +++ b/airflow/ui/src/components/DataTable/types.ts @@ -16,9 +16,30 @@ * specific language governing permissions and limitations * under the License. */ -import type { PaginationState, SortingState } from "@tanstack/react-table"; +import type { SimpleGridProps } from "@chakra-ui/react"; +import type { + ColumnDef, + PaginationState, + SortingState, +} from "@tanstack/react-table"; +import type { ReactNode } from "react"; export type TableState = { pagination: PaginationState; sorting: SortingState; }; + +export type CardDef = { + card: (props: { row: TData }) => ReactNode; + gridProps?: SimpleGridProps; + meta?: { + customSkeleton?: JSX.Element; + }; +}; + +export type MetaColumn = { + meta?: { + customSkeleton?: ReactNode; + skeletonWidth?: number; + } & ColumnDef["meta"]; +} & ColumnDef; diff --git a/airflow/ui/src/components/ErrorAlert.tsx b/airflow/ui/src/components/ErrorAlert.tsx new file mode 100644 index 0000000000000..3128a2cdec2c4 --- /dev/null +++ b/airflow/ui/src/components/ErrorAlert.tsx @@ -0,0 +1,67 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +import { Alert, AlertIcon } from "@chakra-ui/react"; +import type { ApiError } from "openapi-gen/requests/core/ApiError"; +import type { + HTTPExceptionResponse, + HTTPValidationError, +} from "openapi-gen/requests/types.gen"; + +type ExpandedApiError = { + body: HTTPExceptionResponse | HTTPValidationError; +} & ApiError; + +type Props = { + readonly error?: unknown; +}; + +export const ErrorAlert = ({ error: err }: Props) => { + const error = err as ExpandedApiError; + + if (!Boolean(error)) { + return undefined; + } + + const details = error.body.detail; + let detailMessage; + + if (details !== undefined) { + if (typeof details === "string") { + detailMessage = details; + } else if (Array.isArray(details)) { + detailMessage = details.map( + (detail) => ` + ${detail.loc.join(".")} ${detail.msg}`, + ); + } else { + detailMessage = Object.keys(details).map( + (key) => `${key}: ${details[key] as string}`, + ); + } + } + + return ( + + + {error.message} +
+ {detailMessage} +
+ ); +}; diff --git a/airflow/ui/src/components/SearchBar.tsx b/airflow/ui/src/components/SearchBar.tsx index 830f942677123..ad50a65b4311b 100644 --- a/airflow/ui/src/components/SearchBar.tsx +++ b/airflow/ui/src/components/SearchBar.tsx @@ -18,15 +18,19 @@ */ import { Button, - type ButtonProps, Input, InputGroup, - type InputGroupProps, InputLeftElement, - type InputProps, InputRightElement, + type ButtonProps, + type InputGroupProps, + type InputProps, } from "@chakra-ui/react"; +import type { ChangeEvent } from "react"; import { FiSearch } from "react-icons/fi"; +import { useDebouncedCallback } from "use-debounce"; + +const debounceDelay = 200; export const SearchBar = ({ buttonProps, @@ -36,23 +40,35 @@ export const SearchBar = ({ readonly buttonProps?: ButtonProps; readonly groupProps?: InputGroupProps; readonly inputProps?: InputProps; -}) => ( - - - - - - - - - -); +}) => { + const handleSearchChange = useDebouncedCallback( + (event: ChangeEvent) => inputProps?.onChange?.(event), + debounceDelay, + ); + + return ( + + + + + + + + + + ); +}; diff --git a/airflow/ui/src/constants/searchParams.ts b/airflow/ui/src/constants/searchParams.ts new file mode 100644 index 0000000000000..893a4461bffe7 --- /dev/null +++ b/airflow/ui/src/constants/searchParams.ts @@ -0,0 +1,31 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
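The reworked SearchBar routes the caller's onChange through use-debounce, so consumers only see a change event once typing has paused for debounceDelay (200 ms) instead of on every keystroke. The same pattern in isolation, with an illustrative onSearch callback standing in for the real handler:

import { Input } from "@chakra-ui/react";
import type { ChangeEvent } from "react";
import { useDebouncedCallback } from "use-debounce";

const debounceDelay = 200;

type Props = {
  readonly onSearch: (value: string) => void;
};

export const DebouncedSearchInput = ({ onSearch }: Props) => {
  // The wrapped callback fires only after debounceDelay ms without further input.
  const handleChange = useDebouncedCallback(
    (event: ChangeEvent<HTMLInputElement>) => onSearch(event.target.value),
    debounceDelay,
  );

  return <Input onChange={handleChange} placeholder="Search" />;
};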
+ */ +export enum SearchParamsKeys { + LAST_DAG_RUN_STATE = "last_dag_run_state", + LIMIT = "limit", + NAME_PATTERN = "name_pattern", + OFFSET = "offset", + PAUSED = "paused", + SORT = "sort", +} + +export type SearchParamsKeysType = Record< + keyof typeof SearchParamsKeys, + string +>; diff --git a/airflow/ui/src/layouts/Nav/Nav.tsx b/airflow/ui/src/layouts/Nav/Nav.tsx index 9886b5eb75760..3b47595e8c173 100644 --- a/airflow/ui/src/layouts/Nav/Nav.tsx +++ b/airflow/ui/src/layouts/Nav/Nav.tsx @@ -21,7 +21,6 @@ import { Flex, Icon, Link, - useColorMode, useColorModeValue, VStack, } from "@chakra-ui/react"; @@ -32,9 +31,7 @@ import { FiDatabase, FiGlobe, FiHome, - FiMoon, FiSettings, - FiSun, } from "react-icons/fi"; import { AirflowPin } from "src/assets/AirflowPin"; @@ -42,9 +39,9 @@ import { DagIcon } from "src/assets/DagIcon"; import { DocsButton } from "./DocsButton"; import { NavButton } from "./NavButton"; +import { UserSettingsButton } from "./UserSettingsButton"; export const Nav = () => { - const { colorMode, toggleColorMode } = useColorMode(); const navBg = useColorModeValue("blue.100", "blue.900"); return ( @@ -106,16 +103,7 @@ export const Nav = () => { title="Return to legacy UI" /> - - ) : ( - - ) - } - onClick={toggleColorMode} - /> + ); diff --git a/airflow/ui/src/layouts/Nav/UserSettingsButton.tsx b/airflow/ui/src/layouts/Nav/UserSettingsButton.tsx new file mode 100644 index 0000000000000..c43c17b6d039b --- /dev/null +++ b/airflow/ui/src/layouts/Nav/UserSettingsButton.tsx @@ -0,0 +1,58 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +import { + IconButton, + Menu, + MenuButton, + useColorMode, + MenuItem, + MenuList, +} from "@chakra-ui/react"; +import { FiMoon, FiSun, FiUser } from "react-icons/fi"; + +import { navButtonProps } from "./navButtonProps"; + +export const UserSettingsButton = () => { + const { colorMode, toggleColorMode } = useColorMode(); + + return ( + + } + {...navButtonProps} + /> + + + {colorMode === "light" ? 
( + <> + + Switch to Dark Mode + + ) : ( + <> + + Switch to Light Mode + + )} + + + + ); +}; diff --git a/airflow/ui/src/main.tsx b/airflow/ui/src/main.tsx index daf4bcd024cd6..12434ca7bade2 100644 --- a/airflow/ui/src/main.tsx +++ b/airflow/ui/src/main.tsx @@ -18,7 +18,7 @@ */ import { ChakraProvider } from "@chakra-ui/react"; import { QueryClient, QueryClientProvider } from "@tanstack/react-query"; -import axios, { type AxiosError, type AxiosResponse } from "axios"; +import axios, { type AxiosError } from "axios"; import { createRoot } from "react-dom/client"; import { BrowserRouter } from "react-router-dom"; @@ -45,7 +45,7 @@ const queryClient = new QueryClient({ // redirect to login page if the API responds with unauthorized or forbidden errors axios.interceptors.response.use( - (response: AxiosResponse) => response, + (response) => response, (error: AxiosError) => { if (error.response?.status === 403 || error.response?.status === 401) { const params = new URLSearchParams(); @@ -53,6 +53,8 @@ axios.interceptors.response.use( params.set("next", globalThis.location.href); globalThis.location.replace(`/login?${params.toString()}`); } + + return Promise.reject(error); }, ); diff --git a/airflow/ui/src/pages/DagsList/DagCard.test.tsx b/airflow/ui/src/pages/DagsList/DagCard.test.tsx new file mode 100644 index 0000000000000..3ae6e4fceeac9 --- /dev/null +++ b/airflow/ui/src/pages/DagsList/DagCard.test.tsx @@ -0,0 +1,87 @@ +/* eslint-disable unicorn/no-null */ + +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
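The interceptor change in main.tsx is subtle but important: an onRejected handler that returns nothing resolves the chain with undefined, hiding failures from TanStack Query and any local error handling. Re-throwing with Promise.reject keeps the error visible after the redirect side effect. A condensed sketch of that shape (the login redirect here omits the next parameter used above):

import axios, { type AxiosError } from "axios";

axios.interceptors.response.use(
  (response) => response,
  (error: AxiosError) => {
    if (error.response?.status === 401 || error.response?.status === 403) {
      // Side effect only: send the user to the login page.
      globalThis.location.replace("/login");
    }

    // Without this, every failed request would resolve with `undefined`.
    return Promise.reject(error);
  },
);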
+ */ +import { render, screen } from "@testing-library/react"; +import type { + DAGResponse, + DagTagPydantic, +} from "openapi-gen/requests/types.gen"; +import { afterEach, describe, it, vi, expect } from "vitest"; + +import { Wrapper } from "src/utils/Wrapper"; + +import { DagCard } from "./DagCard"; + +const mockDag = { + dag_display_name: "nested_groups", + dag_id: "nested_groups", + default_view: "grid", + description: null, + file_token: + "Ii9maWxlcy9kYWdzL25lc3RlZF90YXNrX2dyb3Vwcy5weSI.G3EkdxmDUDQsVb7AIZww1TSGlFE", + fileloc: "/files/dags/nested_task_groups.py", + has_import_errors: false, + has_task_concurrency_limits: false, + is_active: true, + is_paused: false, + last_expired: null, + last_parsed_time: "2024-08-22T13:50:10.372238+00:00", + last_pickled: null, + max_active_runs: 16, + max_active_tasks: 16, + max_consecutive_failed_dag_runs: 0, + next_dagrun: "2024-08-22T00:00:00+00:00", + next_dagrun_create_after: "2024-08-23T00:00:00+00:00", + next_dagrun_data_interval_end: "2024-08-23T00:00:00+00:00", + next_dagrun_data_interval_start: "2024-08-22T00:00:00+00:00", + owners: ["airflow"], + pickle_id: null, + scheduler_lock: null, + tags: [], + timetable_description: "", + timetable_summary: "", +} satisfies DAGResponse; + +afterEach(() => { + vi.restoreAllMocks(); +}); + +describe("DagCard", () => { + it("DagCard should render without tags", () => { + render(, { wrapper: Wrapper }); + expect(screen.getByText(mockDag.dag_display_name)).toBeInTheDocument(); + expect(screen.queryByTestId("dag-tag")).toBeNull(); + }); + + it("DagCard should show +X more text if there are more than 3 tags", () => { + const tags = [ + { dag_id: "id", name: "tag1" }, + { dag_id: "id", name: "tag2" }, + { dag_id: "id", name: "tag3" }, + { dag_id: "id", name: "tag4" }, + ] satisfies Array; + + const expandedMockDag = { ...mockDag, tags } satisfies DAGResponse; + + render(, { wrapper: Wrapper }); + expect(screen.getByTestId("dag-tag")).toBeInTheDocument(); + expect(screen.getByText("+1 more")).toBeInTheDocument(); + }); +}); diff --git a/airflow/ui/src/pages/DagsList/DagCard.tsx b/airflow/ui/src/pages/DagsList/DagCard.tsx new file mode 100644 index 0000000000000..d555abbc0ce1b --- /dev/null +++ b/airflow/ui/src/pages/DagsList/DagCard.tsx @@ -0,0 +1,118 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +import { + Badge, + Box, + Flex, + HStack, + Heading, + SimpleGrid, + Text, + Tooltip, + useColorModeValue, + VStack, +} from "@chakra-ui/react"; +import { FiCalendar, FiTag } from "react-icons/fi"; + +import type { DAGResponse } from "openapi/requests/types.gen"; +import { TogglePause } from "src/components/TogglePause"; + +type Props = { + readonly dag: DAGResponse; +}; + +const MAX_TAGS = 3; + +export const DagCard = ({ dag }: Props) => { + const cardBorder = useColorModeValue("gray.100", "gray.700"); + const tooltipBg = useColorModeValue("gray.200", "gray.700"); + + return ( + + + + + + {dag.dag_display_name} + + + {dag.tags.length ? ( + + + {dag.tags.slice(0, MAX_TAGS).map((tag) => ( + {tag.name} + ))} + {dag.tags.length > MAX_TAGS && ( + + {dag.tags.slice(MAX_TAGS).map((tag) => ( + {tag.name} + ))} + + } + > + +{dag.tags.length - MAX_TAGS} more + + )} + + ) : undefined} + + + + + + +
+ + + Next Run + + {Boolean(dag.next_dagrun) ? ( + {dag.next_dagrun} + ) : undefined} + {Boolean(dag.timetable_summary) ? ( + + + {" "} + {" "} + {dag.timetable_summary} + + + ) : undefined} + +
+
+ + + ); +}; diff --git a/airflow/ui/src/pages/DagsList/DagsFilters.tsx b/airflow/ui/src/pages/DagsList/DagsFilters.tsx index cb2be8322e500..3d507ace365db 100644 --- a/airflow/ui/src/pages/DagsList/DagsFilters.tsx +++ b/airflow/ui/src/pages/DagsList/DagsFilters.tsx @@ -23,13 +23,25 @@ import { useSearchParams } from "react-router-dom"; import { useTableURLState } from "src/components/DataTable/useTableUrlState"; import { QuickFilterButton } from "src/components/QuickFilterButton"; +import { + SearchParamsKeys, + type SearchParamsKeysType, +} from "src/constants/searchParams"; -const PAUSED_PARAM = "paused"; +const { + LAST_DAG_RUN_STATE: LAST_DAG_RUN_STATE_PARAM, + PAUSED: PAUSED_PARAM, +}: SearchParamsKeysType = SearchParamsKeys; export const DagsFilters = () => { const [searchParams, setSearchParams] = useSearchParams(); const showPaused = searchParams.get(PAUSED_PARAM); + const state = searchParams.get(LAST_DAG_RUN_STATE_PARAM); + const isAll = state === null; + const isRunning = state === "running"; + const isFailed = state === "failed"; + const isSuccess = state === "success"; const { setTableURLState, tableURLState } = useTableURLState(); const { pagination, sorting } = tableURLState; @@ -51,6 +63,23 @@ export const DagsFilters = () => { [pagination, searchParams, setSearchParams, setTableURLState, sorting], ); + const handleStateChange: React.MouseEventHandler = + useCallback( + ({ currentTarget: { value } }) => { + if (value === "all") { + searchParams.delete(LAST_DAG_RUN_STATE_PARAM); + } else { + searchParams.set(LAST_DAG_RUN_STATE_PARAM, value); + } + setSearchParams(searchParams); + setTableURLState({ + pagination: { ...pagination, pageIndex: 0 }, + sorting, + }); + }, + [pagination, searchParams, setSearchParams, setTableURLState, sorting], + ); + return ( @@ -59,10 +88,34 @@ export const DagsFilters = () => { State: - All - Failed - Running - Successful + + All + + + Failed + + + Running + + + Successful + diff --git a/airflow/ui/src/pages/DagsList/DagsList.tsx b/airflow/ui/src/pages/DagsList/DagsList.tsx index d58e3eaa2038c..623b8a3b4bacc 100644 --- a/airflow/ui/src/pages/DagsList/DagsList.tsx +++ b/airflow/ui/src/pages/DagsList/DagsList.tsx @@ -21,21 +21,34 @@ import { Heading, HStack, Select, - Spinner, + Skeleton, VStack, } from "@chakra-ui/react"; import type { ColumnDef } from "@tanstack/react-table"; -import { type ChangeEventHandler, useCallback } from "react"; +import { + type ChangeEvent, + type ChangeEventHandler, + useCallback, + useState, +} from "react"; import { useSearchParams } from "react-router-dom"; import { useDagServiceGetDags } from "openapi/queries"; -import type { DAGResponse } from "openapi/requests/types.gen"; +import type { DAGResponse, DagRunState } from "openapi/requests/types.gen"; import { DataTable } from "src/components/DataTable"; +import { ToggleTableDisplay } from "src/components/DataTable/ToggleTableDisplay"; +import type { CardDef } from "src/components/DataTable/types"; import { useTableURLState } from "src/components/DataTable/useTableUrlState"; +import { ErrorAlert } from "src/components/ErrorAlert"; import { SearchBar } from "src/components/SearchBar"; import { TogglePause } from "src/components/TogglePause"; +import { + SearchParamsKeys, + type SearchParamsKeysType, +} from "src/constants/searchParams"; import { pluralize } from "src/utils/pluralize"; +import { DagCard } from "./DagCard"; import { DagsFilters } from "./DagsFilters"; const columns: Array> = [ @@ -49,6 +62,9 @@ const columns: Array> = [ ), enableSorting: false, 
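DagsFilters keeps the quick filters entirely in the URL: choosing "All" deletes last_dag_run_state, any other value writes it, and the page index is reset so results start from the first page. The same read/write pattern extracted into a small hook; the hook itself is hypothetical and only illustrates the calls made above:

import { useCallback } from "react";
import { useSearchParams } from "react-router-dom";

import { SearchParamsKeys } from "src/constants/searchParams";

// Hypothetical helper mirroring how DagsFilters reads and writes the filter.
export const useLastDagRunStateFilter = () => {
  const [searchParams, setSearchParams] = useSearchParams();
  const state = searchParams.get(SearchParamsKeys.LAST_DAG_RUN_STATE);

  const setState = useCallback(
    (value: string) => {
      if (value === "all") {
        searchParams.delete(SearchParamsKeys.LAST_DAG_RUN_STATE);
      } else {
        searchParams.set(SearchParamsKeys.LAST_DAG_RUN_STATE, value);
      }
      setSearchParams(searchParams);
    },
    [searchParams, setSearchParams],
  );

  return { setState, state };
};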
header: "", + meta: { + skeletonWidth: 10, + }, }, { accessorKey: "dag_id", @@ -83,28 +99,74 @@ const columns: Array> = [ }, ]; -const PAUSED_PARAM = "paused"; +const { + LAST_DAG_RUN_STATE: LAST_DAG_RUN_STATE_PARAM, + NAME_PATTERN: NAME_PATTERN_PARAM, + PAUSED: PAUSED_PARAM, +}: SearchParamsKeysType = SearchParamsKeys; + +const cardDef: CardDef = { + card: ({ row }) => , + meta: { + customSkeleton: , + }, +}; -// eslint-disable-next-line complexity -export const DagsList = ({ cardView = false }) => { - const [searchParams] = useSearchParams(); +export const DagsList = () => { + const [searchParams, setSearchParams] = useSearchParams(); + const [display, setDisplay] = useState<"card" | "table">("card"); const showPaused = searchParams.get(PAUSED_PARAM); + const lastDagRunState = searchParams.get( + LAST_DAG_RUN_STATE_PARAM, + ) as DagRunState; const { setTableURLState, tableURLState } = useTableURLState(); const { pagination, sorting } = tableURLState; + const [dagDisplayNamePattern, setDagDisplayNamePattern] = useState( + searchParams.get(NAME_PATTERN_PARAM) ?? undefined, + ); // TODO: update API to accept multiple orderBy params const [sort] = sorting; const orderBy = sort ? `${sort.desc ? "-" : ""}${sort.id}` : undefined; - const { data, isLoading } = useDagServiceGetDags({ - limit: pagination.pageSize, - offset: pagination.pageIndex * pagination.pageSize, - onlyActive: true, - orderBy, - paused: showPaused === null ? undefined : showPaused === "true", - }); + const handleSearchChange = ({ + target: { value }, + }: ChangeEvent) => { + if (value) { + searchParams.set(NAME_PATTERN_PARAM, value); + } else { + searchParams.delete(NAME_PATTERN_PARAM); + } + setSearchParams(searchParams); + setTableURLState({ + pagination: { ...pagination, pageIndex: 0 }, + sorting, + }); + setDagDisplayNamePattern(value); + }; + + const { data, error, isFetching, isLoading } = useDagServiceGetDags( + { + dagDisplayNamePattern: Boolean(dagDisplayNamePattern) + ? `%${dagDisplayNamePattern}%` + : undefined, + lastDagRunState, + limit: pagination.pageSize, + offset: pagination.pageIndex * pagination.pageSize, + onlyActive: true, + orderBy, + paused: showPaused === null ? undefined : showPaused === "true", + }, + [dagDisplayNamePattern, showPaused], + { + refetchOnMount: true, + refetchOnReconnect: false, + refetchOnWindowFocus: false, + staleTime: 5 * 60 * 1000, + }, + ); const handleSortChange = useCallback>( ({ currentTarget: { value } }) => { @@ -120,44 +182,51 @@ export const DagsList = ({ cardView = false }) => { return ( <> - {isLoading ? : undefined} - {!isLoading && Boolean(data?.dags) && ( - <> - - - - - - {pluralize("DAG", data?.total_entries)} - - {cardView ? ( - - ) : ( - false - )} - - - - - )} + + + + + + {pluralize("DAG", data?.total_entries)} + + {display === "card" ? ( + + ) : ( + false + )} + + + + } + initialState={tableURLState} + isFetching={isFetching} + isLoading={isLoading} + modelName="DAG" + onStateChange={setTableURLState} + skeletonCount={display === "card" ? 
5 : undefined} + total={data?.total_entries} + /> ); }; diff --git a/airflow/ui/src/theme.ts b/airflow/ui/src/theme.ts index e172bf76508d1..06a3b10cc7fcf 100644 --- a/airflow/ui/src/theme.ts +++ b/airflow/ui/src/theme.ts @@ -24,39 +24,33 @@ import { createMultiStyleConfigHelpers, extendTheme } from "@chakra-ui/react"; const { defineMultiStyleConfig, definePartsStyle } = createMultiStyleConfigHelpers(tableAnatomy.keys); -const baseStyle = definePartsStyle((props) => { - const { colorMode, colorScheme } = props; - - return { - tbody: { - tr: { - "&:nth-of-type(even)": { - "th, td": { - borderBottomWidth: "0px", - }, +const baseStyle = definePartsStyle(() => ({ + tbody: { + tr: { + "&:nth-of-type(even)": { + "th, td": { + borderBottomWidth: "0px", + }, + }, + "&:nth-of-type(odd)": { + td: { + background: "subtle-bg", }, - "&:nth-of-type(odd)": { - td: { - background: - colorMode === "light" ? `${colorScheme}.50` : `gray.900`, - }, - "th, td": { - borderBottomWidth: "0px", - borderColor: - colorMode === "light" ? `${colorScheme}.50` : `gray.900`, - }, + "th, td": { + borderBottomWidth: "0px", + borderColor: "subtle-bg", }, }, }, - thead: { - tr: { - th: { - borderBottomWidth: 0, - }, + }, + thead: { + tr: { + th: { + borderBottomWidth: 0, }, }, - }; -}); + }, +})); export const tableTheme = defineMultiStyleConfig({ baseStyle }); @@ -72,6 +66,12 @@ const theme = extendTheme({ config: { useSystemColorMode: true, }, + semanticTokens: { + colors: { + "subtle-bg": { _dark: "gray.900", _light: "blue.50" }, + "subtle-text": { _dark: "blue.500", _light: "blue.600" }, + }, + }, styles: { global: { "*, *::before, &::after": { diff --git a/airflow/utils/api_migration.py b/airflow/utils/api_migration.py index d6b61a933deed..3e6ba3881cbed 100644 --- a/airflow/utils/api_migration.py +++ b/airflow/utils/api_migration.py @@ -31,7 +31,7 @@ RT = TypeVar("RT") -def mark_fastapi_migration_done(function: Callable[PS, RT]) -> Callable[PS, RT]: +def mark_fastapi_migration_done(function: Callable[..., RT]) -> Callable[..., RT]: """ Mark an endpoint as migrated over to the new FastAPI API. diff --git a/airflow/utils/db.py b/airflow/utils/db.py index c185c70a9811f..26af566c7e2bd 100644 --- a/airflow/utils/db.py +++ b/airflow/utils/db.py @@ -96,7 +96,7 @@ class MappedClassProtocol(Protocol): "2.9.0": "1949afb29106", "2.9.2": "686269002441", "2.10.0": "22ed7efa9da2", - "3.0.0": "c3389cd7793f", + "3.0.0": "5a5d66100783", } @@ -1203,19 +1203,22 @@ def resetdb(session: Session = NEW_SESSION, skip_init: bool = False): if not settings.engine: raise RuntimeError("The settings.engine must be set. 
This is a critical assertion") log.info("Dropping tables that exist") + original_logging_level = logging.root.level + try: + import_all_models() - import_all_models() - - connection = settings.engine.connect() + connection = settings.engine.connect() - with create_global_lock(session=session, lock=DBLocks.MIGRATIONS), connection.begin(): - drop_airflow_models(connection) - drop_airflow_moved_tables(connection) - external_db_manager = RunDBManager() - external_db_manager.drop_tables(session, connection) + with create_global_lock(session=session, lock=DBLocks.MIGRATIONS), connection.begin(): + drop_airflow_models(connection) + drop_airflow_moved_tables(connection) + external_db_manager = RunDBManager() + external_db_manager.drop_tables(session, connection) - if not skip_init: - initdb(session=session) + if not skip_init: + initdb(session=session) + finally: + logging.root.setLevel(original_logging_level) @provide_session diff --git a/airflow/utils/file.py b/airflow/utils/file.py index 2e39eb7dd7b52..86b7a7891ca8f 100644 --- a/airflow/utils/file.py +++ b/airflow/utils/file.py @@ -355,6 +355,6 @@ def get_unique_dag_module_name(file_path: str) -> str: """Return a unique module name in the format unusual_prefix_{sha1 of module's file path}_{original module name}.""" if isinstance(file_path, str): path_hash = hashlib.sha1(file_path.encode("utf-8")).hexdigest() - org_mod_name = Path(file_path).stem + org_mod_name = re2.sub(r"[.-]", "_", Path(file_path).stem) return MODIFIED_DAG_MODULE_NAME.format(path_hash=path_hash, module_name=org_mod_name) raise ValueError("file_path should be a string to generate unique module name") diff --git a/airflow/utils/log/secrets_masker.py b/airflow/utils/log/secrets_masker.py index 13c93d992fffa..4f9604aced7f4 100644 --- a/airflow/utils/log/secrets_masker.py +++ b/airflow/utils/log/secrets_masker.py @@ -22,7 +22,7 @@ import logging import sys from enum import Enum -from functools import cached_property +from functools import cache, cached_property from typing import ( TYPE_CHECKING, Any, @@ -42,7 +42,6 @@ import re2 from airflow import settings -from airflow.compat.functools import cache if TYPE_CHECKING: from kubernetes.client import V1EnvVar diff --git a/airflow/utils/platform.py b/airflow/utils/platform.py index 7945e2b945382..74f56a0ab037d 100644 --- a/airflow/utils/platform.py +++ b/airflow/utils/platform.py @@ -24,8 +24,7 @@ import pkgutil import platform import sys - -from airflow.compat.functools import cache +from functools import cache IS_WINDOWS = platform.system() == "Windows" diff --git a/airflow/www/forms.py b/airflow/www/forms.py index 5b746ab633c06..a3b1d5262db20 100644 --- a/airflow/www/forms.py +++ b/airflow/www/forms.py @@ -20,6 +20,7 @@ import datetime import json import operator +from functools import cache from typing import Iterator import pendulum @@ -36,7 +37,6 @@ from wtforms.fields import Field, IntegerField, PasswordField, SelectField, StringField, TextAreaField from wtforms.validators import InputRequired, Optional -from airflow.compat.functools import cache from airflow.configuration import conf from airflow.providers_manager import ProvidersManager from airflow.utils.types import DagRunType diff --git a/airflow/www/static/js/types/api-generated.ts b/airflow/www/static/js/types/api-generated.ts index 87f89d19cd714..ef45dbd3b57b6 100644 --- a/airflow/www/static/js/types/api-generated.ts +++ b/airflow/www/static/js/types/api-generated.ts @@ -919,7 +919,7 @@ export interface components { /** @description To date of the backfill (exclusive). 
*/ to_date?: string | null; /** @description Dag run conf to be forwarded to the dag runs. */ - dag_run_conf?: string | null; + dag_run_conf?: { [key: string]: unknown } | null; /** @description is_paused */ is_paused?: boolean | null; /** @description max_active_runs */ @@ -1263,7 +1263,7 @@ export interface components { }; /** @description Collection of DAG warnings. */ DagWarningCollection: { - import_errors?: components["schemas"]["DagWarning"][]; + dag_warnings?: components["schemas"]["DagWarning"][]; } & components["schemas"]["CollectionInfo"]; SetDagRunNote: { /** @description Custom notes left by users for this Dag Run. */ @@ -2728,22 +2728,6 @@ export interface operations { }; }; create_backfill: { - parameters: { - query: { - /** Create dag runs for this dag. */ - dag_id: string; - /** Create dag runs with logical dates from this date onward, including this date. */ - from_date: string; - /** Create dag runs for logical dates up to but not including this date. */ - to_date: string; - /** Maximum number of active DAG runs for the the backfill. */ - max_active_runs?: number; - /** If true, run the dag runs in descending order of logical date. */ - reverse?: boolean; - /** If true, run the dag runs in descending order of logical date. */ - config?: string; - }; - }; responses: { /** Success. */ 200: { @@ -2755,6 +2739,11 @@ export interface operations { 401: components["responses"]["Unauthenticated"]; 403: components["responses"]["PermissionDenied"]; }; + requestBody: { + content: { + "application/json": components["schemas"]["Backfill"]; + }; + }; }; get_backfill: { parameters: { @@ -4973,8 +4962,12 @@ export interface operations { get_dag_stats: { parameters: { query: { + /** The numbers of items to return. */ + limit?: components["parameters"]["PageLimit"]; + /** The number of items to skip before starting to collect the result set. */ + offset?: components["parameters"]["PageOffset"]; /** One or more DAG IDs separated by commas to filter relevant Dags. */ - dag_ids: string; + dag_ids?: string; }; }; responses: { @@ -5531,7 +5524,7 @@ export type ListBackfillsVariables = CamelCasedPropertiesDeep< operations["list_backfills"]["parameters"]["query"] >; export type CreateBackfillVariables = CamelCasedPropertiesDeep< - operations["create_backfill"]["parameters"]["query"] + operations["create_backfill"]["requestBody"]["content"]["application/json"] >; export type GetBackfillVariables = CamelCasedPropertiesDeep< operations["get_backfill"]["parameters"]["path"] diff --git a/airflow/www/templates/airflow/dags.html b/airflow/www/templates/airflow/dags.html index c629936df7c00..6da7090a14781 100644 --- a/airflow/www/templates/airflow/dags.html +++ b/airflow/www/templates/airflow/dags.html @@ -489,6 +489,6 @@

{{ page_title }}

} {% if scarf_url %} - + {% endif %} {% endblock %} diff --git a/airflow/www/views.py b/airflow/www/views.py index 47c548d5e7667..23ccae6224960 100644 --- a/airflow/www/views.py +++ b/airflow/www/views.py @@ -32,7 +32,7 @@ import warnings from bisect import insort_left from collections import defaultdict -from functools import cached_property +from functools import cache, cached_property from json import JSONDecodeError from pathlib import Path from typing import TYPE_CHECKING, Any, Collection, Iterator, Mapping, MutableMapping, Sequence @@ -89,7 +89,6 @@ ) from airflow.assets import Asset, AssetAlias from airflow.auth.managers.models.resource_details import AccessView, DagAccessEntity, DagDetails -from airflow.compat.functools import cache from airflow.configuration import AIRFLOW_CONFIG, conf from airflow.exceptions import ( AirflowConfigException, @@ -178,7 +177,7 @@ def sanitize_args(args: dict[str, Any]) -> dict[str, Any]: return {key: value for key, value in args.items() if not key.startswith("_")} -# Following the release of https://github.com/python/cpython/issues/102153 in Python 3.8.17 and 3.9.17 on +# Following the release of https://github.com/python/cpython/issues/102153 in Python 3.9.17 on # June 6, 2023, we are adding extra sanitization of the urls passed to get_safe_url method to make it works # the same way regardless if the user uses latest Python patchlevel versions or not. This also follows # a recommended solution by the Python core team. @@ -3349,6 +3348,7 @@ def grid_data(self): @expose("/object/historical_metrics_data") @auth.has_access_view(AccessView.CLUSTER_ACTIVITY) + @mark_fastapi_migration_done def historical_metrics_data(self): """Return cluster activity historical metrics.""" start_date = _safe_parse_datetime(request.args.get("start_date")) @@ -3450,7 +3450,7 @@ def next_run_datasets(self, dag_id): ), isouter=True, ) - .where(DagScheduleAssetReference.dag_id == dag_id, ~AssetModel.is_orphaned) + .where(DagScheduleAssetReference.dag_id == dag_id, AssetModel.active.has()) .group_by(AssetModel.id, AssetModel.uri) .order_by(AssetModel.uri) ) @@ -3583,7 +3583,7 @@ def datasets_summary(self): if has_event_filters: count_query = count_query.join(AssetEvent, AssetEvent.dataset_id == AssetModel.id) - filters = [~AssetModel.is_orphaned] + filters = [AssetModel.active.has()] if uri_pattern: filters.append(AssetModel.uri.ilike(f"%{uri_pattern}%")) if updated_after: diff --git a/chart/templates/scheduler/scheduler-deployment.yaml b/chart/templates/scheduler/scheduler-deployment.yaml index 634c3a41f7de0..551ab94c8c48d 100644 --- a/chart/templates/scheduler/scheduler-deployment.yaml +++ b/chart/templates/scheduler/scheduler-deployment.yaml @@ -324,7 +324,9 @@ spec: emptyDir: {{- toYaml (default (dict) .Values.logs.emptyDirConfig) | nindent 12 }} {{- else }} volumeClaimTemplates: - - metadata: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: name: logs {{- if .Values.workers.persistence.annotations }} annotations: {{- toYaml .Values.workers.persistence.annotations | nindent 10 }} diff --git a/chart/templates/workers/worker-deployment.yaml b/chart/templates/workers/worker-deployment.yaml index 23852d3427522..5499e64ebece5 100644 --- a/chart/templates/workers/worker-deployment.yaml +++ b/chart/templates/workers/worker-deployment.yaml @@ -74,6 +74,9 @@ spec: tier: airflow component: worker release: {{ .Release.Name }} + {{- if and $persistence .Values.workers.podManagementPolicy }} + podManagementPolicy: {{ .Values.workers.podManagementPolicy }} + {{- end }} {{- if 
and $persistence .Values.workers.updateStrategy }} updateStrategy: {{- toYaml .Values.workers.updateStrategy | nindent 4 }} {{- end }} diff --git a/chart/values.schema.json b/chart/values.schema.json index d8b5de41c8eb8..0f93773dfded9 100644 --- a/chart/values.schema.json +++ b/chart/values.schema.json @@ -841,7 +841,7 @@ "tag": { "description": "The StatsD image tag.", "type": "string", - "default": "v0.27.2-pr579" + "default": "v0.27.2" }, "pullPolicy": { "description": "The StatsD image pull policy.", @@ -1598,6 +1598,18 @@ ], "default": null }, + "podManagementPolicy": { + "description": "Specifies the policy for managing pods within the worker. Only applicable to StatefulSet.", + "type": [ + "null", + "string" + ], + "default": null, + "enum": [ + "OrderedReady", + "Parallel" + ] + }, "strategy": { "description": "Specifies the strategy used to replace old Pods by new ones when deployed as a Deployment.", "type": [ diff --git a/chart/values.yaml b/chart/values.yaml index 0edb9f2bd7cd3..9d4bcf2ca0709 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -105,7 +105,7 @@ images: pullPolicy: IfNotPresent statsd: repository: quay.io/prometheus/statsd-exporter - tag: v0.27.2-pr579 + tag: v0.27.2 pullPolicy: IfNotPresent redis: repository: redis @@ -590,6 +590,9 @@ workers: maxSurge: "100%" maxUnavailable: "50%" + # Allow relaxing ordering guarantees while preserving its uniqueness and identity + # podManagementPolicy: Parallel + # When not set, the values defined in the global securityContext will be used securityContext: {} # runAsUser: 50000 diff --git a/clients/python/README.md b/clients/python/README.md index e1427fce92d46..7bba821e43e62 100644 --- a/clients/python/README.md +++ b/clients/python/README.md @@ -248,7 +248,7 @@ For more information, please visit [https://airflow.apache.org](https://airflow. ## Requirements. 
-Python >=3.8 +Python >=3.9 ## Installation & Usage diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml index 1a5ccdc9e2b63..1584744249646 100644 --- a/clients/python/pyproject.toml +++ b/clients/python/pyproject.toml @@ -25,7 +25,7 @@ dynamic = ["version"] description = "Apache Airflow API (Stable)" readme = "README.md" license-files.globs = ["LICENSE", "NOTICE"] -requires-python = "~=3.8" +requires-python = "~=3.9" authors = [ { name = "Apache Software Foundation", email = "dev@airflow.apache.org" }, ] @@ -42,7 +42,6 @@ classifiers = [ "Intended Audience :: Developers", "Intended Audience :: System Administrators", "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", @@ -74,7 +73,7 @@ run-coverage = "pytest test" run = "run-coverage --no-cov" [[tool.hatch.envs.test.matrix]] -python = ["3.8", "3.9", "3.10", "3.11"] +python = ["3.9", "3.10", "3.11"] [tool.hatch.version] path = "./version.txt" diff --git a/clients/python/test_python_client.py b/clients/python/test_python_client.py index d4d3f98efd834..5d0accdc019ff 100644 --- a/clients/python/test_python_client.py +++ b/clients/python/test_python_client.py @@ -17,7 +17,7 @@ # # PEP 723 compliant inline script metadata (not yet widely supported) # /// script -# requires-python = ">=3.8" +# requires-python = ">=3.9" # dependencies = [ # "apache-airflow-client", # "rich", diff --git a/constraints/README.md b/constraints/README.md index 791450d1bd7c9..9d02755dc5cd6 100644 --- a/constraints/README.md +++ b/constraints/README.md @@ -29,12 +29,12 @@ This allows you to iterate on dependencies without having to run `--upgrade-to-n Typical workflow in this case is: * download and copy the constraint file to the folder (for example via -[The GitHub Raw Link](https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.8.txt) +[The GitHub Raw Link](https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.9.txt) * modify the constraint file in "constraints" folder * build the image using this command ```bash -breeze ci-image build --python 3.8 --airflow-constraints-location constraints/constraints-3.8txt +breeze ci-image build --python 3.9 --airflow-constraints-location constraints/constraints-3.9.txt ``` You can continue iterating and updating the constraint file (and rebuilding the image) @@ -46,7 +46,7 @@ pip freeze | sort | \ grep -v "apache_airflow" | \ grep -v "apache-airflow==" | \ grep -v "@" | \ - grep -v "/opt/airflow" > /opt/airflow/constraints/constraints-3.8.txt + grep -v "/opt/airflow" > /opt/airflow/constraints/constraints-3.9.txt ``` If you are working with others on updating the dependencies, you can also commit the constraint diff --git a/contributing-docs/03_contributors_quick_start.rst b/contributing-docs/03_contributors_quick_start.rst index 8f7ead6deacc4..4ee65da0350b8 100644 --- a/contributing-docs/03_contributors_quick_start.rst +++ b/contributing-docs/03_contributors_quick_start.rst @@ -256,7 +256,7 @@ Setting up Breeze .. code-block:: bash - breeze --python 3.8 --backend postgres + breeze --python 3.9 --backend postgres .. note:: If you encounter an error like "docker.credentials.errors.InitializationError: @@ -273,15 +273,14 @@ Setting up Breeze means that you are inside the Breeze container and ready to run most of the development tasks. 
You can leave the environment with ``exit`` and re-enter it with just ``breeze`` command. -.. code-block:: bash - - root@b76fcb399bb6:/opt/airflow# airflow db reset - - 6. Once you enter breeze environment, create airflow tables and users from the breeze CLI. ``airflow db reset`` is required to execute at least once for Airflow Breeze to get the database/tables created. If you run tests, however - the test database will be initialized automatically for you. +.. code-block:: bash + + root@b76fcb399bb6:/opt/airflow# airflow db reset + .. code-block:: bash root@b76fcb399bb6:/opt/airflow# airflow users create \ @@ -313,7 +312,7 @@ Using Breeze ------------ 1. Starting breeze environment using ``breeze start-airflow`` starts Breeze environment with last configuration run( - In this case python and backend will be picked up from last execution ``breeze --python 3.8 --backend postgres``) + In this case python and backend will be picked up from last execution ``breeze --python 3.9 --backend postgres``) It also automatically starts webserver, backend and scheduler. It drops you in tmux with scheduler in bottom left and webserver in bottom right. Use ``[Ctrl + B] and Arrow keys`` to navigate. @@ -324,9 +323,9 @@ Using Breeze Use CI image. Branch name: main - Docker image: ghcr.io/apache/airflow/main/ci/python3.8:latest + Docker image: ghcr.io/apache/airflow/main/ci/python3.9:latest Airflow source version: 2.4.0.dev0 - Python version: 3.8 + Python version: 3.9 Backend: mysql 5.7 @@ -365,7 +364,7 @@ Using Breeze .. code-block:: bash - breeze --python 3.8 --backend postgres + breeze --python 3.9 --backend postgres 2. Open tmux @@ -601,34 +600,27 @@ All Tests are inside ./tests directory. root@63528318c8b1:/opt/airflow# pytest tests/utils/test_dates.py ============================================================= test session starts ============================================================== - platform linux -- Python 3.8.16, pytest-7.2.1, pluggy-1.0.0 -- /usr/local/bin/python + platform linux -- Python 3.9.20, pytest-8.3.3, pluggy-1.5.0 -- /usr/local/bin/python cachedir: .pytest_cache - rootdir: /opt/airflow, configfile: pytest.ini - plugins: timeouts-1.2.1, capture-warnings-0.0.4, cov-4.0.0, requests-mock-1.10.0, rerunfailures-11.1.1, anyio-3.6.2, instafail-0.4.2, time-machine-2.9.0, asyncio-0.20.3, httpx-0.21.3, xdist-3.2.0 - asyncio: mode=strict + rootdir: /opt/airflow + configfile: pyproject.toml + plugins: anyio-4.6.0, time-machine-2.15.0, icdiff-0.9, rerunfailures-14.0, instafail-0.5.0, custom-exit-code-0.3.0, xdist-3.6.1, mock-3.14.0, cov-5.0.0, asyncio-0.24.0, requests-mock-1.12.1, timeouts-1.2.1 + asyncio: mode=strict, default_loop_scope=None setup timeout: 0.0s, execution timeout: 0.0s, teardown timeout: 0.0s - collected 12 items - - tests/utils/test_dates.py::TestDates::test_days_ago PASSED [ 8%] - tests/utils/test_dates.py::TestDates::test_parse_execution_date PASSED [ 16%] - tests/utils/test_dates.py::TestDates::test_round_time PASSED [ 25%] - tests/utils/test_dates.py::TestDates::test_infer_time_unit PASSED [ 33%] - tests/utils/test_dates.py::TestDates::test_scale_time_units PASSED [ 41%] - tests/utils/test_dates.py::TestUtilsDatesDateRange::test_no_delta PASSED [ 50%] - tests/utils/test_dates.py::TestUtilsDatesDateRange::test_end_date_before_start_date PASSED [ 58%] - tests/utils/test_dates.py::TestUtilsDatesDateRange::test_both_end_date_and_num_given PASSED [ 66%] - tests/utils/test_dates.py::TestUtilsDatesDateRange::test_invalid_delta PASSED [ 75%] - 
tests/utils/test_dates.py::TestUtilsDatesDateRange::test_positive_num_given PASSED [ 83%] - tests/utils/test_dates.py::TestUtilsDatesDateRange::test_negative_num_given PASSED [ 91%] - tests/utils/test_dates.py::TestUtilsDatesDateRange::test_delta_cron_presets PASSED [100%] - - ============================================================== 12 passed in 0.24s ============================================================== + collected 4 items + + tests/utils/test_dates.py::TestDates::test_parse_execution_date PASSED [ 25%] + tests/utils/test_dates.py::TestDates::test_round_time PASSED [ 50%] + tests/utils/test_dates.py::TestDates::test_infer_time_unit PASSED [ 75%] + tests/utils/test_dates.py::TestDates::test_scale_time_units PASSED [100%] + + ================================================================== 4 passed in 3.30s =================================================================== - Running All the test with Breeze by specifying required python version, backend, backend version .. code-block:: bash - breeze --backend postgres --postgres-version 15 --python 3.8 --db-reset testing tests --test-type All + breeze --backend postgres --postgres-version 15 --python 3.9 --db-reset testing tests --test-type All - Running specific type of test @@ -638,7 +630,7 @@ All Tests are inside ./tests directory. .. code-block:: bash - breeze --backend postgres --postgres-version 15 --python 3.8 --db-reset testing tests --test-type Core + breeze --backend postgres --postgres-version 15 --python 3.9 --db-reset testing tests --test-type Core - Running Integration test for specific test type @@ -647,7 +639,7 @@ All Tests are inside ./tests directory. .. code-block:: bash - breeze --backend postgres --postgres-version 15 --python 3.8 --db-reset testing tests --test-type All --integration mongo + breeze --backend postgres --postgres-version 15 --python 3.9 --db-reset testing tests --test-type All --integration mongo - For more information on Testing visit : |09_testing.rst| diff --git a/contributing-docs/05_pull_requests.rst b/contributing-docs/05_pull_requests.rst index ea9300f9c643f..1e14167943497 100644 --- a/contributing-docs/05_pull_requests.rst +++ b/contributing-docs/05_pull_requests.rst @@ -92,7 +92,7 @@ these guidelines: you can push your code to PR and see results of the tests in the CI. - You can use any supported python version to run the tests, but the best is to check - if it works for the oldest supported version (Python 3.8 currently). In rare cases + if it works for the oldest supported version (Python 3.9 currently). In rare cases tests might fail with the oldest version when you use features that are available in newer Python versions. For that purpose we have ``airflow.compat`` package where we keep back-ported useful features from newer versions. 
diff --git a/contributing-docs/07_local_virtualenv.rst b/contributing-docs/07_local_virtualenv.rst index 8439eb2ab2089..2c92edee97a7b 100644 --- a/contributing-docs/07_local_virtualenv.rst +++ b/contributing-docs/07_local_virtualenv.rst @@ -37,7 +37,7 @@ Required Software Packages Use system-level package managers like yum, apt-get for Linux, or Homebrew for macOS to install required software packages: -* Python (One of: 3.8, 3.9, 3.10, 3.11, 3.12) +* Python (One of: 3.9, 3.10, 3.11, 3.12) * MySQL 5.7+ * libxml * helm (only for helm chart tests) @@ -187,9 +187,7 @@ This is what it shows currently: +-------------+---------+---------------------------------------------------------------+ | Name | Type | Description | +=============+=========+===============================================================+ -| default | virtual | Default environment with Python 3.8 for maximum compatibility | -+-------------+---------+---------------------------------------------------------------+ -| airflow-38 | virtual | Environment with Python 3.8. No devel installed. | +| default | virtual | Default environment with Python 3.9 for maximum compatibility | +-------------+---------+---------------------------------------------------------------+ | airflow-39 | virtual | Environment with Python 3.9. No devel installed. | +-------------+---------+---------------------------------------------------------------+ @@ -200,7 +198,7 @@ This is what it shows currently: | airflow-312 | virtual | Environment with Python 3.12. No devel installed | +-------------+---------+---------------------------------------------------------------+ -The default env (if you have not used one explicitly) is ``default`` and it is a Python 3.8 +The default env (if you have not used one explicitly) is ``default`` and it is a Python 3.9 virtualenv for maximum compatibility. You can install devel set of dependencies with it by running: @@ -381,12 +379,12 @@ to avoid "works-for-me" syndrome, where you use different version of dependencie that are used in main, CI tests and by other contributors. There are different constraint files for different python versions. For example this command will install -all basic devel requirements and requirements of google provider as last successfully tested for Python 3.8: +all basic devel requirements and requirements of google provider as last successfully tested for Python 3.9: .. code:: bash pip install -e ".[devel,google]" \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-source-providers-3.8.txt" + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-source-providers-3.9.txt" Make sure to use latest main for such installation, those constraints are "development constraints" and they are refreshed several times a day to make sure they are up to date with the latest changes in the main branch. @@ -403,7 +401,7 @@ and install to latest supported ones by pure airflow core. .. 
code:: bash pip install -e ".[devel]" \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-no-providers-3.8.txt" + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-no-providers-3.9.txt" These are examples of the development options available with the local virtualenv in your IDE: diff --git a/contributing-docs/08_static_code_checks.rst b/contributing-docs/08_static_code_checks.rst index d50b9db3e607f..f064a13a773b6 100644 --- a/contributing-docs/08_static_code_checks.rst +++ b/contributing-docs/08_static_code_checks.rst @@ -40,7 +40,7 @@ use. So, you can be sure your modifications will also work for CI if they pass pre-commit hooks. We have integrated the fantastic `pre-commit `__ framework -in our development workflow. To install and use it, you need at least Python 3.8 locally. +in our development workflow. To install and use it, you need at least Python 3.9 locally. Installing pre-commit hooks --------------------------- @@ -152,8 +152,6 @@ require Breeze Docker image to be built locally. +-----------------------------------------------------------+--------------------------------------------------------+---------+ | check-common-compat-used-for-openlineage | Check common.compat is used for OL deprecated classes | | +-----------------------------------------------------------+--------------------------------------------------------+---------+ -| check-compat-cache-on-methods | Check that compat cache do not use on class methods | | -+-----------------------------------------------------------+--------------------------------------------------------+---------+ | check-core-deprecation-classes | Verify usage of Airflow deprecation classes in core | | +-----------------------------------------------------------+--------------------------------------------------------+---------+ | check-daysago-import-from-utils | days_ago imported from airflow.utils.dates | | @@ -214,8 +212,6 @@ require Breeze Docker image to be built locally. +-----------------------------------------------------------+--------------------------------------------------------+---------+ | check-provider-yaml-valid | Validate provider.yaml files | * | +-----------------------------------------------------------+--------------------------------------------------------+---------+ -| check-providers-init-file-missing | Provider init file is missing | | -+-----------------------------------------------------------+--------------------------------------------------------+---------+ | check-providers-subpackages-init-file-exist | Provider subpackage init files are there | | +-----------------------------------------------------------+--------------------------------------------------------+---------+ | check-pydevd-left-in-code | Check for pydevd debug statements accidentally left | | diff --git a/contributing-docs/12_airflow_dependencies_and_extras.rst b/contributing-docs/12_airflow_dependencies_and_extras.rst index 16d2f32ee172d..8bfbdb630c9f7 100644 --- a/contributing-docs/12_airflow_dependencies_and_extras.rst +++ b/contributing-docs/12_airflow_dependencies_and_extras.rst @@ -86,7 +86,7 @@ from the PyPI package: .. 
code-block:: bash pip install "apache-airflow[google,amazon,async]==2.2.5" \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.2.5/constraints-3.8.txt" + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.2.5/constraints-3.9.txt" The last one can be used to install Airflow in "minimal" mode - i.e when bare Airflow is installed without extras. @@ -98,7 +98,7 @@ requirements). .. code-block:: bash pip install -e ".[devel]" \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-source-providers-3.8.txt" + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-source-providers-3.9.txt" This also works with extras - for example: @@ -106,7 +106,7 @@ This also works with extras - for example: .. code-block:: bash pip install ".[ssh]" \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-source-providers-3.8.txt" + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-source-providers-3.9.txt" There are different set of fixed constraint files for different python major/minor versions and you should @@ -118,7 +118,7 @@ using ``constraints-no-providers`` constraint files as well. .. code-block:: bash pip install . --upgrade \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-no-providers-3.8.txt" + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-no-providers-3.9.txt" The ``constraints-.txt`` and ``constraints-no-providers-.txt`` diff --git a/contributing-docs/14_node_environment_setup.rst b/contributing-docs/14_node_environment_setup.rst index 81ced88240ac5..25635a89765e1 100644 --- a/contributing-docs/14_node_environment_setup.rst +++ b/contributing-docs/14_node_environment_setup.rst @@ -36,8 +36,8 @@ Committers will exercise their judgement on what endpoints should exist in the p Airflow UI ---------- -``airflow/ui`` is our React frontend powered. Dependencies are managed by pnpm and dev/build processes by `Vite `__ -Make sure you are using recent versions of ``pnpm\>=9`` and ``node/>=20``. ``breeze start-airflow`` will build the UI automatically. +``airflow/ui`` is our React frontend powered. Dependencies are managed by pnpm and dev/build processes by `Vite `__. +Make sure you are using recent versions of ``pnpm>=9`` and ``node>=20``. ``breeze start-airflow`` will build the UI automatically. Adding the ``--dev-mode`` flag will automatically run the vite dev server for hot reloading the UI during local development. pnpm commands diff --git a/contributing-docs/quick-start-ide/contributors_quick_start_pycharm.rst b/contributing-docs/quick-start-ide/contributors_quick_start_pycharm.rst index d830496b27206..4a3319ae97dd1 100644 --- a/contributing-docs/quick-start-ide/contributors_quick_start_pycharm.rst +++ b/contributing-docs/quick-start-ide/contributors_quick_start_pycharm.rst @@ -78,35 +78,14 @@ It requires "airflow-env" virtual environment configured locally. - Copy any example DAG present in the ``/airflow/example_dags`` directory to ``/files/dags/``. -- Add a ``__main__`` block at the end of your DAG file to make it runnable. It will run a ``back_fill`` job: +- Add a ``__main__`` block at the end of your DAG file to make it runnable: .. 
code-block:: python if __name__ == "__main__": - dag.clear() - dag.run() + dag.test() -- Add ``AIRFLOW__CORE__EXECUTOR=DebugExecutor`` to Environment variable of Run Configuration. - - - Click on Add configuration - - .. raw:: html - -
- Add Configuration pycharm -
- - - Add Script Path and Environment Variable to new Python configuration - - .. raw:: html - -
- Add environment variable pycharm -
- -- Now Debug an example dag and view the entries in tables such as ``dag_run, xcom`` etc in MySQL Workbench. +- Run the file. Creating a branch ################# diff --git a/contributing-docs/quick-start-ide/contributors_quick_start_vscode.rst b/contributing-docs/quick-start-ide/contributors_quick_start_vscode.rst index 88ff1fdd84e52..61fdf501063db 100644 --- a/contributing-docs/quick-start-ide/contributors_quick_start_vscode.rst +++ b/contributing-docs/quick-start-ide/contributors_quick_start_vscode.rst @@ -72,8 +72,7 @@ Setting up debugging if __name__ == "__main__": - dag.clear() - dag.run() + dag.test() - Add ``"AIRFLOW__CORE__EXECUTOR": "DebugExecutor"`` to the ``"env"`` field of Debug configuration. diff --git a/contributing-docs/quick-start-ide/images/pycharm_add_configuration.png b/contributing-docs/quick-start-ide/images/pycharm_add_configuration.png deleted file mode 100644 index 525b73e6141ab..0000000000000 Binary files a/contributing-docs/quick-start-ide/images/pycharm_add_configuration.png and /dev/null differ diff --git a/contributing-docs/quick-start-ide/images/pycharm_add_env_variable.png b/contributing-docs/quick-start-ide/images/pycharm_add_env_variable.png deleted file mode 100644 index f408372211400..0000000000000 Binary files a/contributing-docs/quick-start-ide/images/pycharm_add_env_variable.png and /dev/null differ diff --git a/contributing-docs/testing/dag_testing.rst b/contributing-docs/testing/dag_testing.rst index 7e311171ce019..0bf506c2f321a 100644 --- a/contributing-docs/testing/dag_testing.rst +++ b/contributing-docs/testing/dag_testing.rst @@ -20,31 +20,22 @@ DAG Testing =========== To ease and speed up the process of developing DAGs, you can use -py:class:`~airflow.executors.debug_executor.DebugExecutor`, which is a single process executor -for debugging purposes. Using this executor, you can run and debug DAGs from your IDE. +py:meth:`~airflow.models.dag.DAG.test`, which will run a dag in a single process. To set up the IDE: 1. Add ``main`` block at the end of your DAG file to make it runnable. -It will run a backfill job: .. code-block:: python if __name__ == "__main__": - dag.clear() - dag.run() + dag.test() -2. Set up ``AIRFLOW__CORE__EXECUTOR=DebugExecutor`` in the run configuration of your IDE. - Make sure to also set up all environment variables required by your DAG. - 3. Run and debug the DAG file. -Additionally, ``DebugExecutor`` can be used in a fail-fast mode that will make -all other running or scheduled tasks fail immediately. To enable this option, set -``AIRFLOW__DEBUG__FAIL_FAST=True`` or adjust ``fail_fast`` option in your ``airflow.cfg``. -Also, with the Airflow CLI command ``airflow dags test``, you can execute one complete run of a DAG: +You can also run the dag in the same manner with the Airflow CLI command ``airflow dags test``: .. code-block:: bash diff --git a/contributing-docs/testing/docker_compose_tests.rst b/contributing-docs/testing/docker_compose_tests.rst index 94864b4137de8..921a3cafb193b 100644 --- a/contributing-docs/testing/docker_compose_tests.rst +++ b/contributing-docs/testing/docker_compose_tests.rst @@ -48,7 +48,7 @@ Running complete test with breeze: .. 
code-block:: bash - breeze prod-image build --python 3.8 + breeze prod-image build --python 3.9 breeze testing docker-compose-tests In case the test fails, it will dump the logs from the running containers to the console and it @@ -65,8 +65,8 @@ to see the output of the test as it happens (it can be also set via The test can be also run manually with ``pytest docker_tests/test_docker_compose_quick_start.py`` command, provided that you have a local airflow venv with ``dev`` extra set and the ``DOCKER_IMAGE`` environment variable is set to the image you want to test. The variable defaults -to ``ghcr.io/apache/airflow/main/prod/python3.8:latest`` which is built by default -when you run ``breeze prod-image build --python 3.8``. also the switches ``--skip-docker-compose-deletion`` +to ``ghcr.io/apache/airflow/main/prod/python3.9:latest`` which is built by default +when you run ``breeze prod-image build --python 3.9``. also the switches ``--skip-docker-compose-deletion`` and ``--wait-for-containers-timeout`` can only be passed via environment variables. If you want to debug the deployment using ``docker compose`` commands after ``SKIP_DOCKER_COMPOSE_DELETION`` @@ -87,7 +87,7 @@ the prod image build command above. .. code-block:: bash - export AIRFLOW_IMAGE_NAME=ghcr.io/apache/airflow/main/prod/python3.8:latest + export AIRFLOW_IMAGE_NAME=ghcr.io/apache/airflow/main/prod/python3.9:latest and follow the instructions in the `Running Airflow in Docker `_ diff --git a/contributing-docs/testing/k8s_tests.rst b/contributing-docs/testing/k8s_tests.rst index a4a6f67da0e2c..a9ba3151fe9bb 100644 --- a/contributing-docs/testing/k8s_tests.rst +++ b/contributing-docs/testing/k8s_tests.rst @@ -47,7 +47,7 @@ per each combination of Python and Kubernetes version. This is used during CI wh tests against those different clusters - even in parallel. The cluster name follows the pattern ``airflow-python-X.Y-vA.B.C`` where X.Y is a major/minor Python version -and A.B.C is Kubernetes version. Example cluster name: ``airflow-python-3.8-v1.24.0`` +and A.B.C is Kubernetes version. Example cluster name: ``airflow-python-3.9-v1.24.0`` Most of the commands can be executed in parallel for multiple images/clusters by adding ``--run-in-parallel`` to create clusters or deploy airflow. Similarly checking for status, dumping logs and deleting clusters @@ -215,7 +215,7 @@ Should result in KinD creating the K8S cluster. .. code-block:: text - Config created in /Users/jarek/IdeaProjects/airflow/.build/.k8s-clusters/airflow-python-3.8-v1.24.2/.kindconfig.yaml: + Config created in /Users/jarek/IdeaProjects/airflow/.build/.k8s-clusters/airflow-python-3.9-v1.24.2/.kindconfig.yaml: # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file @@ -251,7 +251,7 @@ Should result in KinD creating the K8S cluster. - Creating cluster "airflow-python-3.8-v1.24.2" ... + Creating cluster "airflow-python-3.9-v1.24.2" ... ✓ Ensuring node image (kindest/node:v1.24.2) 🖼 ✓ Preparing nodes 📦 📦 ✓ Writing configuration 📜 @@ -259,10 +259,10 @@ Should result in KinD creating the K8S cluster. ✓ Installing CNI 🔌 ✓ Installing StorageClass 💾 ✓ Joining worker nodes 🚜 - Set kubectl context to "kind-airflow-python-3.8-v1.24.2" + Set kubectl context to "kind-airflow-python-3.9-v1.24.2" You can now use your cluster with: - kubectl cluster-info --context kind-airflow-python-3.8-v1.24.2 + kubectl cluster-info --context kind-airflow-python-3.9-v1.24.2 Not sure what to do next? 
😅 Check out https://kind.sigs.k8s.io/docs/user/quick-start/ @@ -270,9 +270,9 @@ Should result in KinD creating the K8S cluster. Connecting to localhost:18150. Num try: 1 Error when connecting to localhost:18150 : ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')) - Airflow webserver is not available at port 18150. Run `breeze k8s deploy-airflow --python 3.8 --kubernetes-version v1.24.2` to (re)deploy airflow + Airflow webserver is not available at port 18150. Run `breeze k8s deploy-airflow --python 3.9 --kubernetes-version v1.24.2` to (re)deploy airflow - KinD cluster airflow-python-3.8-v1.24.2 created! + KinD cluster airflow-python-3.9-v1.24.2 created! NEXT STEP: You might now configure your cluster by: @@ -286,20 +286,20 @@ Should result in KinD creating the K8S cluster. .. code-block:: text - Configuring airflow-python-3.8-v1.24.2 to be ready for Airflow deployment - Deleting K8S namespaces for kind-airflow-python-3.8-v1.24.2 + Configuring airflow-python-3.9-v1.24.2 to be ready for Airflow deployment + Deleting K8S namespaces for kind-airflow-python-3.9-v1.24.2 Error from server (NotFound): namespaces "airflow" not found Error from server (NotFound): namespaces "test-namespace" not found Creating namespaces namespace/airflow created namespace/test-namespace created - Created K8S namespaces for cluster kind-airflow-python-3.8-v1.24.2 + Created K8S namespaces for cluster kind-airflow-python-3.9-v1.24.2 - Deploying test resources for cluster kind-airflow-python-3.8-v1.24.2 + Deploying test resources for cluster kind-airflow-python-3.9-v1.24.2 persistentvolume/test-volume created persistentvolumeclaim/test-volume created service/airflow-webserver-node-port created - Deployed test resources for cluster kind-airflow-python-3.8-v1.24.2 + Deployed test resources for cluster kind-airflow-python-3.9-v1.24.2 NEXT STEP: You might now build your k8s image by: @@ -317,45 +317,45 @@ Should show the status of current KinD cluster. .. code-block:: text ======================================================================================================================== - Cluster: airflow-python-3.8-v1.24.2 + Cluster: airflow-python-3.9-v1.24.2 - * KUBECONFIG=/Users/jarek/IdeaProjects/airflow/.build/.k8s-clusters/airflow-python-3.8-v1.24.2/.kubeconfig - * KINDCONFIG=/Users/jarek/IdeaProjects/airflow/.build/.k8s-clusters/airflow-python-3.8-v1.24.2/.kindconfig.yaml + * KUBECONFIG=/Users/jarek/IdeaProjects/airflow/.build/.k8s-clusters/airflow-python-3.9-v1.24.2/.kubeconfig + * KINDCONFIG=/Users/jarek/IdeaProjects/airflow/.build/.k8s-clusters/airflow-python-3.9-v1.24.2/.kindconfig.yaml - Cluster info: airflow-python-3.8-v1.24.2 + Cluster info: airflow-python-3.9-v1.24.2 Kubernetes control plane is running at https://127.0.0.1:48366 CoreDNS is running at https://127.0.0.1:48366/api/v1/namespaces/kube-system/services/kube-dns:dns/proxy To further debug and diagnose cluster problems, use 'kubectl cluster-info dump'. 
- Storage class for airflow-python-3.8-v1.24.2 + Storage class for airflow-python-3.9-v1.24.2 NAME PROVISIONER RECLAIMPOLICY VOLUMEBINDINGMODE ALLOWVOLUMEEXPANSION AGE standard (default) rancher.io/local-path Delete WaitForFirstConsumer false 83s - Running pods for airflow-python-3.8-v1.24.2 + Running pods for airflow-python-3.9-v1.24.2 NAME READY STATUS RESTARTS AGE coredns-6d4b75cb6d-rwp9d 1/1 Running 0 71s coredns-6d4b75cb6d-vqnrc 1/1 Running 0 71s - etcd-airflow-python-3.8-v1.24.2-control-plane 1/1 Running 0 84s + etcd-airflow-python-3.9-v1.24.2-control-plane 1/1 Running 0 84s kindnet-ckc8l 1/1 Running 0 69s kindnet-qqt8k 1/1 Running 0 71s - kube-apiserver-airflow-python-3.8-v1.24.2-control-plane 1/1 Running 0 84s - kube-controller-manager-airflow-python-3.8-v1.24.2-control-plane 1/1 Running 0 84s + kube-apiserver-airflow-python-3.9-v1.24.2-control-plane 1/1 Running 0 84s + kube-controller-manager-airflow-python-3.9-v1.24.2-control-plane 1/1 Running 0 84s kube-proxy-6g7hn 1/1 Running 0 69s kube-proxy-dwfvp 1/1 Running 0 71s - kube-scheduler-airflow-python-3.8-v1.24.2-control-plane 1/1 Running 0 84s + kube-scheduler-airflow-python-3.9-v1.24.2-control-plane 1/1 Running 0 84s KinD Cluster API server URL: http://localhost:48366 Connecting to localhost:18150. Num try: 1 Error when connecting to localhost:18150 : ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')) - Airflow webserver is not available at port 18150. Run `breeze k8s deploy-airflow --python 3.8 --kubernetes-version v1.24.2` to (re)deploy airflow + Airflow webserver is not available at port 18150. Run `breeze k8s deploy-airflow --python 3.9 --kubernetes-version v1.24.2` to (re)deploy airflow - Cluster healthy: airflow-python-3.8-v1.24.2 + Cluster healthy: airflow-python-3.9-v1.24.2 5. Build the image base on PROD Airflow image. You need to build the PROD image first (the command will guide you if you did not) either by running the build separately or passing ``--rebuild-base-image`` @@ -373,15 +373,15 @@ Should show the status of current KinD cluster. .. code-block:: text - Building the K8S image for Python 3.8 using airflow base image: ghcr.io/apache/airflow/main/prod/python3.8:latest + Building the K8S image for Python 3.9 using airflow base image: ghcr.io/apache/airflow/main/prod/python3.9:latest [+] Building 0.1s (8/8) FINISHED => [internal] load build definition from Dockerfile 0.0s => => transferring dockerfile: 301B 0.0s => [internal] load .dockerignore 0.0s => => transferring context: 35B 0.0s - => [internal] load metadata for ghcr.io/apache/airflow/main/prod/python3.8:latest 0.0s - => [1/3] FROM ghcr.io/apache/airflow/main/prod/python3.8:latest 0.0s + => [internal] load metadata for ghcr.io/apache/airflow/main/prod/python3.9:latest 0.0s + => [1/3] FROM ghcr.io/apache/airflow/main/prod/python3.9:latest 0.0s => [internal] load build context 0.0s => => transferring context: 3.00kB 0.0s => CACHED [2/3] COPY airflow/example_dags/ /opt/airflow/dags/ 0.0s @@ -389,7 +389,7 @@ Should show the status of current KinD cluster. => exporting to image 0.0s => => exporting layers 0.0s => => writing image sha256:c0bdd363c549c3b0731b8e8ce34153d081f239ee2b582355b7b3ffd5394c40bb 0.0s - => => naming to ghcr.io/apache/airflow/main/prod/python3.8-kubernetes:latest + => => naming to ghcr.io/apache/airflow/main/prod/python3.9-kubernetes:latest NEXT STEP: You might now upload your k8s image by: @@ -409,9 +409,9 @@ Should show the status of current KinD cluster. 
Good version of kubectl installed: 1.25.0 in /Users/jarek/IdeaProjects/airflow/.build/.k8s-env/bin Good version of helm installed: 3.9.2 in /Users/jarek/IdeaProjects/airflow/.build/.k8s-env/bin Stable repo is already added - Uploading Airflow image ghcr.io/apache/airflow/main/prod/python3.8-kubernetes to cluster airflow-python-3.8-v1.24.2 - Image: "ghcr.io/apache/airflow/main/prod/python3.8-kubernetes" with ID "sha256:fb6195f7c2c2ad97788a563a3fe9420bf3576c85575378d642cd7985aff97412" not yet present on node "airflow-python-3.8-v1.24.2-worker", loading... - Image: "ghcr.io/apache/airflow/main/prod/python3.8-kubernetes" with ID "sha256:fb6195f7c2c2ad97788a563a3fe9420bf3576c85575378d642cd7985aff97412" not yet present on node "airflow-python-3.8-v1.24.2-control-plane", loading... + Uploading Airflow image ghcr.io/apache/airflow/main/prod/python3.9-kubernetes to cluster airflow-python-3.9-v1.24.2 + Image: "ghcr.io/apache/airflow/main/prod/python3.9-kubernetes" with ID "sha256:fb6195f7c2c2ad97788a563a3fe9420bf3576c85575378d642cd7985aff97412" not yet present on node "airflow-python-3.9-v1.24.2-worker", loading... + Image: "ghcr.io/apache/airflow/main/prod/python3.9-kubernetes" with ID "sha256:fb6195f7c2c2ad97788a563a3fe9420bf3576c85575378d642cd7985aff97412" not yet present on node "airflow-python-3.9-v1.24.2-control-plane", loading... NEXT STEP: You might now deploy airflow by: @@ -426,8 +426,8 @@ Should show the status of current KinD cluster. .. code-block:: text - Deploying Airflow for cluster airflow-python-3.8-v1.24.2 - Deploying kind-airflow-python-3.8-v1.24.2 with airflow Helm Chart. + Deploying Airflow for cluster airflow-python-3.9-v1.24.2 + Deploying kind-airflow-python-3.9-v1.24.2 with airflow Helm Chart. Copied chart sources to /private/var/folders/v3/gvj4_mw152q556w2rrh7m46w0000gn/T/chart_edu__kir/chart Deploying Airflow from /private/var/folders/v3/gvj4_mw152q556w2rrh7m46w0000gn/T/chart_edu__kir/chart NAME: airflow @@ -469,12 +469,12 @@ Should show the status of current KinD cluster. Information on how to set a static webserver secret key can be found here: https://airflow.apache.org/docs/helm-chart/stable/production-guide.html#webserver-secret-key - Deployed kind-airflow-python-3.8-v1.24.2 with airflow Helm Chart. + Deployed kind-airflow-python-3.9-v1.24.2 with airflow Helm Chart. - Airflow for Python 3.8 and K8S version v1.24.2 has been successfully deployed. + Airflow for Python 3.9 and K8S version v1.24.2 has been successfully deployed. - The KinD cluster name: airflow-python-3.8-v1.24.2 - The kubectl cluster name: kind-airflow-python-3.8-v1.24.2. + The KinD cluster name: airflow-python-3.9-v1.24.2 + The kubectl cluster name: kind-airflow-python-3.9-v1.24.2. KinD Cluster API server URL: http://localhost:48366 @@ -508,7 +508,7 @@ The virtualenv required will be created automatically when the scripts are run. .. code-block:: text - Running tests with kind-airflow-python-3.8-v1.24.2 cluster. + Running tests with kind-airflow-python-3.9-v1.24.2 cluster. Command to run: pytest kubernetes_tests ========================================================================================= test session starts ========================================================================================== platform darwin -- Python 3.9.9, pytest-6.2.5, py-1.11.0, pluggy-1.0.0 -- /Users/jarek/IdeaProjects/airflow/.build/.k8s-env/bin/python @@ -537,7 +537,7 @@ Once you enter the environment, you receive this information: Entering interactive k8s shell. 
- (kind-airflow-python-3.8-v1.24.2:KubernetesExecutor)> + (kind-airflow-python-3.9-v1.24.2:KubernetesExecutor)> In a separate terminal you can open the k9s CLI: @@ -647,9 +647,9 @@ Kind has also useful commands to inspect your running cluster: .. code-block:: text - Deleting KinD cluster airflow-python-3.8-v1.24.2! - Deleting cluster "airflow-python-3.8-v1.24.2" ... - KinD cluster airflow-python-3.8-v1.24.2 deleted! + Deleting KinD cluster airflow-python-3.9-v1.24.2! + Deleting cluster "airflow-python-3.9-v1.24.2" ... + KinD cluster airflow-python-3.9-v1.24.2 deleted! Running complete k8s tests diff --git a/contributing-docs/testing/system_tests.rst b/contributing-docs/testing/system_tests.rst index 867b89582f165..fe010f76bd5d5 100644 --- a/contributing-docs/testing/system_tests.rst +++ b/contributing-docs/testing/system_tests.rst @@ -35,7 +35,7 @@ Airflow system tests are pretty special because they serve three purposes: Old System Tests ---------------- -The system tests derive from the ``tests.test_utils.system_test_class.SystemTests`` class. +The system tests derive from the ``tests_common.test_utils.system_test_class.SystemTests`` class. Old versions of System tests should also be marked with ``@pytest.marker.system(SYSTEM)`` where ``system`` designates the system to be tested (for example, ``google.cloud``). These tests are skipped by default. diff --git a/contributing-docs/testing/unit_tests.rst b/contributing-docs/testing/unit_tests.rst index cd0b9ab00bf44..dc4e27f10d9fe 100644 --- a/contributing-docs/testing/unit_tests.rst +++ b/contributing-docs/testing/unit_tests.rst @@ -209,7 +209,7 @@ rerun in Breeze as you will (``-n auto`` will parallelize tests using ``pytest-x .. code-block:: bash - breeze shell --backend none --python 3.8 + breeze shell --backend none --python 3.9 > pytest tests --skip-db-tests -n auto @@ -251,7 +251,7 @@ You can also run DB tests with ``breeze`` dockerized environment. You can choose ``--backend`` flag. The default is ``sqlite`` but you can also use others such as ``postgres`` or ``mysql``. You can also select backend version and Python version to use. You can specify the ``test-type`` to run - breeze will list the test types you can run with ``--help`` and provide auto-complete for them. Example -below runs the ``Core`` tests with ``postgres`` backend and ``3.8`` Python version: +below runs the ``Core`` tests with ``postgres`` backend and ``3.9`` Python version: We have a dedicated, opinionated ``breeze testing db-tests`` command as well that runs DB tests (it is also used in CI to run the DB tests, where you do not have to specify extra flags for @@ -286,7 +286,7 @@ either by package/module/test or by test type - whatever ``pytest`` supports. .. code-block:: bash - breeze shell --backend postgres --python 3.8 + breeze shell --backend postgres --python 3.9 > pytest tests --run-db-tests-only As explained before, you cannot run DB tests in parallel using ``pytest-xdist`` plugin, but ``breeze`` has @@ -296,7 +296,7 @@ you use ``breeze testing db-tests`` command): .. code-block:: bash - breeze testing tests --run-db-tests-only --backend postgres --python 3.8 --run-in-parallel + breeze testing tests --run-db-tests-only --backend postgres --python 3.9 --run-in-parallel Examples of marking test as DB test ................................... @@ -952,7 +952,7 @@ will ask you to rebuild the image if it is needed and some new dependencies shou .. 
code-block:: bash - breeze testing tests tests/providers/http/hooks/test_http.py tests/core/test_core.py --db-reset --log-cli-level=DEBUG + breeze testing tests providers/tests/http/hooks/test_http.py tests/core/test_core.py --db-reset --log-cli-level=DEBUG You can run the whole test suite without adding the test target: @@ -1133,7 +1133,7 @@ directly to the container. .. code-block:: bash - breeze ci-image build --python 3.8 + breeze ci-image build --python 3.9 2. Enter breeze environment by selecting the appropriate airflow version and choosing ``providers-and-tests`` option for ``--mount-sources`` flag. @@ -1146,7 +1146,7 @@ directly to the container. .. code-block:: bash - pytest tests/providers//test.py + pytest providers/tests//test.py 4. Iterate with the tests and providers. Both providers and tests are mounted from local sources so changes you do locally in both - tests and provider sources are immediately reflected inside the @@ -1171,7 +1171,7 @@ are not part of the public API. We deal with it in one of the following ways: 1) If the whole provider is supposed to only work for later airflow version, we remove the whole provider by excluding it from compatibility test configuration (see below) -2) Some compatibility shims are defined in ``tests/test_utils/compat.py`` - and they can be used to make the +2) Some compatibility shims are defined in ``tests_common.test_utils/compat.py`` - and they can be used to make the tests compatible - for example importing ``ParseImportError`` after the exception has been renamed from ``ImportError`` and it would fail in Airflow 2.9, but we have a fallback import in ``compat.py`` that falls back to old import automatically, so all tests testing / expecting ``ParseImportError`` should import @@ -1184,7 +1184,7 @@ are not part of the public API. We deal with it in one of the following ways: .. code-block:: python - from tests.test_utils.compat import AIRFLOW_V_2_8_PLUS + from tests_common.test_utils.compat import AIRFLOW_V_2_8_PLUS @pytest.mark.skipif(not AIRFLOW_V_2_8_PLUS, reason="The tests should be skipped for Airflow < 2.8") @@ -1197,6 +1197,9 @@ are not part of the public API. We deal with it in one of the following ways: .. code-block:: python + from tests_common import RUNNING_TESTS_AGAINST_AIRFLOW_PACKAGES + + @pytest.mark.skipif( RUNNING_TESTS_AGAINST_AIRFLOW_PACKAGES, reason="Plugin initialization is done early in case of packages" ) @@ -1241,7 +1244,7 @@ Herr id how to reproduce it. .. code-block:: bash - breeze ci-image build --python 3.8 + breeze ci-image build --python 3.9 2. Build providers from latest sources: @@ -1280,7 +1283,7 @@ In case you want to reproduce canary run, you need to add ``--clean-airflow-inst .. code-block:: bash - pytest tests/providers//test.py + pytest providers/tests//test.py 7. Iterate with the tests diff --git a/dev/README_RELEASE_AIRFLOW.md b/dev/README_RELEASE_AIRFLOW.md index e0e014cfcb13d..1f56def27646c 100644 --- a/dev/README_RELEASE_AIRFLOW.md +++ b/dev/README_RELEASE_AIRFLOW.md @@ -682,7 +682,7 @@ Optionally it can be followed with constraints ```shell script pip install apache-airflow==rc \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-/constraints-3.8.txt"` + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-/constraints-3.9.txt"` ``` Note that the constraints contain python version that you are installing it with. 
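Because the constraints file name embeds the Python major.minor version, a small hypothetical helper (illustrative values only, not part of the release tooling) can show how the constraint URL is derived for the interpreter actually used for verification:

.. code-block:: python

    # Hypothetical helper: build the constraints URL matching the interpreter
    # you are installing with. The version below is purely illustrative;
    # substitute the candidate version and constraints ref you are verifying.
    import sys

    AIRFLOW_VERSION = "2.10.2rc1"  # illustrative candidate version
    PYTHON_VERSION = f"{sys.version_info.major}.{sys.version_info.minor}"
    CONSTRAINTS_URL = (
        "https://raw.githubusercontent.com/apache/airflow/"
        f"constraints-{AIRFLOW_VERSION}/constraints-{PYTHON_VERSION}.txt"
    )

    if __name__ == "__main__":
        print(
            f"pip install 'apache-airflow=={AIRFLOW_VERSION}' "
            f"--constraint '{CONSTRAINTS_URL}'"
        )
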
@@ -694,7 +694,7 @@ There is also an easy way of installation with Breeze if you have the latest sou Running the following command will use tmux inside breeze, create `admin` user and run Webserver & Scheduler: ```shell script -breeze start-airflow --use-airflow-version 2.7.0rc1 --python 3.8 --backend postgres +breeze start-airflow --use-airflow-version 2.7.0rc1 --python 3.9 --backend postgres ``` You can also choose different executors and extras to install when you are installing airflow this way. For @@ -702,7 +702,7 @@ example in order to run Airflow with CeleryExecutor and install celery, google a Airflow 2.7.0, you need to have celery provider installed to run Airflow with CeleryExecutor) you can run: ```shell script -breeze start-airflow --use-airflow-version 2.7.0rc1 --python 3.8 --backend postgres \ +breeze start-airflow --use-airflow-version 2.7.0rc1 --python 3.9 --backend postgres \ --executor CeleryExecutor --airflow-extras "celery,google,amazon" ``` @@ -838,7 +838,7 @@ the older branches, you should set the "skip" field to true. ## Verify production images ```shell script -for PYTHON in 3.8 3.9 3.10 3.11 3.12 +for PYTHON in 3.9 3.10 3.11 3.12 do docker pull apache/airflow:${VERSION}-python${PYTHON} breeze prod-image verify --image-name apache/airflow:${VERSION}-python${PYTHON} diff --git a/dev/README_RELEASE_PROVIDER_PACKAGES.md b/dev/README_RELEASE_PROVIDER_PACKAGES.md index 67d1bfa30fc07..dbb785200f74b 100644 --- a/dev/README_RELEASE_PROVIDER_PACKAGES.md +++ b/dev/README_RELEASE_PROVIDER_PACKAGES.md @@ -1031,7 +1031,7 @@ pip install apache-airflow-providers-==rc ### Installing with Breeze ```shell -breeze start-airflow --use-airflow-version 2.2.4 --python 3.8 --backend postgres \ +breeze start-airflow --use-airflow-version 2.2.4 --python 3.9 --backend postgres \ --load-example-dags --load-default-connections ``` diff --git a/dev/breeze/README.md b/dev/breeze/README.md index 15c0b66f57f46..d28cfd5353a2c 100644 --- a/dev/breeze/README.md +++ b/dev/breeze/README.md @@ -66,6 +66,6 @@ PLEASE DO NOT MODIFY THE HASH BELOW! IT IS AUTOMATICALLY UPDATED BY PRE-COMMIT. --------------------------------------------------------------------------------------------------------- -Package config hash: 8e382ff46231b261a569886a45480104eb5436434d2845c3eb011ee9dd4da3c2fa33f561eaa36f2245a29c8719ae2e86d7ffec39463c46e0b3b4bde56a27abe6 +Package config hash: 2ae1201c56227b6fcb599f020360a906100a80b32ed3a0d4927c8721e738afee3867f9ed567fd75ec9f368933c3a94c1336f8ab068f7892ed1ebe6244ccf20fe --------------------------------------------------------------------------------------------------------- diff --git a/dev/breeze/doc/01_installation.rst b/dev/breeze/doc/01_installation.rst index 7107be900d97b..1c7ad0ee62838 100644 --- a/dev/breeze/doc/01_installation.rst +++ b/dev/breeze/doc/01_installation.rst @@ -313,7 +313,7 @@ that Breeze works on .. warning:: Upgrading from earlier Python version - If you used Breeze with Python 3.7 and when running it, it will complain that it needs Python 3.8. In this + If you used Breeze with Python 3.8 and when running it, it will complain that it needs Python 3.9. In this case you should force-reinstall Breeze with ``pipx``: .. code-block:: bash @@ -342,14 +342,14 @@ that Breeze works on .. code-block:: bash - pipx reinstall --python /Users/airflow/.pyenv/versions/3.8.16/bin/python apache-airflow-breeze + pipx reinstall --python /Users/airflow/.pyenv/versions/3.9.16/bin/python apache-airflow-breeze Or you can uninstall breeze and install it with a specific python version: .. 
code-block:: bash pipx uninstall apache-airflow-breeze - pipx install -e ./dev/breeze --python /Users/airflow/.pyenv/versions/3.8.16/bin/python + pipx install -e ./dev/breeze --python /Users/airflow/.pyenv/versions/3.9.16/bin/python Running Breeze for the first time diff --git a/dev/breeze/doc/03_developer_tasks.rst b/dev/breeze/doc/03_developer_tasks.rst index 76f43606837e8..87bb2713b93fa 100644 --- a/dev/breeze/doc/03_developer_tasks.rst +++ b/dev/breeze/doc/03_developer_tasks.rst @@ -34,12 +34,12 @@ You can use additional ``breeze`` flags to choose your environment. You can spec version to use, and backend (the meta-data database). Thanks to that, with Breeze, you can recreate the same environments as we have in matrix builds in the CI. See next chapter for backend selection. -For example, you can choose to run Python 3.8 tests with MySQL as backend and with mysql version 8 +For example, you can choose to run Python 3.9 tests with MySQL as backend and with mysql version 8 as follows: .. code-block:: bash - breeze --python 3.8 --backend mysql --mysql-version 8.0 + breeze --python 3.9 --backend mysql --mysql-version 8.0 .. note:: Note for Windows WSL2 users @@ -55,7 +55,7 @@ Try adding ``--builder=default`` to your command. For example: .. code-block:: bash - breeze --builder=default --python 3.8 --backend mysql --mysql-version 8.0 + breeze --builder=default --python 3.9 --backend mysql --mysql-version 8.0 The choices you make are persisted in the ``./.build/`` cache directory so that next time when you use the ``breeze`` script, it could use the values that were used previously. This way you do not have to specify @@ -331,7 +331,7 @@ When you are starting airflow from local sources, www asset compilation is autom .. code-block:: bash - breeze --python 3.8 --backend mysql start-airflow + breeze --python 3.9 --backend mysql start-airflow You can also use it to start different executor. @@ -344,7 +344,7 @@ You can also use it to start any released version of Airflow from ``PyPI`` with .. code-block:: bash - breeze start-airflow --python 3.8 --backend mysql --use-airflow-version 2.7.0 + breeze start-airflow --python 3.9 --backend mysql --use-airflow-version 2.7.0 When you are installing version from PyPI, it's also possible to specify extras that should be used when installing Airflow - you can provide several extras separated by coma - for example to install diff --git a/dev/breeze/doc/04_troubleshooting.rst b/dev/breeze/doc/04_troubleshooting.rst index fd0b1dfa401dc..fd5f92b03ea35 100644 --- a/dev/breeze/doc/04_troubleshooting.rst +++ b/dev/breeze/doc/04_troubleshooting.rst @@ -72,6 +72,38 @@ describe your problem. stated in `This comment `_ and allows to run Breeze with no problems. +Cannot import name 'cache' or Python >=3.9 required +--------------------------------------------------- + +When you see this error: + +.. code-block:: + + ImportError: cannot import name 'cache' from 'functools' (/Users/jarek/Library/Application Support/hatch/pythons/3.8/python/lib/python3.8/functools.py) + +or + +.. code-block:: + + ERROR: Package 'blacken-docs' requires a different Python: 3.8.18 not in '>=3.9' + + +It means that your pre-commit hook is installed with (already End-Of-Life) Python 3.8 and you should reinstall +it and clean pre-commit cache. + +This can be done (if you use ``pipx`` to install ``pre-commit``): + +.. 
code-block:: bash + + pipx uninstall pre-commit + pipx install pre-commit --python $(which python3.9) --force + pre-commit clean + pre-commit install + +If you installed ``pre-commit`` differently, you should remove and reinstall +it (and clean cache) in the way you installed it. + + Bad Interpreter Error --------------------- diff --git a/dev/breeze/doc/06_managing_docker_images.rst b/dev/breeze/doc/06_managing_docker_images.rst index 294f1540f3667..bb4c4f9e06f62 100644 --- a/dev/breeze/doc/06_managing_docker_images.rst +++ b/dev/breeze/doc/06_managing_docker_images.rst @@ -140,10 +140,10 @@ suffix and they need to also be paired with corresponding runtime dependency add .. code-block:: bash - breeze prod-image build --python 3.8 --additional-dev-deps "libasound2-dev" \ + breeze prod-image build --python 3.9 --additional-dev-deps "libasound2-dev" \ --additional-runtime-apt-deps "libasound2" -Same as above but uses python 3.8. +Same as above but uses python 3.9. Building PROD image ................... diff --git a/dev/breeze/doc/09_release_management_tasks.rst b/dev/breeze/doc/09_release_management_tasks.rst index 930f61159d16e..9cef5d6ccd15e 100644 --- a/dev/breeze/doc/09_release_management_tasks.rst +++ b/dev/breeze/doc/09_release_management_tasks.rst @@ -26,7 +26,7 @@ do not need or have no access to run). Those are usually connected with releasin Those are all of the available release management commands: .. image:: ./images/output_release-management.svg - :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_release-management.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/doc/images/output_release-management.svg :width: 100% :alt: Breeze release management @@ -55,7 +55,7 @@ default is to build ``both`` type of packages ``sdist`` and ``wheel``. breeze release-management prepare-airflow-package --package-format=wheel .. image:: ./images/output_release-management_prepare-airflow-package.svg - :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_release-management_prepare-airflow-package.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/doc/images/output_release-management_prepare-airflow-package.svg :width: 100% :alt: Breeze release-management prepare-airflow-package @@ -79,7 +79,7 @@ tarball for. breeze release-management prepare-airflow-tarball --version 2.8.0rc1 .. image:: ./images/output_release-management_prepare-airflow-tarball.svg - :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_release-management_prepare-airflow-tarball.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/doc/images/output_release-management_prepare-airflow-tarball.svg :width: 100% :alt: Breeze release-management prepare-airflow-tarball @@ -94,7 +94,7 @@ automates it. breeze release-management create-minor-branch .. image:: ./images/output_release-management_create-minor-branch.svg - :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_release-management_create-minor-branch.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/doc/images/output_release-management_create-minor-branch.svg :width: 100% :alt: Breeze release-management create-minor-branch @@ -109,7 +109,7 @@ When we prepare release candidate, we automate some of the steps we need to do. breeze release-management start-rc-process .. 
image:: ./images/output_release-management_start-rc-process.svg - :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_release-management_start-rc-process.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/doc/images/output_release-management_start-rc-process.svg :width: 100% :alt: Breeze release-management start-rc-process @@ -123,7 +123,7 @@ When we prepare final release, we automate some of the steps we need to do. breeze release-management start-release .. image:: ./images/output_release-management_start-release.svg - :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_release-management_start-rc-process.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/doc/images/output_release-management_start-rc-process.svg :width: 100% :alt: Breeze release-management start-rc-process @@ -154,7 +154,7 @@ You can also generate python client with custom security schemes. These are all of the available flags for the command: .. image:: ./images/output_release-management_prepare-python-client.svg - :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_release-management_prepare-python-client.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/doc/images/output_release-management_prepare-python-client.svg :width: 100% :alt: Breeze release management prepare Python client @@ -185,7 +185,7 @@ step can be skipped if you pass the ``--skip-latest`` flag. These are all of the available flags for the ``release-prod-images`` command: .. image:: ./images/output_release-management_release-prod-images.svg - :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_release-management_release-prod-images.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/doc/images/output_release-management_release-prod-images.svg :width: 100% :alt: Breeze release management release prod images @@ -208,7 +208,7 @@ However, If you want to disable this behaviour, set the envvar CLEAN_LOCAL_TAGS These are all of the available flags for the ``tag-providers`` command: .. image:: ./images/output_release-management_tag-providers.svg - :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_release-management_tag-providers.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/doc/images/output_release-management_tag-providers.svg :width: 100% :alt: Breeze release management tag-providers @@ -234,7 +234,7 @@ which version of Helm Chart you are preparing the tarball for. breeze release-management prepare-helm-chart-tarball --version 1.12.0 --version-suffix rc1 .. image:: ./images/output_release-management_prepare-helm-chart-tarball.svg - :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_release-management_prepare-helm-chart-tarball.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/doc/images/output_release-management_prepare-helm-chart-tarball.svg :width: 100% :alt: Breeze release-management prepare-helm-chart-tarball @@ -256,7 +256,7 @@ This prepares helm chart .tar.gz package in the dist folder. breeze release-management prepare-helm-chart-package --sign myemail@apache.org .. 
image:: ./images/output_release-management_prepare-helm-chart-package.svg - :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_release-management_prepare-helm-chart-package.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/doc/images/output_release-management_prepare-helm-chart-package.svg :width: 100% :alt: Breeze release-management prepare-helm-chart-package @@ -292,7 +292,7 @@ The below example perform documentation preparation for provider packages. You can also add ``--answer yes`` to perform non-interactive build. .. image:: ./images/output_release-management_prepare-provider-documentation.svg - :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_release-management_prepare-provider-documentation.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/doc/images/output_release-management_prepare-provider-documentation.svg :width: 100% :alt: Breeze prepare-provider-documentation @@ -325,7 +325,7 @@ You can see all providers available by running this command: breeze release-management prepare-provider-packages --help .. image:: ./images/output_release-management_prepare-provider-packages.svg - :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_release-management_prepare-provider-packages.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/doc/images/output_release-management_prepare-provider-packages.svg :width: 100% :alt: Breeze prepare-provider-packages @@ -349,7 +349,7 @@ You can also run the verification with an earlier airflow version to check for c All the command parameters are here: .. image:: ./images/output_release-management_install-provider-packages.svg - :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_release-management_install-provider-packages.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/doc/images/output_release-management_install-provider-packages.svg :width: 100% :alt: Breeze install-provider-packages @@ -373,7 +373,7 @@ You can also run the verification with an earlier airflow version to check for c All the command parameters are here: .. image:: ./images/output_release-management_verify-provider-packages.svg - :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_release-management_verify-provider-packages.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/doc/images/output_release-management_verify-provider-packages.svg :width: 100% :alt: Breeze verify-provider-packages @@ -387,7 +387,7 @@ provider has been released) and date of the release of the provider version. These are all of the available flags for the ``generate-providers-metadata`` command: .. image:: ./images/output_release-management_generate-providers-metadata.svg - :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_release-management_generate-providers-metadata.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/doc/images/output_release-management_generate-providers-metadata.svg :width: 100% :alt: Breeze release management generate providers metadata @@ -398,7 +398,7 @@ Generating Provider Issue You can use Breeze to generate a provider issue when you release new providers. .. 
image:: ./images/output_release-management_generate-issue-content-providers.svg - :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_release-management_generate-issue-content-providers.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/doc/images/output_release-management_generate-issue-content-providers.svg :width: 100% :alt: Breeze generate-issue-content-providers @@ -414,7 +414,7 @@ command. These are all available flags of ``clean-old-provider-artifacts`` command: .. image:: ./images/output_release-management_clean-old-provider-artifacts.svg - :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_release-management_clean-old-provider-artifacts.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/doc/images/output_release-management_clean-old-provider-artifacts.svg :width: 100% :alt: Breeze Clean Old Provider Artifacts @@ -462,7 +462,7 @@ Constraints are generated separately for each python version and there are separ These are all available flags of ``generate-constraints`` command: .. image:: ./images/output_release-management_generate-constraints.svg - :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_release-management_generate-constraints.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/doc/images/output_release-management_generate-constraints.svg :width: 100% :alt: Breeze generate-constraints @@ -485,7 +485,7 @@ tagged already in the past. This can be done using ``breeze release-management u These are all available flags of ``update-constraints`` command: .. image:: ./images/output_release-management_update-constraints.svg - :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_release-management_update-constraints.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/doc/images/output_release-management_update-constraints.svg :width: 100% :alt: Breeze update-constraints @@ -552,7 +552,7 @@ publishing docs for multiple providers. These are all available flags of ``release-management publish-docs`` command: .. image:: ./images/output_release-management_publish-docs.svg - :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_release-management_publish-docs.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/doc/images/output_release-management_publish-docs.svg :width: 100% :alt: Breeze Publish documentation @@ -596,7 +596,7 @@ providers - you can mix apache-airflow, helm-chart and provider packages this wa These are all available flags of ``release-management add-back-references`` command: .. image:: ./images/output_release-management_add-back-references.svg - :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_release-management_add-back-references.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/doc/images/output_release-management_add-back-references.svg :width: 100% :alt: Breeze Add Back References @@ -606,7 +606,7 @@ SBOM generation tasks Maintainers also can use Breeze for SBOM generation: .. 
image:: ./images/output_sbom.svg - :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_sbom.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/doc/images/output_sbom.svg :width: 100% :alt: Breeze sbom @@ -619,7 +619,7 @@ done by the ``generate-providers-requirements`` command. This command generates selected provider and python version, using the airflow version specified. .. image:: ./images/output_sbom_generate-providers-requirements.svg - :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_sbom_generate-providers-requirements.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/doc/images/output_sbom_generate-providers-requirements.svg :width: 100% :alt: Breeze generate SBOM provider requirements @@ -634,7 +634,7 @@ information is written directly to ``docs-archive`` in airflow-site repository. These are all of the available flags for the ``update-sbom-information`` command: .. image:: ./images/output_sbom_update-sbom-information.svg - :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_sbomt_update-sbom-information.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/doc/images/output_sbomt_update-sbom-information.svg :width: 100% :alt: Breeze update sbom information @@ -646,7 +646,7 @@ such images are built with the ``build-all-airflow-images`` command. This command will build one docker image per python version, with all the airflow versions >=2.0.0 compatible. .. image:: ./images/output_sbom_build-all-airflow-images.svg - :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_sbom_build-all-airflow-images.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/doc/images/output_sbom_build-all-airflow-images.svg :width: 100% :alt: Breeze build all airflow images @@ -658,7 +658,7 @@ The SBOM information published on our website can be converted into a spreadshee properties of the dependencies. This is done by the ``export-dependency-information`` command. .. image:: ./images/output_sbom_export-dependency-information.svg - :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_sbom_export-dependency-information.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/doc/images/output_sbom_export-dependency-information.svg :width: 100% :alt: Breeze sbom export dependency information @@ -666,3 +666,26 @@ properties of the dependencies. This is done by the ``export-dependency-informat Next step: Follow the `Advanced Breeze topics <10_advanced_breeze_topics.rst>`_ to learn more about Breeze internals. + +Preparing airflow Task SDK packages +""""""""""""""""""""""""""""""""""" + +You can prepare airflow packages using Breeze: + +.. code-block:: bash + + breeze release-management prepare-task-sdk-package + +This prepares airflow Task SDK .whl package in the dist folder. + +Again, you can specify optional ``--package-format`` flag to build selected formats of the Task SDK packages, +default is to build ``both`` type of packages ``sdist`` and ``wheel``. + +.. code-block:: bash + + breeze release-management prepare-task-sdk-package --package-format=wheel + +.. 
image:: ./images/output_release-management_prepare-task-sdk-package.svg + :target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/doc/images/output_release-management_prepare-airflow-package.svg + :width: 100% + :alt: Breeze release-management prepare-task-sdk-package diff --git a/dev/breeze/doc/10_advanced_breeze_topics.rst b/dev/breeze/doc/10_advanced_breeze_topics.rst index ac5421f85aa9a..a4f9384863009 100644 --- a/dev/breeze/doc/10_advanced_breeze_topics.rst +++ b/dev/breeze/doc/10_advanced_breeze_topics.rst @@ -33,8 +33,8 @@ For testing, you can create your own virtual environment, or use the one that `` already installed breeze following the recommended ``pipx install -e ./dev/breeze`` command. For local virtualenv, you can use ``pyenv`` or any other virtualenv wrapper. For example with ``pyenv``, -you can use ``pyenv virtualenv 3.8.6 airflow-breeze`` to create virtualenv called ``airflow-breeze`` -with Python 3.8.6. Then you can use ``pyenv activate airflow-breeze`` to activate it and install breeze +you can use ``pyenv virtualenv 3.9.6 airflow-breeze`` to create virtualenv called ``airflow-breeze`` +with Python 3.9.6. Then you can use ``pyenv activate airflow-breeze`` to activate it and install breeze in editable mode with ``pip install -e ./dev/breeze``. For ``pipx`` virtualenv, you can use the virtualenv that ``pipx`` created for you. You can find the name @@ -56,7 +56,7 @@ make sure to follow these steps: this will bypass the check we run in Breeze to see if there are new requirements to install for it See example configuration for PyCharm which has run/debug configuration for -``breeze sbom generate-providers-requirements --provider-id sqlite --python 3.8`` +``breeze sbom generate-providers-requirements --provider-id sqlite --python 3.9`` .. raw:: html diff --git a/dev/breeze/doc/adr/0002-implement-standalone-python-command.md b/dev/breeze/doc/adr/0002-implement-standalone-python-command.md index 37eebcf3e15d1..ddd005fd92dde 100644 --- a/dev/breeze/doc/adr/0002-implement-standalone-python-command.md +++ b/dev/breeze/doc/adr/0002-implement-standalone-python-command.md @@ -138,7 +138,7 @@ There are a few properties of Breeze/CI scripts that should be maintained though run a command and get everything done with the least number of prerequisites * The prerequisites for Breeze and CI are: - * Python 3.8+ (Python 3.8 end of life is October 2024) + * Python 3.9+ (Python 3.9 end of life is October 2025) * Docker (23.0+) * Docker Compose (2.16.0+) * No other tools and CLI commands should be needed diff --git a/dev/breeze/doc/ci/02_images.md b/dev/breeze/doc/ci/02_images.md index 6dfa8f350f828..8c699e43b5dbb 100644 --- a/dev/breeze/doc/ci/02_images.md +++ b/dev/breeze/doc/ci/02_images.md @@ -129,17 +129,17 @@ The images are built with default extras - different extras for CI and production image and you can change the extras via the `--airflow-extras` parameters and add new ones with `--additional-airflow-extras`. 
-For example if you want to build Python 3.8 version of production image +For example if you want to build Python 3.9 version of production image with "all" extras installed you should run this command: ``` bash -breeze prod-image build --python 3.8 --airflow-extras "all" +breeze prod-image build --python 3.9 --airflow-extras "all" ``` If you just want to add new extras you can add them like that: ``` bash -breeze prod-image build --python 3.8 --additional-airflow-extras "all" +breeze prod-image build --python 3.9 --additional-airflow-extras "all" ``` The command that builds the CI image is optimized to minimize the time @@ -160,7 +160,7 @@ You can also build production images from PIP packages via providing `--install-airflow-version` parameter to Breeze: ``` bash -breeze prod-image build --python 3.8 --additional-airflow-extras=trino --install-airflow-version=2.0.0 +breeze prod-image build --python 3.9 --additional-airflow-extras=trino --install-airflow-version=2.0.0 ``` This will build the image using command similar to: @@ -168,7 +168,7 @@ This will build the image using command similar to: ``` bash pip install \ apache-airflow[async,amazon,celery,cncf.kubernetes,docker,elasticsearch,ftp,grpc,hashicorp,http,ldap,google,microsoft.azure,mysql,postgres,redis,sendgrid,sftp,slack,ssh,statsd,virtualenv]==2.0.0 \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.0.0/constraints-3.8.txt" + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.0.0/constraints-3.9.txt" ``` > [!NOTE] @@ -199,7 +199,7 @@ HEAD of development for constraints): ``` bash pip install "https://github.com/apache/airflow/archive/.tar.gz#egg=apache-airflow" \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.8.txt" + --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-main/constraints-3.9.txt" ``` You can also skip installing airflow and install it from locally @@ -207,7 +207,7 @@ provided files by using `--install-packages-from-context` parameter to Breeze: ``` bash -breeze prod-image build --python 3.8 --additional-airflow-extras=trino --install-packages-from-context +breeze prod-image build --python 3.9 --additional-airflow-extras=trino --install-packages-from-context ``` In this case you airflow and all packages (.whl files) should be placed @@ -241,20 +241,20 @@ flags: `registry` (default), `local`, or `disabled` flags when you run Breeze commands. For example: ``` bash -breeze ci-image build --python 3.8 --docker-cache local +breeze ci-image build --python 3.9 --docker-cache local ``` Will build the CI image using local build cache (note that it will take quite a long time the first time you run it). ``` bash -breeze prod-image build --python 3.8 --docker-cache registry +breeze prod-image build --python 3.9 --docker-cache registry ``` Will build the production image with cache used from registry. ``` bash -breeze prod-image build --python 3.8 --docker-cache disabled +breeze prod-image build --python 3.9 --docker-cache disabled ``` Will build the production image from the scratch. @@ -336,12 +336,12 @@ faster. It is enough to pass `--image-tag` and the registry and Breeze will download and execute commands using the same image that was used during the CI tests. 
-For example this command will run the same Python 3.8 image as was used +For example this command will run the same Python 3.9 image as was used in build identified with 9a621eaa394c0a0a336f8e1b31b35eff4e4ee86e commit SHA with enabled rabbitmq integration. ``` bash -breeze --image-tag 9a621eaa394c0a0a336f8e1b31b35eff4e4ee86e --python 3.8 --integration rabbitmq +breeze --image-tag 9a621eaa394c0a0a336f8e1b31b35eff4e4ee86e --python 3.9 --integration rabbitmq ``` You can see more details and examples in[Breeze](../README.rst) @@ -361,7 +361,7 @@ you can build the image in the Here just a few examples are presented which should give you general understanding of what you can customize. -This builds the production image in version 3.8 with additional airflow +This builds the production image in version 3.9 with additional airflow extras from 2.0.0 PyPI package and additional apt dev and runtime dependencies. @@ -373,7 +373,7 @@ plugin installed. ``` bash DOCKER_BUILDKIT=1 docker build . -f Dockerfile.ci \ --pull \ - --build-arg PYTHON_BASE_IMAGE="python:3.8-slim-bookworm" \ + --build-arg PYTHON_BASE_IMAGE="python:3.9-slim-bookworm" \ --build-arg ADDITIONAL_AIRFLOW_EXTRAS="jdbc" \ --build-arg ADDITIONAL_PYTHON_DEPS="pandas" \ --build-arg ADDITIONAL_DEV_APT_DEPS="gcc g++" \ @@ -384,7 +384,7 @@ the same image can be built using `breeze` (it supports auto-completion of the options): ``` bash -breeze ci-image build --python 3.8 --additional-airflow-extras=jdbc --additional-python-deps="pandas" \ +breeze ci-image build --python 3.9 --additional-airflow-extras=jdbc --additional-python-deps="pandas" \ --additional-dev-apt-deps="gcc g++" ``` @@ -398,7 +398,7 @@ comment](https://github.com/apache/airflow/issues/8605#issuecomment-690065621): ``` bash DOCKER_BUILDKIT=1 docker build . -f Dockerfile.ci \ --pull \ - --build-arg PYTHON_BASE_IMAGE="python:3.8-slim-bookworm" \ + --build-arg PYTHON_BASE_IMAGE="python:3.9-slim-bookworm" \ --build-arg AIRFLOW_INSTALLATION_METHOD="apache-airflow" \ --build-arg ADDITIONAL_AIRFLOW_EXTRAS="slack" \ --build-arg ADDITIONAL_PYTHON_DEPS="apache-airflow-providers-odbc \ @@ -423,8 +423,8 @@ can be used for CI images: | Build argument | Default value | Description | |-----------------------------------|-------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `PYTHON_BASE_IMAGE` | `python:3.8-slim-bookworm` | Base Python image | -| `PYTHON_MAJOR_MINOR_VERSION` | `3.8` | major/minor version of Python (should match base image) | +| `PYTHON_BASE_IMAGE` | `python:3.9-slim-bookworm` | Base Python image | +| `PYTHON_MAJOR_MINOR_VERSION` | `3.9` | major/minor version of Python (should match base image) | | `DEPENDENCIES_EPOCH_NUMBER` | `2` | increasing this number will reinstall all apt dependencies | | `ADDITIONAL_PIP_INSTALL_FLAGS` | | additional `pip` flags passed to the installation commands (except when reinstalling `pip` itself) | | `PIP_NO_CACHE_DIR` | `true` | if true, then no pip cache will be stored | @@ -455,59 +455,59 @@ can be used for CI images: Here are some examples of how CI images can built manually. CI is always built from local sources. -This builds the CI image in version 3.8 with default extras ("all"). +This builds the CI image in version 3.9 with default extras ("all"). ``` bash DOCKER_BUILDKIT=1 docker build . 
-f Dockerfile.ci \ --pull \ - --build-arg PYTHON_BASE_IMAGE="python:3.8-slim-bookworm" --tag my-image:0.0.1 + --build-arg PYTHON_BASE_IMAGE="python:3.9-slim-bookworm" --tag my-image:0.0.1 ``` -This builds the CI image in version 3.8 with "gcp" extra only. +This builds the CI image in version 3.9 with "gcp" extra only. ``` bash DOCKER_BUILDKIT=1 docker build . -f Dockerfile.ci \ --pull \ - --build-arg PYTHON_BASE_IMAGE="python:3.8-slim-bookworm" \ + --build-arg PYTHON_BASE_IMAGE="python:3.9-slim-bookworm" \ --build-arg AIRFLOW_EXTRAS=gcp --tag my-image:0.0.1 ``` -This builds the CI image in version 3.8 with "apache-beam" extra added. +This builds the CI image in version 3.9 with "apache-beam" extra added. ``` bash DOCKER_BUILDKIT=1 docker build . -f Dockerfile.ci \ --pull \ - --build-arg PYTHON_BASE_IMAGE="python:3.8-slim-bookworm" \ + --build-arg PYTHON_BASE_IMAGE="python:3.9-slim-bookworm" \ --build-arg ADDITIONAL_AIRFLOW_EXTRAS="apache-beam" --tag my-image:0.0.1 ``` -This builds the CI image in version 3.8 with "mssql" additional package +This builds the CI image in version 3.9 with "mssql" additional package added. ``` bash DOCKER_BUILDKIT=1 docker build . -f Dockerfile.ci \ --pull \ - --build-arg PYTHON_BASE_IMAGE="python:3.8-slim-bookworm" \ + --build-arg PYTHON_BASE_IMAGE="python:3.9-slim-bookworm" \ --build-arg ADDITIONAL_PYTHON_DEPS="mssql" --tag my-image:0.0.1 ``` -This builds the CI image in version 3.8 with "gcc" and "g++" additional +This builds the CI image in version 3.9 with "gcc" and "g++" additional apt dev dependencies added. ``` DOCKER_BUILDKIT=1 docker build . -f Dockerfile.ci \ --pull - --build-arg PYTHON_BASE_IMAGE="python:3.8-slim-bookworm" \ + --build-arg PYTHON_BASE_IMAGE="python:3.9-slim-bookworm" \ --build-arg ADDITIONAL_DEV_APT_DEPS="gcc g++" --tag my-image:0.0.1 ``` -This builds the CI image in version 3.8 with "jdbc" extra and +This builds the CI image in version 3.9 with "jdbc" extra and "default-jre-headless" additional apt runtime dependencies added. ``` DOCKER_BUILDKIT=1 docker build . -f Dockerfile.ci \ --pull \ - --build-arg PYTHON_BASE_IMAGE="python:3.8-slim-bookworm" \ + --build-arg PYTHON_BASE_IMAGE="python:3.9-slim-bookworm" \ --build-arg AIRFLOW_EXTRAS=jdbc \ --tag my-image:0.0.1 ``` @@ -573,8 +573,7 @@ percent-encoded when you access them via UI (/ = %2F) | PROD image | airflow/\/prod/python\:\ | faster to build or pull. Production image optimized for size. | - \ might be either "main" or "v2-\*-test" -- \ - Python version (Major + Minor).Should be one of \["3.8", - "3.9", "3.10", "3.11", "3.12" \]. +- \ - Python version (Major + Minor).Should be one of \["3.9", "3.10", "3.11", "3.12" \]. - \ - full-length SHA of commit either from the tip of the branch (for pushes/schedule) or commit from the tip of the branch used for the PR. diff --git a/dev/breeze/doc/ci/04_selective_checks.md b/dev/breeze/doc/ci/04_selective_checks.md index 23131ec893948..e5894b0296875 100644 --- a/dev/breeze/doc/ci/04_selective_checks.md +++ b/dev/breeze/doc/ci/04_selective_checks.md @@ -169,8 +169,8 @@ Github Actions to pass the list of parameters to a command to execute | Output | Meaning of the output | Example value | List as string | |----------------------------------------|------------------------------------------------------------------------------------------------------|-------------------------------------------|----------------| | affected-providers-list-as-string | List of providers affected when they are selectively affected. 
| airbyte http | * | -| all-python-versions | List of all python versions there are available in the form of JSON array | ['3.8', '3.9', '3.10'] | | -| all-python-versions-list-as-string | List of all python versions there are available in the form of space separated string | 3.8 3.9 3.10 | * | +| all-python-versions | List of all python versions there are available in the form of JSON array | ['3.9', '3.10'] | | +| all-python-versions-list-as-string | List of all python versions there are available in the form of space separated string | 3.9 3.10 | * | | all-versions | If set to true, then all python, k8s, DB versions are used for tests. | false | | | basic-checks-only | Whether to run all static checks ("false") or only basic set of static checks ("true") | false | | | build_system_changed_in_pyproject_toml | When builds system dependencies changed in pyproject.toml changed in the PR. | false | | @@ -184,7 +184,7 @@ Github Actions to pass the list of parameters to a command to execute | default-kubernetes-version | Which Kubernetes version to use as default | v1.25.2 | | | default-mysql-version | Which MySQL version to use as default | 5.7 | | | default-postgres-version | Which Postgres version to use as default | 10 | | -| default-python-version | Which Python version to use as default | 3.8 | | +| default-python-version | Which Python version to use as default | 3.9 | | | docker-cache | Which cache should be used for images ("registry", "local" , "disabled") | registry | | | docs-build | Whether to build documentation ("true"/"false") | true | | | docs-list-as-string | What filter to apply to docs building - based on which documentation packages should be built | apache-airflow helm-chart google | | @@ -200,7 +200,7 @@ Github Actions to pass the list of parameters to a command to execute | is-self-hosted-runner | Whether the runner is self-hosted | false | | | is-vm-runner | Whether the runner uses VM to run | true | | | kind-version | Which Kind version to use for tests | v0.16.0 | | -| kubernetes-combos-list-as-string | All combinations of Python version and Kubernetes version to use for tests as space-separated string | 3.8-v1.25.2 3.9-v1.26.4 | * | +| kubernetes-combos-list-as-string | All combinations of Python version and Kubernetes version to use for tests as space-separated string | 3.9-v1.25.2 3.9-v1.26.4 | * | | kubernetes-versions | All Kubernetes versions to use for tests as JSON array | ['v1.25.2'] | | | kubernetes-versions-list-as-string | All Kubernetes versions to use for tests as space-separated string | v1.25.2 | * | | mypy-folders | List of folders to be considered for mypy | [] | | @@ -219,8 +219,8 @@ Github Actions to pass the list of parameters to a command to execute | prod-image-build | Whether PROD image build is needed | true | | | providers-compatibility-checks | List of dicts: (python_version, airflow_version, removed_providers) for compatibility checks | [] | | | pyproject-toml-changed | When pyproject.toml changed in the PR. 
| false | | -| python-versions | List of python versions to use for that build | ['3.8'] | * | -| python-versions-list-as-string | Which versions of MySQL to use for tests as space-separated string | 3.8 | * | +| python-versions | List of python versions to use for that build | ['3.9'] | * | +| python-versions-list-as-string | Which versions of MySQL to use for tests as space-separated string | 3.9 | * | | run-amazon-tests | Whether Amazon tests should be run ("true"/"false") | true | | | run-kubernetes-tests | Whether Kubernetes tests should be run ("true"/"false") | true | | | run-tests | Whether unit tests should be run ("true"/"false") | true | | diff --git a/dev/breeze/doc/ci/07_debugging.md b/dev/breeze/doc/ci/07_debugging.md index 6e6d46584edfa..9e7173ae84721 100644 --- a/dev/breeze/doc/ci/07_debugging.md +++ b/dev/breeze/doc/ci/07_debugging.md @@ -21,11 +21,11 @@ **Table of Contents** *generated with [DocToc](https://github.com/thlorenz/doctoc)* -- [Debugging CI Jobs in Github Actions](#debugging-ci-jobs-in-github-actions) +- [Debugging CI Jobs in Github Actions and changing their behaviour](#debugging-ci-jobs-in-github-actions-and-changing-their-behaviour) -# Debugging CI Jobs in Github Actions +# Debugging CI Jobs in Github Actions and changing their behaviour The CI jobs are notoriously difficult to test, because you can only really see results of it when you run them in CI environment, and the @@ -39,49 +39,28 @@ difficulty is that `Build Images` workflow is `pull-request-target` type, which means that it will always run using the `main` version - no matter what is in your Pull Request. -There are several ways how you can debug the CI jobs when you are -maintainer. +There are several ways how you can debug the CI jobs and modify their +behaviour when you are maintainer. + +When you create the PR you can set one of the labels below, also +in some cases, you need to run the PR as coming from the "apache" +repository rather than from your fork. + +You can also apply the label later and rebase the PR or close/reopen +the PR to apply the label to the PR. + +| Action to perform | Label to set | PR from "apache" repo | +|------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------|:---------------------:| +| Run the build with all combinations of all
python, backends, kubernetes etc on PR, and run all types of tests for all test groups. | full tests needed | |
+| Force to use public runners for the build | use public runners | |
+| Debug resources used during the build for parallel jobs | debug ci resources | |
+| Force running PR on latest versions of python, backends, kubernetes etc. when you want to save resources and test only latest versions | latest versions only | |
+| Force running PR on minimal (default) versions of python, backends, kubernetes etc. in order to save resources and run tests only for minimum versions | default versions only | |
+| Make sure to clean dependency cache, usually when removing dependencies. You also need to increase `DEPENDENCIES_EPOCH_NUMBER` in `Dockerfile.ci` | disable image cache | |
+| Change build images workflows, breeze code or scripts that are used during image build so that the scripts can be modified by PR | | Yes |
+| Treat your build as "canary" build - including updating constraints and pushing "main" documentation. | | Yes |
+| Remove any behaviour specific for the committers
such as using different runners by default. | non committer build | | -- When you want to tests the build with all combinations of all python, - backends etc on regular PR, add `full tests needed` label to the PR. -- When you want to test maintainer PR using public runners, add - `public runners` label to the PR -- When you want to see resources used by the run, add - `debug ci resources` label to the PR -- When you want to test changes to breeze that include changes to how - images are build you should push your PR to `apache` repository not to - your fork. This will run the images as part of the `CI` workflow - rather than using `Build images` workflow and use the same breeze - version for building image and testing -- When you want to test changes to workflows and CI scripts you can set - `all versions` label to the PR or `latest versions only`. - This will make the PR run using "all" versions of - Python, Kubernetes and the DBS. By default - unless you also change - dependencies in `pyproject.toml` or `generated/provider_dependencies.json` - such PRs will only use "default" versions of Python, Kubernetes and - DBs. This is useful when you want to test changes to the CI scripts - are not affected by the versions of Python, Kubernetes and DBs. -- Even if you change dependencies in `pyproject.toml`, or - `generated/provider_dependencies.json`, when you want to test changes to workflows - and CI scripts you can set `default versions only` label to the - This will make the PR run using the default (or latest) versions of - Python and Kubernetes and DBs. This is useful when you want to test - changes to the CI scripts and workflows and you want to use far - less resources than the full tests. -- When you want to test changes to `build-images.yml` workflow you - should push your branch as `main` branch in your local fork. This will - run changed `build-images.yml` workflow as it will be in `main` branch - of your fork -- When you are a committer and you change build images workflow, together - with build scripts, your build might fail because your scripts are used - in `build-images.yml` workflow, but the workflow is run using the `main` - version. Setting `non committer build` label will make your PR run using - the main version of the scripts and the workflow -- When you are a committer want to test how changes in your workflow affect - `canary` run, as maintainer, you should push your PR to `apache` repository - not to your fork and set `canary` label to the PR -- When you are a committer and want to test if the tests are passing if the - image is freshly built without cache, you can set `disable image cache` label. ----- diff --git a/dev/breeze/doc/ci/08_running_ci_locally.md b/dev/breeze/doc/ci/08_running_ci_locally.md index 6e1cbb0917536..cc9c89954df3e 100644 --- a/dev/breeze/doc/ci/08_running_ci_locally.md +++ b/dev/breeze/doc/ci/08_running_ci_locally.md @@ -72,19 +72,19 @@ For example knowing that the CI job was for commit `cd27124534b46c9688a1d89e75fcd137ab5137e3`: ``` bash -docker pull ghcr.io/apache/airflow/main/ci/python3.8:cd27124534b46c9688a1d89e75fcd137ab5137e3 +docker pull ghcr.io/apache/airflow/main/ci/python3.9:cd27124534b46c9688a1d89e75fcd137ab5137e3 -docker run -it ghcr.io/apache/airflow/main/ci/python3.8:cd27124534b46c9688a1d89e75fcd137ab5137e3 +docker run -it ghcr.io/apache/airflow/main/ci/python3.9:cd27124534b46c9688a1d89e75fcd137ab5137e3 ``` But you usually need to pass more variables and complex setup if you want to connect to a database or enable some integrations. 
Therefore it is easiest to use [Breeze](../README.rst) for that. For -example if you need to reproduce a MySQL environment in python 3.8 +example if you need to reproduce a MySQL environment in python 3.9 environment you can run: ``` bash -breeze --image-tag cd27124534b46c9688a1d89e75fcd137ab5137e3 --python 3.8 --backend mysql +breeze --image-tag cd27124534b46c9688a1d89e75fcd137ab5137e3 --python 3.9 --backend mysql ``` You will be dropped into a shell with the exact version that was used diff --git a/dev/breeze/doc/images/output-commands.svg b/dev/breeze/doc/images/output-commands.svg index 78c753526e449..f80a72a9fbc54 100644 --- a/dev/breeze/doc/images/output-commands.svg +++ b/dev/breeze/doc/images/output-commands.svg @@ -302,8 +302,8 @@ ╭─ Execution mode ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ --python-pPython major/minor version used in Airflow image for images. -(>3.8< | 3.9 | 3.10 | 3.11 | 3.12)                           -[default: 3.8]                                               +(>3.9< | 3.10 | 3.11 | 3.12)                                 +[default: 3.9]                                               --integrationIntegration(s) to enable when running (can be more than one).                        (all | all-testable | cassandra | celery | drill | kafka | kerberos | mongo | mssql  | openlineage | otel | pinot | qdrant | redis | statsd | trino | ydb)                diff --git a/dev/breeze/doc/images/output_build-docs.svg b/dev/breeze/doc/images/output_build-docs.svg index 0ddded9468a55..d52aa78d7ec1f 100644 --- a/dev/breeze/doc/images/output_build-docs.svg +++ b/dev/breeze/doc/images/output_build-docs.svg @@ -203,32 +203,32 @@ Build documents. ╭─ Doc flags ──────────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---docs-only-dOnly build documentation. ---spellcheck-only-sOnly run spell checking. ---clean-buildClean inventories of Inter-Sphinx documentation and generated APIs and sphinx     +--docs-only-dOnly build documentation. +--spellcheck-only-sOnly run spell checking. +--clean-buildClean inventories of Inter-Sphinx documentation and generated APIs and sphinx     artifacts before the build - useful for a clean build.                            ---one-pass-onlyBuilds documentation in one pass only. This is useful for debugging sphinx        +--one-pass-onlyBuilds documentation in one pass only. This is useful for debugging sphinx        errors.                                                                           ---package-filterFilter(s) to use more than one can be specified. You can use glob pattern         +--package-filterFilter(s) to use more than one can be specified. You can use glob pattern         matching the full package name, for example `apache-airflow-providers-*`. Useful  when you want to selectseveral similarly named packages together.                 (TEXT)                                                                            ---include-not-ready-providersWhether to include providers that are not yet ready to be released. ---include-removed-providersWhether to include providers that are removed. ---github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow] ---builderBuildx builder used to perform `docker buildx build` commands.(TEXT) +--include-not-ready-providersWhether to include providers that are not yet ready to be released. 
+--include-removed-providersWhether to include providers that are removed. +--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow] +--builderBuildx builder used to perform `docker buildx build` commands.(TEXT) [default: autodetect]                                          ---package-listOptional, contains comma-separated list of package ids that are processed for     +--package-listOptional, contains comma-separated list of package ids that are processed for     documentation building, and document publishing. It is an easier alternative to   adding individual packages as arguments to every command. This overrides the      packages passed as arguments.                                                     (TEXT)                                                                            ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---dry-run-DIf dry-run is set, commands are only printed, not executed. ---verbose-vPrint verbose information about performed steps. ---answer-aForce answer to questions.(y | n | q | yes | no | quit) ---help-hShow this message and exit. +--dry-run-DIf dry-run is set, commands are only printed, not executed. +--verbose-vPrint verbose information about performed steps. +--answer-aForce answer to questions.(y | n | q | yes | no | quit) +--help-hShow this message and exit. ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/dev/breeze/doc/images/output_ci-image_build.svg b/dev/breeze/doc/images/output_ci-image_build.svg index 131b618e403ce..62339f7053924 100644 --- a/dev/breeze/doc/images/output_ci-image_build.svg +++ b/dev/breeze/doc/images/output_ci-image_build.svg @@ -1,4 +1,4 @@ - +