From f3a6cdbed3aabe3a355fcbd7c043d9e16aa5062a Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Mon, 27 Nov 2023 03:37:28 +0100 Subject: [PATCH] Add common code and utils to implement ER pre-commit in a clearer way (#35875) * Add common code and utils to implement ER pre-commit in a clearer way Extracts common initialization code for pre-commits that allows using a simpler way of running pre-commits with breeze ci image. Rather than running a custom `docker` command, we are using the `breeze shell` command to run the command inside the image. We are also fixing a problem where the ER diagram was generated using a random backend - we force it now to use Postgres as backend. There are a few small problems solved as part of it as well: * We are also switching the console used to be the standard colored one so that the output will use colors also in git hook and CI. * When image needs update we are going to warn about it in the builds that are "serial" (i.e. do not run in parallel) that the image should be upgraded. We are not doing it for checks that use parallelism because it would pollute the output too much (and those are usually not run with images anyway). * There was an error where pre-commit run with breeze would also start port-forwarding if database was started together with it, so pre-commit project no longer adds port forwarding for backends. * If there was a database started as part of pre-commit project, it would continue running after pre-commit completed, which is undesirable. The pre-commit will now stop pre-commit project after completion. 
* Update dev/breeze/src/airflow_breeze/utils/md5_build_check.py Co-authored-by: Pankaj Koti --------- Co-authored-by: Pankaj Koti --- .github/workflows/ci.yml | 2 +- .pre-commit-config.yaml | 3 +- STATIC_CODE_CHECKS.rst | 4 +- .../commands/ci_image_commands.py | 5 +- .../commands/developer_commands.py | 6 +- .../commands/developer_commands_config.py | 1 + .../airflow_breeze/params/build_ci_params.py | 1 + .../src/airflow_breeze/params/shell_params.py | 5 + .../airflow_breeze/utils/common_options.py | 6 + .../airflow_breeze/utils/md5_build_check.py | 27 +- docs/apache-airflow/img/airflow_erd.svg | 2870 ++++++++--------- images/breeze/output_shell.svg | 162 +- images/breeze/output_shell.txt | 2 +- .../ci/pre_commit/common_precommit_utils.py | 91 + .../pre_commit_update_er_diagram.py | 60 +- .../in_container/run_prepare_er_diagram.py | 7 +- 16 files changed, 1682 insertions(+), 1570 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dd5ec1828e681c..53e4da69166270 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -611,7 +611,7 @@ jobs: --skip-image-upgrade-check --commit-ref "${{ github.sha }}" env: VERBOSE: "false" - SKIP_IMAGE_PRE_COMMITS: "true" + SKIP_BREEZE_PRE_COMMITS: "true" SKIP: ${{ needs.build-info.outputs.skip-pre-commits }} COLUMNS: "250" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bf8f53483ae5ef..85a7cf96c72094 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -429,6 +429,7 @@ repos: language: python files: ^docs pass_filenames: false + additional_dependencies: ['rich>=12.4.4'] - id: check-pydevd-left-in-code language: pygrep name: Check for pydevd debug statements accidentally left @@ -1070,5 +1071,5 @@ repos: entry: ./scripts/ci/pre_commit/pre_commit_update_er_diagram.py pass_filenames: false files: ^airflow/migrations/versions/.*\.py$|^docs/apache-airflow/migrations-ref\.rst$ - additional_dependencies: ['rich>=12.4.4', 'inputimeout', 'pyyaml', 
'jsonschema', 'filelock', 'markdown-it-py'] + additional_dependencies: ['rich>=12.4.4'] ## ONLY ADD PRE-COMMITS HERE THAT REQUIRE CI IMAGE diff --git a/STATIC_CODE_CHECKS.rst b/STATIC_CODE_CHECKS.rst index 56533885de7173..b83cb64859322a 100644 --- a/STATIC_CODE_CHECKS.rst +++ b/STATIC_CODE_CHECKS.rst @@ -123,8 +123,8 @@ require Breeze Docker image to be built locally. ``export SKIP=ruff,mypy-core,``. You can also add this to your ``.bashrc`` or ``.zshrc`` if you do not want to set it manually every time you enter the terminal. - In case you do not have breeze image configured locally, you can also disable all checks that require - the image by setting ``SKIP_IMAGE_PRE_COMMITS`` to "true". This will mark the tests as "green" automatically + In case you do not have breeze image configured locally, you can also disable all checks that require breeze + the image by setting ``SKIP_BREEZE_PRE_COMMITS`` to "true". This will mark the tests as "green" automatically when run locally (note that those checks will anyway run in CI). .. 
note:: Mypy volume cache diff --git a/dev/breeze/src/airflow_breeze/commands/ci_image_commands.py b/dev/breeze/src/airflow_breeze/commands/ci_image_commands.py index 9f8b345efcc698..52134e6b0e9afb 100644 --- a/dev/breeze/src/airflow_breeze/commands/ci_image_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/ci_image_commands.py @@ -132,8 +132,6 @@ def check_if_image_building_is_needed(ci_image_params: BuildCiParams, output: Ou ) if result.returncode != 0: return True - if ci_image_params.skip_image_upgrade_check: - return False if not ci_image_params.force_build and not ci_image_params.upgrade_to_newer_dependencies: if not should_we_run_the_build(build_ci_params=ci_image_params): return False @@ -638,6 +636,7 @@ def should_we_run_the_build(build_ci_params: BuildCiParams) -> bool: from inputimeout import TimeoutOccurred if not md5sum_check_if_build_is_needed( + build_ci_params=build_ci_params, md5sum_cache_dir=build_ci_params.md5sum_cache_dir, skip_provider_dependencies_check=build_ci_params.skip_provider_dependencies_check, ): @@ -808,6 +807,8 @@ def rebuild_or_pull_ci_image_if_needed(command_params: ShellParams | BuildCiPara platform=command_params.platform, force_build=command_params.force_build, skip_provider_dependencies_check=command_params.skip_provider_dependencies_check, + skip_image_upgrade_check=command_params.skip_image_upgrade_check, + warn_image_upgrade_needed=command_params.warn_image_upgrade_needed, ) if command_params.image_tag is not None and command_params.image_tag != "latest": return_code, message = run_pull_image( diff --git a/dev/breeze/src/airflow_breeze/commands/developer_commands.py b/dev/breeze/src/airflow_breeze/commands/developer_commands.py index 4699d5522b96ce..2f22b5cbef0549 100644 --- a/dev/breeze/src/airflow_breeze/commands/developer_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/developer_commands.py @@ -85,6 +85,7 @@ option_use_airflow_version, option_use_packages_from_dist, option_verbose, + 
option_warn_image_upgrade_needed, ) from airflow_breeze.utils.console import get_console from airflow_breeze.utils.custom_param_types import BetterChoice @@ -203,6 +204,7 @@ def run(self): @option_skip_db_tests @option_skip_environment_initialization @option_skip_image_upgrade_check +@option_warn_image_upgrade_needed @option_standalone_dag_processor @option_upgrade_boto @option_use_airflow_version @@ -248,6 +250,7 @@ def shell( use_airflow_version: str | None, use_packages_from_dist: bool, verbose_commands: bool, + warn_image_upgrade_needed: bool, ): """Enter breeze environment. this is the default command use when no other is selected.""" if get_verbose() or get_dry_run() and not quiet: @@ -298,6 +301,7 @@ def shell( use_packages_from_dist=use_packages_from_dist, verbose_commands=verbose_commands, restart=restart, + warn_image_upgrade_needed=warn_image_upgrade_needed, ) fix_ownership_using_docker() sys.exit(result.returncode) @@ -693,7 +697,7 @@ def static_checks( text=True, env=env, ) - if not os.environ.get("SKIP_IMAGE_PRE_COMMITS"): + if not os.environ.get("SKIP_BREEZE_PRE_COMMITS"): fix_ownership_using_docker() if static_checks_result.returncode != 0: if os.environ.get("CI"): diff --git a/dev/breeze/src/airflow_breeze/commands/developer_commands_config.py b/dev/breeze/src/airflow_breeze/commands/developer_commands_config.py index 245436ce6f4d38..6d0456b1bd56f9 100644 --- a/dev/breeze/src/airflow_breeze/commands/developer_commands_config.py +++ b/dev/breeze/src/airflow_breeze/commands/developer_commands_config.py @@ -94,6 +94,7 @@ "options": [ "--quiet", "--skip-image-upgrade-check", + "--warn-image-upgrade-needed", "--skip-environment-initialization", "--tty", ], diff --git a/dev/breeze/src/airflow_breeze/params/build_ci_params.py b/dev/breeze/src/airflow_breeze/params/build_ci_params.py index 35b04f167a0f62..37678d130e82bb 100644 --- a/dev/breeze/src/airflow_breeze/params/build_ci_params.py +++ b/dev/breeze/src/airflow_breeze/params/build_ci_params.py @@ 
-42,6 +42,7 @@ class BuildCiParams(CommonBuildParams): eager_upgrade_additional_requirements: str | None = None skip_provider_dependencies_check: bool = False skip_image_upgrade_check: bool = False + warn_image_upgrade_needed: bool = False @property def airflow_version(self): diff --git a/dev/breeze/src/airflow_breeze/params/shell_params.py b/dev/breeze/src/airflow_breeze/params/shell_params.py index ff73cfbd6d0b9a..a314075d29ddbf 100644 --- a/dev/breeze/src/airflow_breeze/params/shell_params.py +++ b/dev/breeze/src/airflow_breeze/params/shell_params.py @@ -183,6 +183,7 @@ class ShellParams: verbose: bool = False verbose_commands: bool = False version_suffix_for_pypi: str = "" + warn_image_upgrade_needed: bool = False def clone_with_test(self, test_type: str) -> ShellParams: new_params = deepcopy(self) @@ -275,6 +276,10 @@ def get_backend_compose_files(self, backend: str) -> list[Path]: backend_docker_compose_file = DOCKER_COMPOSE_DIR / f"backend-{backend}.yml" if backend in ("sqlite", "none") or not self.forward_ports: return [backend_docker_compose_file] + if self.project_name == "pre-commit": + # do not forward ports for pre-commit runs - to not clash with running containers from + # breeze + return [backend_docker_compose_file] return [backend_docker_compose_file, DOCKER_COMPOSE_DIR / f"backend-{backend}-port.yml"] @cached_property diff --git a/dev/breeze/src/airflow_breeze/utils/common_options.py b/dev/breeze/src/airflow_breeze/utils/common_options.py index a9bb98b60882a8..cf59df8bc09a37 100644 --- a/dev/breeze/src/airflow_breeze/utils/common_options.py +++ b/dev/breeze/src/airflow_breeze/utils/common_options.py @@ -758,6 +758,12 @@ def _set_default_from_parent(ctx: click.core.Context, option: click.core.Option, is_flag=True, envvar="SKIP_IMAGE_UPGRADE_CHECK", ) +option_warn_image_upgrade_needed = click.option( + "--warn-image-upgrade-needed", + help="Warn when image upgrade is needed even if --skip-upgrade-check is used.", + is_flag=True, + 
envvar="WARN_IMAGE_UPGRADE_NEEDED", +) option_skip_environment_initialization = click.option( "--skip-environment-initialization", help="Skip running breeze entrypoint initialization - no user output, no db checks.", diff --git a/dev/breeze/src/airflow_breeze/utils/md5_build_check.py b/dev/breeze/src/airflow_breeze/utils/md5_build_check.py index d1ec943785d42c..9a43f6ea38419d 100644 --- a/dev/breeze/src/airflow_breeze/utils/md5_build_check.py +++ b/dev/breeze/src/airflow_breeze/utils/md5_build_check.py @@ -23,12 +23,16 @@ import os import sys from pathlib import Path +from typing import TYPE_CHECKING from airflow_breeze.global_constants import ALL_PROVIDER_YAML_FILES, FILES_FOR_REBUILD_CHECK from airflow_breeze.utils.console import get_console from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT from airflow_breeze.utils.run_utils import run_command +if TYPE_CHECKING: + from airflow_breeze.params.build_ci_params import BuildCiParams + def check_md5checksum_in_cache_modified(file_hash: str, cache_path: Path, update: bool) -> bool: """ @@ -126,20 +130,33 @@ def calculate_md5_checksum_for_files( return modified_files, not_modified_files -def md5sum_check_if_build_is_needed(md5sum_cache_dir: Path, skip_provider_dependencies_check: bool) -> bool: +def md5sum_check_if_build_is_needed( + build_ci_params: BuildCiParams, md5sum_cache_dir: Path, skip_provider_dependencies_check: bool +) -> bool: """ Checks if build is needed based on whether important files were modified. + :param build_ci_params: parameters for the build :param md5sum_cache_dir: directory where cached md5 sums are stored :param skip_provider_dependencies_check: whether to skip regeneration of the provider dependencies :return: True if build is needed. 
""" - build_needed = False modified_files, not_modified_files = calculate_md5_checksum_for_files( md5sum_cache_dir, update=False, skip_provider_dependencies_check=skip_provider_dependencies_check ) if modified_files: + if build_ci_params.skip_image_upgrade_check: + if build_ci_params.warn_image_upgrade_needed: + get_console().print( + "\n[warning]You are skipping the image upgrade check, but the image needs an upgrade. " + "This might lead to out-dated results of the check![/]" + ) + get_console().print( + f"[info]Consider running `breeze ci-image build --python {build_ci_params.python} " + f"at earliest convenience![/]\n" + ) + return False get_console().print( f"[warning]The following important files are modified in {AIRFLOW_SOURCES_ROOT} " f"since last time image was built: [/]\n\n" @@ -147,13 +164,15 @@ def md5sum_check_if_build_is_needed(md5sum_cache_dir: Path, skip_provider_depend for file in modified_files: get_console().print(f" * [info]{file}[/]") get_console().print("\n[warning]Likely CI image needs rebuild[/]\n") - build_needed = True + return True else: + if build_ci_params.skip_image_upgrade_check: + return False get_console().print( "[info]Docker image build is not needed for CI build as no important files are changed! 
" "You can add --force-build to force it[/]" ) - return build_needed + return False def save_md5_file(cache_path: Path, file_content: str) -> None: diff --git a/docs/apache-airflow/img/airflow_erd.svg b/docs/apache-airflow/img/airflow_erd.svg index a0cfc1866cbec9..91b4231b3e4060 100644 --- a/docs/apache-airflow/img/airflow_erd.svg +++ b/docs/apache-airflow/img/airflow_erd.svg @@ -4,1632 +4,1632 @@ - - + + %3 - - + + -ab_permission - -ab_permission - -id - [INTEGER] - NOT NULL - -name - [VARCHAR(100)] - NOT NULL +job + +job + +id + [INTEGER] + NOT NULL + +dag_id + [VARCHAR(250)] + +end_date + [TIMESTAMP] + +executor_class + [VARCHAR(500)] + +hostname + [VARCHAR(500)] + +job_type + [VARCHAR(30)] + +latest_heartbeat + [TIMESTAMP] + +start_date + [TIMESTAMP] + +state + [VARCHAR(20)] + +unixname + [VARCHAR(1000)] - + -ab_permission_view - -ab_permission_view - -id - [INTEGER] - NOT NULL - -permission_id - [INTEGER] - -view_menu_id - [INTEGER] - - - -ab_permission--ab_permission_view - -0..N -{0,1} +slot_pool + +slot_pool + +id + [INTEGER] + NOT NULL + +description + [TEXT] + +include_deferred + [BOOLEAN] + NOT NULL + +pool + [VARCHAR(256)] + +slots + [INTEGER] - + -ab_permission_view_role - -ab_permission_view_role - -id - [INTEGER] - NOT NULL - -permission_view_id - [INTEGER] - -role_id - [INTEGER] - - - -ab_permission_view--ab_permission_view_role - -0..N -{0,1} +log + +log + +id + [INTEGER] + NOT NULL + +dag_id + [VARCHAR(250)] + +dttm + [TIMESTAMP] + +event + [VARCHAR(30)] + +execution_date + [TIMESTAMP] + +extra + [TEXT] + +map_index + [INTEGER] + +owner + [VARCHAR(500)] + +owner_display_name + [VARCHAR(500)] + +task_id + [VARCHAR(250)] - + -ab_view_menu - -ab_view_menu - -id - [INTEGER] - NOT NULL - -name - [VARCHAR(250)] - NOT NULL - - - -ab_view_menu--ab_permission_view - -0..N -{0,1} +dag_code + +dag_code + +fileloc_hash + [BIGINT] + NOT NULL + +fileloc + [VARCHAR(2000)] + NOT NULL + +last_updated + [TIMESTAMP] + NOT NULL + +source_code + [TEXT] + NOT NULL - + 
-ab_role - -ab_role - -id - [INTEGER] - NOT NULL - -name - [VARCHAR(64)] - NOT NULL - - - -ab_role--ab_permission_view_role - -0..N -{0,1} - - - -ab_user_role - -ab_user_role - -id - [INTEGER] - NOT NULL - -role_id - [INTEGER] - -user_id - [INTEGER] - - - -ab_role--ab_user_role - -0..N -{0,1} - - - -ab_register_user - -ab_register_user - -id - [INTEGER] - NOT NULL - -email - [VARCHAR(512)] - NOT NULL - -first_name - [VARCHAR(256)] - NOT NULL - -last_name - [VARCHAR(256)] - NOT NULL - -password - [VARCHAR(256)] - -registration_date - [DATETIME] - -registration_hash - [VARCHAR(256)] - -username - [VARCHAR(512)] - NOT NULL +dag_pickle + +dag_pickle + +id + [INTEGER] + NOT NULL + +created_dttm + [TIMESTAMP] + +pickle + [BYTEA] + +pickle_hash + [BIGINT] - + ab_user - -ab_user - -id - [INTEGER] - NOT NULL - -active - [BOOLEAN] - -changed_by_fk - [INTEGER] - -changed_on - [DATETIME] - -created_by_fk - [INTEGER] - -created_on - [DATETIME] - -email - [VARCHAR(512)] - NOT NULL - -fail_login_count - [INTEGER] - -first_name - [VARCHAR(256)] - NOT NULL - -last_login - [DATETIME] - -last_name - [VARCHAR(256)] - NOT NULL - -login_count - [INTEGER] - -password - [VARCHAR(256)] - -username - [VARCHAR(512)] - NOT NULL - - - -ab_user--ab_user_role - -0..N -{0,1} + +ab_user + +id + [INTEGER] + NOT NULL + +active + [BOOLEAN] + +changed_by_fk + [INTEGER] + +changed_on + [TIMESTAMP] + +created_by_fk + [INTEGER] + +created_on + [TIMESTAMP] + +email + [VARCHAR(512)] + NOT NULL + +fail_login_count + [INTEGER] + +first_name + [VARCHAR(256)] + NOT NULL + +last_login + [TIMESTAMP] + +last_name + [VARCHAR(256)] + NOT NULL + +login_count + [INTEGER] + +password + [VARCHAR(256)] + +username + [VARCHAR(512)] + NOT NULL - + ab_user--ab_user - -0..N -{0,1} + +0..N +{0,1} - + ab_user--ab_user - -0..N -{0,1} + +0..N +{0,1} + + + +ab_user_role + +ab_user_role + +id + [INTEGER] + NOT NULL + +role_id + [INTEGER] + +user_id + [INTEGER] + + + +ab_user--ab_user_role + +0..N +{0,1} - + dag_run_note - 
-dag_run_note - -dag_run_id - [INTEGER] - NOT NULL - -content - [VARCHAR(1000)] - -created_at - [TIMESTAMP] - NOT NULL - -updated_at - [TIMESTAMP] - NOT NULL - -user_id - [INTEGER] + +dag_run_note + +dag_run_id + [INTEGER] + NOT NULL + +content + [VARCHAR(1000)] + +created_at + [TIMESTAMP] + NOT NULL + +updated_at + [TIMESTAMP] + NOT NULL + +user_id + [INTEGER] - + ab_user--dag_run_note - -0..N -{0,1} + +0..N +{0,1} - + task_instance_note - -task_instance_note - -dag_id - [VARCHAR(250)] - NOT NULL - -map_index - [INTEGER] - NOT NULL - -run_id - [VARCHAR(250)] - NOT NULL - -task_id - [VARCHAR(250)] - NOT NULL - -content - [VARCHAR(1000)] - -created_at - [TIMESTAMP] - NOT NULL - -updated_at - [TIMESTAMP] - NOT NULL - -user_id - [INTEGER] + +task_instance_note + +dag_id + [VARCHAR(250)] + NOT NULL + +map_index + [INTEGER] + NOT NULL + +run_id + [VARCHAR(250)] + NOT NULL + +task_id + [VARCHAR(250)] + NOT NULL + +content + [VARCHAR(1000)] + +created_at + [TIMESTAMP] + NOT NULL + +updated_at + [TIMESTAMP] + NOT NULL + +user_id + [INTEGER] - + ab_user--task_instance_note - -0..N -{0,1} + +0..N +{0,1} - + + +ab_register_user + +ab_register_user + +id + [INTEGER] + NOT NULL + +email + [VARCHAR(512)] + NOT NULL + +first_name + [VARCHAR(256)] + NOT NULL + +last_name + [VARCHAR(256)] + NOT NULL + +password + [VARCHAR(256)] + +registration_date + [TIMESTAMP] + +registration_hash + [VARCHAR(256)] + +username + [VARCHAR(512)] + NOT NULL + + -alembic_version - -alembic_version - -version_num - [VARCHAR(32)] - NOT NULL +connection + +connection + +id + [INTEGER] + NOT NULL + +conn_id + [VARCHAR(250)] + NOT NULL + +conn_type + [VARCHAR(500)] + NOT NULL + +description + [TEXT] + +extra + [TEXT] + +host + [VARCHAR(500)] + +is_encrypted + [BOOLEAN] + +is_extra_encrypted + [BOOLEAN] + +login + [TEXT] + +password + [TEXT] + +port + [INTEGER] + +schema + [VARCHAR(500)] callback_request - -callback_request - -id - [INTEGER] - NOT NULL - -callback_data - [JSON] - NOT NULL - -callback_type - 
[VARCHAR(20)] - NOT NULL - -created_at - [TIMESTAMP] - NOT NULL - -priority_weight - [INTEGER] - NOT NULL - -processor_subdir - [VARCHAR(2000)] + +callback_request + +id + [INTEGER] + NOT NULL + +callback_data + [JSON] + NOT NULL + +callback_type + [VARCHAR(20)] + NOT NULL + +created_at + [TIMESTAMP] + NOT NULL + +priority_weight + [INTEGER] + NOT NULL + +processor_subdir + [VARCHAR(2000)] - + -connection - -connection - -id - [INTEGER] - NOT NULL - -conn_id - [VARCHAR(250)] - NOT NULL - -conn_type - [VARCHAR(500)] - NOT NULL - -description - [VARCHAR(5000)] - -extra - [TEXT] - -host - [VARCHAR(500)] - -is_encrypted - [BOOLEAN] - -is_extra_encrypted - [BOOLEAN] - -login - [TEXT] - -password - [TEXT] - -port - [INTEGER] - -schema - [VARCHAR(500)] +import_error + +import_error + +id + [INTEGER] + NOT NULL + +filename + [VARCHAR(1024)] + +stacktrace + [TEXT] + +timestamp + [TIMESTAMP] - + -dag - -dag - -dag_id - [VARCHAR(250)] - NOT NULL - -default_view - [VARCHAR(25)] - -description - [TEXT] - -fileloc - [VARCHAR(2000)] - -has_import_errors - [BOOLEAN] - -has_task_concurrency_limits - [BOOLEAN] - NOT NULL - -is_active - [BOOLEAN] - -is_paused - [BOOLEAN] - -is_subdag - [BOOLEAN] - -last_expired - [TIMESTAMP] - -last_parsed_time - [TIMESTAMP] - -last_pickled - [TIMESTAMP] - -max_active_runs - [INTEGER] - -max_active_tasks - [INTEGER] - NOT NULL - -next_dagrun - [TIMESTAMP] - -next_dagrun_create_after - [TIMESTAMP] - -next_dagrun_data_interval_end - [TIMESTAMP] - -next_dagrun_data_interval_start - [TIMESTAMP] - -owners - [VARCHAR(2000)] - -pickle_id - [INTEGER] - -processor_subdir - [VARCHAR(2000)] - -root_dag_id - [VARCHAR(250)] - -schedule_interval - [TEXT] - -scheduler_lock - [BOOLEAN] - -timetable_description - [VARCHAR(1000)] +sla_miss + +sla_miss + +dag_id + [VARCHAR(250)] + NOT NULL + +execution_date + [TIMESTAMP] + NOT NULL + +task_id + [VARCHAR(250)] + NOT NULL + +description + [TEXT] + +email_sent + [BOOLEAN] + +notification_sent + [BOOLEAN] + +timestamp + 
[TIMESTAMP] - + -dag_owner_attributes - -dag_owner_attributes - -dag_id - [VARCHAR(250)] - NOT NULL - -owner - [VARCHAR(500)] - NOT NULL - -link - [VARCHAR(500)] - NOT NULL +variable + +variable + +id + [INTEGER] + NOT NULL + +description + [TEXT] + +is_encrypted + [BOOLEAN] + +key + [VARCHAR(250)] + +val + [TEXT] - - -dag--dag_owner_attributes - -1 -1 + + +serialized_dag + +serialized_dag + +dag_id + [VARCHAR(250)] + NOT NULL + +dag_hash + [VARCHAR(32)] + NOT NULL + +data + [JSON] + +data_compressed + [BYTEA] + +fileloc + [VARCHAR(2000)] + NOT NULL + +fileloc_hash + [BIGINT] + NOT NULL + +last_updated + [TIMESTAMP] + NOT NULL + +processor_subdir + [VARCHAR(2000)] + + + +dataset + +dataset + +id + [INTEGER] + NOT NULL + +created_at + [TIMESTAMP] + NOT NULL + +extra + [JSON] + NOT NULL + +is_orphaned + [BOOLEAN] + NOT NULL + +updated_at + [TIMESTAMP] + NOT NULL + +uri + [VARCHAR(3000)] + NOT NULL - + dag_schedule_dataset_reference - -dag_schedule_dataset_reference - -dag_id - [VARCHAR(250)] - NOT NULL - -dataset_id - [INTEGER] - NOT NULL - -created_at - [TIMESTAMP] - NOT NULL - -updated_at - [TIMESTAMP] - NOT NULL + +dag_schedule_dataset_reference + +dag_id + [VARCHAR(250)] + NOT NULL + +dataset_id + [INTEGER] + NOT NULL + +created_at + [TIMESTAMP] + NOT NULL + +updated_at + [TIMESTAMP] + NOT NULL + + + +dataset--dag_schedule_dataset_reference + +1 +1 + + + +task_outlet_dataset_reference + +task_outlet_dataset_reference + +dag_id + [VARCHAR(250)] + NOT NULL + +dataset_id + [INTEGER] + NOT NULL + +task_id + [VARCHAR(250)] + NOT NULL + +created_at + [TIMESTAMP] + NOT NULL + +updated_at + [TIMESTAMP] + NOT NULL + + + +dataset--task_outlet_dataset_reference + +1 +1 + + + +dataset_dag_run_queue + +dataset_dag_run_queue + +dataset_id + [INTEGER] + NOT NULL + +target_dag_id + [VARCHAR(250)] + NOT NULL + +created_at + [TIMESTAMP] + NOT NULL + + + +dataset--dataset_dag_run_queue + +1 +1 + + + +dag + +dag + +dag_id + [VARCHAR(250)] + NOT NULL + +default_view + [VARCHAR(25)] + 
+description + [TEXT] + +fileloc + [VARCHAR(2000)] + +has_import_errors + [BOOLEAN] + +has_task_concurrency_limits + [BOOLEAN] + NOT NULL + +is_active + [BOOLEAN] + +is_paused + [BOOLEAN] + +is_subdag + [BOOLEAN] + +last_expired + [TIMESTAMP] + +last_parsed_time + [TIMESTAMP] + +last_pickled + [TIMESTAMP] + +max_active_runs + [INTEGER] + +max_active_tasks + [INTEGER] + NOT NULL + +next_dagrun + [TIMESTAMP] + +next_dagrun_create_after + [TIMESTAMP] + +next_dagrun_data_interval_end + [TIMESTAMP] + +next_dagrun_data_interval_start + [TIMESTAMP] + +owners + [VARCHAR(2000)] + +pickle_id + [INTEGER] + +processor_subdir + [VARCHAR(2000)] + +root_dag_id + [VARCHAR(250)] + +schedule_interval + [TEXT] + +scheduler_lock + [BOOLEAN] + +timetable_description + [VARCHAR(1000)] - + dag--dag_schedule_dataset_reference - -1 -1 + +1 +1 + + + +dag--task_outlet_dataset_reference + +1 +1 + + + +dag--dataset_dag_run_queue + +1 +1 - + dag_tag - -dag_tag - -dag_id - [VARCHAR(250)] - NOT NULL - -name - [VARCHAR(100)] - NOT NULL + +dag_tag + +dag_id + [VARCHAR(250)] + NOT NULL + +name + [VARCHAR(100)] + NOT NULL - + dag--dag_tag - -1 -1 + +1 +1 + + + +dag_owner_attributes + +dag_owner_attributes + +dag_id + [VARCHAR(250)] + NOT NULL + +owner + [VARCHAR(500)] + NOT NULL + +link + [VARCHAR(500)] + NOT NULL + + + +dag--dag_owner_attributes + +1 +1 - + dag_warning - -dag_warning - -dag_id - [VARCHAR(250)] - NOT NULL - -warning_type - [VARCHAR(50)] - NOT NULL - -message - [TEXT] - NOT NULL - -timestamp - [TIMESTAMP] - NOT NULL + +dag_warning + +dag_id + [VARCHAR(250)] + NOT NULL + +warning_type + [VARCHAR(50)] + NOT NULL + +message + [TEXT] + NOT NULL + +timestamp + [TIMESTAMP] + NOT NULL dag--dag_warning - -1 -1 - - - -dataset_dag_run_queue - -dataset_dag_run_queue - -dataset_id - [INTEGER] - NOT NULL - -target_dag_id - [VARCHAR(250)] - NOT NULL - -created_at - [TIMESTAMP] - NOT NULL - - - -dag--dataset_dag_run_queue - -1 -1 + +1 +1 - - -task_outlet_dataset_reference - 
-task_outlet_dataset_reference - -dag_id - [VARCHAR(250)] - NOT NULL - -dataset_id - [INTEGER] - NOT NULL - -task_id - [VARCHAR(250)] - NOT NULL - -created_at - [TIMESTAMP] - NOT NULL - -updated_at - [TIMESTAMP] - NOT NULL - - - -dag--task_outlet_dataset_reference - -1 -1 - - - -dag_code - -dag_code - -fileloc_hash - [BIGINT] - NOT NULL - -fileloc - [VARCHAR(2000)] - NOT NULL - -last_updated - [TIMESTAMP] - NOT NULL - -source_code - [TEXT] - NOT NULL - - - -dag_pickle - -dag_pickle - -id - [INTEGER] - NOT NULL - -created_dttm - [TIMESTAMP] - -pickle - [BLOB] - -pickle_hash - [BIGINT] + + +log_template + +log_template + +id + [INTEGER] + NOT NULL + +created_at + [TIMESTAMP] + NOT NULL + +elasticsearch_id + [TEXT] + NOT NULL + +filename + [TEXT] + NOT NULL - + dag_run - -dag_run - -id - [INTEGER] - NOT NULL - -clear_number - [INTEGER] - NOT NULL - -conf - [BLOB] - -creating_job_id - [INTEGER] - -dag_hash - [VARCHAR(32)] - -dag_id - [VARCHAR(250)] - NOT NULL - -data_interval_end - [TIMESTAMP] - -data_interval_start - [TIMESTAMP] - -end_date - [TIMESTAMP] - -execution_date - [TIMESTAMP] - NOT NULL - -external_trigger - [BOOLEAN] - -last_scheduling_decision - [TIMESTAMP] - -log_template_id - [INTEGER] - -queued_at - [TIMESTAMP] - -run_id - [VARCHAR(250)] - NOT NULL - -run_type - [VARCHAR(50)] - NOT NULL - -start_date - [TIMESTAMP] - -state - [VARCHAR(50)] - -updated_at - [TIMESTAMP] + +dag_run + +id + [INTEGER] + NOT NULL + +clear_number + [INTEGER] + NOT NULL + +conf + [BYTEA] + +creating_job_id + [INTEGER] + +dag_hash + [VARCHAR(32)] + +dag_id + [VARCHAR(250)] + NOT NULL + +data_interval_end + [TIMESTAMP] + +data_interval_start + [TIMESTAMP] + +end_date + [TIMESTAMP] + +execution_date + [TIMESTAMP] + NOT NULL + +external_trigger + [BOOLEAN] + +last_scheduling_decision + [TIMESTAMP] + +log_template_id + [INTEGER] + +queued_at + [TIMESTAMP] + +run_id + [VARCHAR(250)] + NOT NULL + +run_type + [VARCHAR(50)] + NOT NULL + +start_date + [TIMESTAMP] + +state + [VARCHAR(50)] + 
+updated_at + [TIMESTAMP] + + + +log_template--dag_run + +0..N +{0,1} - + dag_run--dag_run_note - -1 -1 + +1 +1 - + dagrun_dataset_event - -dagrun_dataset_event - -dag_run_id - [INTEGER] - NOT NULL - -event_id - [INTEGER] - NOT NULL + +dagrun_dataset_event + +dag_run_id + [INTEGER] + NOT NULL + +event_id + [INTEGER] + NOT NULL - + dag_run--dagrun_dataset_event - -1 -1 + +1 +1 - + task_instance - -task_instance - -dag_id - [VARCHAR(250)] - NOT NULL - -map_index - [INTEGER] - NOT NULL - -run_id - [VARCHAR(250)] - NOT NULL - -task_id - [VARCHAR(250)] - NOT NULL - -custom_operator_name - [VARCHAR(1000)] - -duration - [FLOAT] - -end_date - [TIMESTAMP] - -executor_config - [BLOB] - -external_executor_id - [VARCHAR(250)] - -hostname - [VARCHAR(1000)] - -job_id - [INTEGER] - -max_tries - [INTEGER] - -next_kwargs - [JSON] - -next_method - [VARCHAR(1000)] - -operator - [VARCHAR(1000)] - -pid - [INTEGER] - -pool - [VARCHAR(256)] - NOT NULL - -pool_slots - [INTEGER] - NOT NULL - -priority_weight - [INTEGER] - -queue - [VARCHAR(256)] - -queued_by_job_id - [INTEGER] - -queued_dttm - [TIMESTAMP] - -start_date - [TIMESTAMP] - -state - [VARCHAR(20)] - -trigger_id - [INTEGER] - -trigger_timeout - [DATETIME] - -try_number - [INTEGER] - -unixname - [VARCHAR(1000)] - -updated_at - [TIMESTAMP] + +task_instance + +dag_id + [VARCHAR(250)] + NOT NULL + +map_index + [INTEGER] + NOT NULL + +run_id + [VARCHAR(250)] + NOT NULL + +task_id + [VARCHAR(250)] + NOT NULL + +custom_operator_name + [VARCHAR(1000)] + +duration + [DOUBLE_PRECISION] + +end_date + [TIMESTAMP] + +executor_config + [BYTEA] + +external_executor_id + [VARCHAR(250)] + +hostname + [VARCHAR(1000)] + +job_id + [INTEGER] + +max_tries + [INTEGER] + +next_kwargs + [JSON] + +next_method + [VARCHAR(1000)] + +operator + [VARCHAR(1000)] + +pid + [INTEGER] + +pool + [VARCHAR(256)] + NOT NULL + +pool_slots + [INTEGER] + NOT NULL + +priority_weight + [INTEGER] + +queue + [VARCHAR(256)] + +queued_by_job_id + [INTEGER] + +queued_dttm + 
[TIMESTAMP] + +start_date + [TIMESTAMP] + +state + [VARCHAR(20)] + +trigger_id + [INTEGER] + +trigger_timeout + [TIMESTAMP] + +try_number + [INTEGER] + +unixname + [VARCHAR(1000)] + +updated_at + [TIMESTAMP] - + dag_run--task_instance - -1 -1 + +1 +1 - + dag_run--task_instance - -1 -1 + +1 +1 - + task_reschedule - -task_reschedule - -id - [INTEGER] - NOT NULL - -dag_id - [VARCHAR(250)] - NOT NULL - -duration - [INTEGER] - NOT NULL - -end_date - [TIMESTAMP] - NOT NULL - -map_index - [INTEGER] - NOT NULL - -reschedule_date - [TIMESTAMP] - NOT NULL - -run_id - [VARCHAR(250)] - NOT NULL - -start_date - [TIMESTAMP] - NOT NULL - -task_id - [VARCHAR(250)] - NOT NULL - -try_number - [INTEGER] - NOT NULL + +task_reschedule + +id + [INTEGER] + NOT NULL + +dag_id + [VARCHAR(250)] + NOT NULL + +duration + [INTEGER] + NOT NULL + +end_date + [TIMESTAMP] + NOT NULL + +map_index + [INTEGER] + NOT NULL + +reschedule_date + [TIMESTAMP] + NOT NULL + +run_id + [VARCHAR(250)] + NOT NULL + +start_date + [TIMESTAMP] + NOT NULL + +task_id + [VARCHAR(250)] + NOT NULL + +try_number + [INTEGER] + NOT NULL - + dag_run--task_reschedule - -0..N -1 + +0..N +1 - + dag_run--task_reschedule - -0..N -1 + +0..N +1 - + task_instance--task_instance_note - -1 -1 + +1 +1 - + task_instance--task_instance_note - -1 -1 + +1 +1 - + task_instance--task_instance_note - -1 -1 + +1 +1 - + task_instance--task_instance_note - -1 -1 + +1 +1 - + task_instance--task_reschedule - -0..N -1 + +0..N +1 - + task_instance--task_reschedule - -0..N -1 + +0..N +1 - + task_instance--task_reschedule - -0..N -1 + +0..N +1 - + task_instance--task_reschedule - -0..N -1 - - - -rendered_task_instance_fields - -rendered_task_instance_fields - -dag_id - [VARCHAR(250)] - NOT NULL - -map_index - [INTEGER] - NOT NULL - -run_id - [VARCHAR(250)] - NOT NULL - -task_id - [VARCHAR(250)] - NOT NULL - -k8s_pod_yaml - [JSON] - -rendered_fields - [JSON] - NOT NULL - - - -task_instance--rendered_task_instance_fields - -1 -1 - - - 
-task_instance--rendered_task_instance_fields - -1 -1 - - - -task_instance--rendered_task_instance_fields - -1 -1 - - - -task_instance--rendered_task_instance_fields - -1 -1 + +0..N +1 - + task_fail - -task_fail - -id - [INTEGER] - NOT NULL - -dag_id - [VARCHAR(250)] - NOT NULL - -duration - [INTEGER] - -end_date - [TIMESTAMP] - -map_index - [INTEGER] - NOT NULL - -run_id - [VARCHAR(250)] - NOT NULL - -start_date - [TIMESTAMP] - -task_id - [VARCHAR(250)] - NOT NULL + +task_fail + +id + [INTEGER] + NOT NULL + +dag_id + [VARCHAR(250)] + NOT NULL + +duration + [INTEGER] + +end_date + [TIMESTAMP] + +map_index + [INTEGER] + NOT NULL + +run_id + [VARCHAR(250)] + NOT NULL + +start_date + [TIMESTAMP] + +task_id + [VARCHAR(250)] + NOT NULL - + task_instance--task_fail - -0..N -1 + +0..N +1 - + task_instance--task_fail - -0..N -1 + +0..N +1 - + task_instance--task_fail - -0..N -1 + +0..N +1 - + task_instance--task_fail - -0..N -1 + +0..N +1 - + task_map - -task_map - -dag_id - [VARCHAR(250)] - NOT NULL - -map_index - [INTEGER] - NOT NULL - -run_id - [VARCHAR(250)] - NOT NULL - -task_id - [VARCHAR(250)] - NOT NULL - -keys - [JSON] - -length - [INTEGER] - NOT NULL + +task_map + +dag_id + [VARCHAR(250)] + NOT NULL + +map_index + [INTEGER] + NOT NULL + +run_id + [VARCHAR(250)] + NOT NULL + +task_id + [VARCHAR(250)] + NOT NULL + +keys + [JSON] + +length + [INTEGER] + NOT NULL - + task_instance--task_map - -1 -1 + +1 +1 - + task_instance--task_map - -1 -1 + +1 +1 - + task_instance--task_map - -1 -1 + +1 +1 - + task_instance--task_map - -1 -1 + +1 +1 - + xcom - -xcom - -dag_run_id - [INTEGER] - NOT NULL - -key - [VARCHAR(512)] - NOT NULL - -map_index - [INTEGER] - NOT NULL - -task_id - [VARCHAR(250)] - NOT NULL - -dag_id - [VARCHAR(250)] - NOT NULL - -run_id - [VARCHAR(250)] - NOT NULL - -timestamp - [TIMESTAMP] - NOT NULL - -value - [BLOB] + +xcom + +dag_run_id + [INTEGER] + NOT NULL + +key + [VARCHAR(512)] + NOT NULL + +map_index + [INTEGER] + NOT NULL + +task_id + [VARCHAR(250)] 
+ NOT NULL + +dag_id + [VARCHAR(250)] + NOT NULL + +run_id + [VARCHAR(250)] + NOT NULL + +timestamp + [TIMESTAMP] + NOT NULL + +value + [BYTEA] - + task_instance--xcom - -1 -1 + +0..N +1 - + task_instance--xcom - -0..N -1 + +1 +1 - + task_instance--xcom - -1 -1 + +1 +1 - + task_instance--xcom - -0..N -1 + +0..N +1 - - -log_template - -log_template - -id - [INTEGER] - NOT NULL - -created_at - [TIMESTAMP] - NOT NULL - -elasticsearch_id - [TEXT] - NOT NULL - -filename - [TEXT] - NOT NULL + + +rendered_task_instance_fields + +rendered_task_instance_fields + +dag_id + [VARCHAR(250)] + NOT NULL + +map_index + [INTEGER] + NOT NULL + +run_id + [VARCHAR(250)] + NOT NULL + +task_id + [VARCHAR(250)] + NOT NULL + +k8s_pod_yaml + [JSON] + +rendered_fields + [JSON] + NOT NULL - + + +task_instance--rendered_task_instance_fields + +1 +1 + + + +task_instance--rendered_task_instance_fields + +1 +1 + + + +task_instance--rendered_task_instance_fields + +1 +1 + + + +task_instance--rendered_task_instance_fields + +1 +1 + + + +ab_role + +ab_role + +id + [INTEGER] + NOT NULL + +name + [VARCHAR(64)] + NOT NULL + + + +ab_role--ab_user_role + +0..N +{0,1} + + + +ab_permission_view_role + +ab_permission_view_role + +id + [INTEGER] + NOT NULL + +permission_view_id + [INTEGER] + +role_id + [INTEGER] + + -log_template--dag_run - -0..N -{0,1} +ab_role--ab_permission_view_role + +0..N +{0,1} - - -dataset - -dataset - -id - [INTEGER] - NOT NULL - -created_at - [TIMESTAMP] - NOT NULL - -extra - [JSON] - NOT NULL - -is_orphaned - [BOOLEAN] - NOT NULL - -updated_at - [TIMESTAMP] - NOT NULL - -uri - [VARCHAR(3000)] - NOT NULL + + +ab_permission + +ab_permission + +id + [INTEGER] + NOT NULL + +name + [VARCHAR(100)] + NOT NULL - + + +ab_permission_view + +ab_permission_view + +id + [INTEGER] + NOT NULL + +permission_id + [INTEGER] + +view_menu_id + [INTEGER] + + -dataset--dag_schedule_dataset_reference - -1 -1 +ab_permission--ab_permission_view + +0..N +{0,1} - + -dataset--dataset_dag_run_queue - -1 -1 
+ab_permission_view--ab_permission_view_role + +0..N +{0,1} - + + +ab_view_menu + +ab_view_menu + +id + [INTEGER] + NOT NULL + +name + [VARCHAR(250)] + NOT NULL + + -dataset--task_outlet_dataset_reference - -1 -1 +ab_view_menu--ab_permission_view + +0..N +{0,1} - + dataset_event - -dataset_event - -id - [INTEGER] - NOT NULL - -dataset_id - [INTEGER] - NOT NULL - -extra - [JSON] - NOT NULL - -source_dag_id - [VARCHAR(250)] - -source_map_index - [INTEGER] - -source_run_id - [VARCHAR(250)] - -source_task_id - [VARCHAR(250)] - -timestamp - [TIMESTAMP] - NOT NULL + +dataset_event + +id + [INTEGER] + NOT NULL + +dataset_id + [INTEGER] + NOT NULL + +extra + [JSON] + NOT NULL + +source_dag_id + [VARCHAR(250)] + +source_map_index + [INTEGER] + +source_run_id + [VARCHAR(250)] + +source_task_id + [VARCHAR(250)] + +timestamp + [TIMESTAMP] + NOT NULL dataset_event--dagrun_dataset_event - -1 -1 - - - -import_error - -import_error - -id - [INTEGER] - NOT NULL - -filename - [VARCHAR(1024)] - -stacktrace - [TEXT] - -timestamp - [TIMESTAMP] - - - -job - -job - -id - [INTEGER] - NOT NULL - -dag_id - [VARCHAR(250)] - -end_date - [TIMESTAMP] - -executor_class - [VARCHAR(500)] - -hostname - [VARCHAR(500)] - -job_type - [VARCHAR(30)] - -latest_heartbeat - [TIMESTAMP] - -start_date - [TIMESTAMP] - -state - [VARCHAR(20)] - -unixname - [VARCHAR(1000)] - - - -log - -log - -id - [INTEGER] - NOT NULL - -dag_id - [VARCHAR(250)] - -dttm - [TIMESTAMP] - -event - [VARCHAR(30)] - -execution_date - [TIMESTAMP] - -extra - [TEXT] - -map_index - [INTEGER] - -owner - [VARCHAR(500)] - -owner_display_name - [VARCHAR(500)] - -task_id - [VARCHAR(250)] + +1 +1 - + trigger - -trigger - -id - [INTEGER] - NOT NULL - -classpath - [VARCHAR(1000)] - NOT NULL - -created_date - [TIMESTAMP] - NOT NULL - -kwargs - [JSON] - NOT NULL - -triggerer_id - [INTEGER] + +trigger + +id + [INTEGER] + NOT NULL + +classpath + [VARCHAR(1000)] + NOT NULL + +created_date + [TIMESTAMP] + NOT NULL + +kwargs + [JSON] + NOT NULL + 
+triggerer_id + [INTEGER] - + trigger--task_instance - -0..N -{0,1} - - - -serialized_dag - -serialized_dag - -dag_id - [VARCHAR(250)] - NOT NULL - -dag_hash - [VARCHAR(32)] - NOT NULL - -data - [JSON] - -data_compressed - [BLOB] - -fileloc - [VARCHAR(2000)] - NOT NULL - -fileloc_hash - [BIGINT] - NOT NULL - -last_updated - [TIMESTAMP] - NOT NULL - -processor_subdir - [VARCHAR(2000)] + +0..N +{0,1} - -session - -session - -id - [INTEGER] - NOT NULL - -data - [BLOB] - -expiry - [DATETIME] - -session_id - [VARCHAR(255)] - - - -sla_miss - -sla_miss - -dag_id - [VARCHAR(250)] - NOT NULL - -execution_date - [TIMESTAMP] - NOT NULL - -task_id - [VARCHAR(250)] - NOT NULL - -description - [TEXT] - -email_sent - [BOOLEAN] - -notification_sent - [BOOLEAN] - -timestamp - [TIMESTAMP] - - -slot_pool - -slot_pool - -id - [INTEGER] - NOT NULL - -description - [TEXT] - -include_deferred - [BOOLEAN] - NOT NULL - -pool - [VARCHAR(256)] - -slots - [INTEGER] +session + +session + +id + [INTEGER] + NOT NULL + +data + [BYTEA] + +expiry + [TIMESTAMP] + +session_id + [VARCHAR(255)] - + -variable - -variable - -id - [INTEGER] - NOT NULL - -description - [TEXT] - -is_encrypted - [BOOLEAN] - -key - [VARCHAR(250)] - -val - [TEXT] +alembic_version + +alembic_version + +version_num + [VARCHAR(32)] + NOT NULL diff --git a/images/breeze/output_shell.svg b/images/breeze/output_shell.svg index dc734cc1f0cbe5..b52fa6d5125487 100644 --- a/images/breeze/output_shell.svg +++ b/images/breeze/output_shell.svg @@ -1,4 +1,4 @@ - + diff --git a/images/breeze/output_shell.txt b/images/breeze/output_shell.txt index 60210d439b7214..287cbec8da84ed 100644 --- a/images/breeze/output_shell.txt +++ b/images/breeze/output_shell.txt @@ -1 +1 @@ -35d460908fdd02d88da2f81015b59b32 +9dd3658bf3e2e6e605c2bae9d350f162 diff --git a/scripts/ci/pre_commit/common_precommit_utils.py b/scripts/ci/pre_commit/common_precommit_utils.py index 17ae403b7a61c9..c9bf78d23c9922 100644 --- a/scripts/ci/pre_commit/common_precommit_utils.py 
DEFAULT_PYTHON_MAJOR_MINOR_VERSION = "3.8"


def initialize_breeze_precommit(name: str, file: str) -> None:
    """Check that a breeze-based pre-commit script may run, exiting early when it may not.

    Output goes through the module-level ``console`` object.

    :param name: ``__name__`` of the calling script; anything other than ``"__main__"`` or
        ``"__mp_main__"`` is rejected.
    :param file: ``__file__`` of the calling script; used only to build the error message.
    :raises SystemExit: with an explanatory message when ``name`` shows the script was imported;
        with status 0 when the ``SKIP_BREEZE_PRE_COMMITS`` environment variable is non-empty;
        with status 1 when no ``breeze`` executable is found on ``PATH``.
    """
    if name not in ("__main__", "__mp_main__"):
        raise SystemExit(
            "This file is intended to be executed as an executable program. "
            "You cannot use it as a module. "
            f"To run this script, run the ./{file} command"
        )

    if os.environ.get("SKIP_BREEZE_PRE_COMMITS"):
        console.print("[yellow]Skipping breeze pre-commit as SKIP_BREEZE_PRE_COMMITS is set")
        # A requested skip is not a failure, so report success to the caller.
        sys.exit(0)
    if shutil.which("breeze") is None:
        console.print(
            "[red]The `breeze` command is not on path.[/]\n\n"
            "[yellow]Please install breeze with `pipx install -e ./dev/breeze` from Airflow sources "
            "and make sure you run `pipx ensurepath`[/]\n\n"
            "[bright_blue]You can also set SKIP_BREEZE_PRE_COMMITS env variable to non-empty "
            "value to skip all breeze tests."
        )
        sys.exit(1)


def _stop_breeze_project(project_name: str) -> None:
    """Run ``docker compose ... down --remove-orphans`` for the project, discarding its output."""
    down_command = ["docker", "compose", "--progress", "quiet"]
    if project_name:
        down_command.extend(["--project-name", project_name])
    down_command.extend(["down", "--remove-orphans"])
    subprocess.run(down_command, check=False, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)


def run_command_via_breeze_shell(
    cmd: list[str],
    python_version: str = DEFAULT_PYTHON_MAJOR_MINOR_VERSION,
    backend: str = "none",
    executor: str = "SequentialExecutor",
    extra_env: dict[str, str] | None = None,
    project_name: str = "pre-commit",
    skip_environment_initialization: bool = True,
    warn_image_upgrade_needed: bool = False,
    **other_popen_kwargs,
) -> subprocess.CompletedProcess:
    """Run ``cmd`` through ``breeze shell`` and then bring the compose project down.

    :param cmd: command and arguments; they are shell-quoted and passed to ``breeze shell``
        as one trailing argument.
    :param python_version: value passed to ``--python``.
    :param backend: value passed to ``--backend``.
    :param executor: value passed to ``--executor``.
    :param extra_env: variables layered on top of ``os.environ`` and the fixed settings below;
        entries here win on conflict.
    :param project_name: value passed to ``--project-name`` (for both ``breeze shell`` and the
        final ``docker compose down``); the flag is omitted when empty.
    :param skip_environment_initialization: adds ``--skip-environment-initialization`` when true.
    :param warn_image_upgrade_needed: adds ``--warn-image-upgrade-needed`` when true.
    :param other_popen_kwargs: forwarded to ``subprocess.run``; must not repeat ``check``,
        ``text`` or ``env``, which are set here.
    :return: the ``CompletedProcess`` of the ``breeze shell`` invocation (``check=False``, so a
        non-zero return code does not raise).
    """
    extra_env = extra_env or {}
    subprocess_cmd: list[str] = [
        "breeze",
        "shell",
        "--python",
        python_version,
        "--backend",
        backend,
        "--executor",
        executor,
        "--quiet",
        "--restart",
        "--skip-image-upgrade-check",
        "--tty",
        "disabled",
    ]
    if warn_image_upgrade_needed:
        subprocess_cmd.append("--warn-image-upgrade-needed")
    if skip_environment_initialization:
        subprocess_cmd.append("--skip-environment-initialization")
    if project_name:
        subprocess_cmd.extend(["--project-name", project_name])
    subprocess_cmd.append(shlex.join(cmd))
    if os.environ.get("VERBOSE_COMMANDS"):
        console.print(f"[magenta]Running command: {shlex.join(subprocess_cmd)}[/]")
    try:
        return subprocess.run(
            subprocess_cmd,
            check=False,
            text=True,
            **other_popen_kwargs,
            env={
                **os.environ,
                "SKIP_BREEZE_SELF_UPGRADE_CHECK": "true",
                "SKIP_GROUP_OUTPUT": "true",
                "SKIP_SAVING_CHOICES": "true",
                "ANSWER": "no",
                **extra_env,
            },
        )
    finally:
        # Stop remaining containers even when the breeze call is interrupted or fails to start.
        _stop_breeze_project(project_name)
from __future__ import annotations

import sys
from pathlib import Path

# The helper module lives next to this script; put that directory first on the import path.
sys.path.insert(0, str(Path(__file__).parent.resolve()))
from common_precommit_utils import console, initialize_breeze_precommit, run_command_via_breeze_shell

# Exits early unless this file is run as a script and the breeze checks pass.
initialize_breeze_precommit(__name__, __file__)

# Generate the ER diagram inside the breeze shell, against the Postgres backend.
cmd_result = run_command_via_breeze_shell(
    ["python3", "/opt/airflow/scripts/in_container/run_prepare_er_diagram.py"],
    backend="postgres",
    project_name="pre-commit",
    skip_environment_initialization=False,
    warn_image_upgrade_needed=True,
    extra_env={
        "DB_RESET": "true",
    },
)

if cmd_result.returncode != 0:
    # "[yellow]" matches the warning colour used by the helper module's own messages.
    console.print(
        "[yellow]\nIf you see strange stacktraces above, "
        "run `breeze ci-image build --python 3.8` and try again."
    )
    # Propagate the in-container failure code to the caller.
    sys.exit(cmd_result.returncode)