From e71285996ed3a12466136eb404e3bd4aafdd4c73 Mon Sep 17 00:00:00 2001 From: e-halan Date: Mon, 20 May 2024 17:06:16 +0000 Subject: [PATCH] Deprecate AutoMLTablesListTableSpecsOperator and AutoMLTablesListColumnSpecsOperator --- .../google/cloud/operators/automl.py | 77 ++++--- .../run_provider_yaml_files_check.py | 2 + tests/always/test_project_structure.py | 2 + .../google/cloud/operators/test_automl.py | 196 ++++++++---------- .../cloud/automl/example_automl_dataset.py | 103 +++++---- .../cloud/automl/example_automl_model.py | 51 +---- .../automl/example_automl_translation.py | 13 +- 7 files changed, 192 insertions(+), 252 deletions(-) diff --git a/airflow/providers/google/cloud/operators/automl.py b/airflow/providers/google/cloud/operators/automl.py index 64c1c381519c46..e178e9fee21d8a 100644 --- a/airflow/providers/google/cloud/operators/automl.py +++ b/airflow/providers/google/cloud/operators/automl.py @@ -24,7 +24,6 @@ from functools import cached_property from typing import TYPE_CHECKING, Sequence, Tuple -from deprecated import deprecated from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault from google.cloud.automl_v1beta1 import ( BatchPredictResult, @@ -35,7 +34,7 @@ TableSpec, ) -from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning +from airflow.exceptions import AirflowException from airflow.providers.google.cloud.hooks.automl import CloudAutoMLHook from airflow.providers.google.cloud.hooks.vertex_ai.prediction_service import PredictionServiceHook from airflow.providers.google.cloud.links.translate import ( @@ -681,10 +680,18 @@ def execute(self, context: Context): ) +# AutoML: AutoML enables customers to leverage Google's transfer learning and Neural Architecture Search +# to build custom models using a variety of data types. +# AutoML Services include AutoML Natural Language, AutoML Tables, AutoML Translation, AutoML Video, +# and AutoML Vision. 
class AutoMLTablesListColumnSpecsOperator(GoogleCloudBaseOperator): """ Lists column specs in a table. + Operator AutoMLTablesListColumnSpecsOperator has been deprecated due to shutdown of + a legacy version of AutoML Tables on March 31, 2024. For additional information + see: https://cloud.google.com/automl-tables/docs/deprecations. + .. seealso:: For more information on how to use this operator, take a look at the guide: :ref:`howto/operator:AutoMLTablesListColumnSpecsOperator` @@ -759,6 +766,11 @@ def __init__( self.retry = retry self.gcp_conn_id = gcp_conn_id self.impersonation_chain = impersonation_chain + raise AirflowException( + "Operator AutoMLTablesListColumnSpecsOperator has been deprecated due to shutdown of " + "a legacy version of AutoML Tables on March 31, 2024. " + "For additional information see: https://cloud.google.com/automl-tables/docs/deprecations." + ) def execute(self, context: Context): hook = CloudAutoMLHook( @@ -791,20 +803,14 @@ def execute(self, context: Context): return result -@deprecated( - reason=( - "Class `AutoMLTablesUpdateDatasetOperator` has been deprecated and no longer available. " - "Please use `UpdateDatasetOperator` instead" - ), - category=AirflowProviderDeprecationWarning, - action="error", -) class AutoMLTablesUpdateDatasetOperator(GoogleCloudBaseOperator): """ Updates a dataset. - AutoMLTablesUpdateDatasetOperator has been deprecated and no longer available. Please use - :class:`airflow.providers.google.cloud.operators.vertex_ai.dataset.UpdateDatasetOperator` + Operator AutoMLTablesUpdateDatasetOperator has been deprecated due to shutdown of + a legacy version of AutoML Tables on March 31, 2024. For additional information + see: https://cloud.google.com/automl-tables/docs/deprecations. + Please use :class:`airflow.providers.google.cloud.operators.vertex_ai.dataset.UpdateDatasetOperator` instead. .. 
seealso:: @@ -864,6 +870,12 @@ def __init__( self.retry = retry self.gcp_conn_id = gcp_conn_id self.impersonation_chain = impersonation_chain + raise AirflowException( + "Operator AutoMLTablesUpdateDatasetOperator has been deprecated due to shutdown of " + "a legacy version of AutoML Tables on March 31, 2024. " + "For additional information see: https://cloud.google.com/automl-tables/docs/deprecations. " + "Please use UpdateDatasetOperator from Vertex AI instead." + ) def execute(self, context: Context): hook = CloudAutoMLHook( @@ -1074,14 +1086,6 @@ def execute(self, context: Context): self.log.info("Deletion is completed") -@deprecated( - reason=( - "Class `AutoMLDeployModelOperator` has been deprecated and no longer available. Please use " - "`DeployModelOperator` instead" - ), - category=AirflowProviderDeprecationWarning, - action="error", -) class AutoMLDeployModelOperator(GoogleCloudBaseOperator): """ Deploys a model; if a model is already deployed, deploying it with the same parameters has no effect. @@ -1092,8 +1096,10 @@ class AutoMLDeployModelOperator(GoogleCloudBaseOperator): Only applicable for Text Classification, Image Object Detection and Tables; all other domains manage deployment automatically. - AutoMLDeployModelOperator has been deprecated and no longer available. Please use - :class:`airflow.providers.google.cloud.operators.vertex_ai.endpoint_service.DeployModelOperator` + Operator AutoMLDeployModelOperator has been deprecated due to shutdown of a legacy version + of AutoML Natural Language, Vision, Video Intelligence on March 31, 2024. + For additional information see: https://cloud.google.com/vision/automl/docs/deprecations . + Please use :class:`airflow.providers.google.cloud.operators.vertex_ai.endpoint_service.DeployModelOperator` instead. .. 
seealso:: @@ -1156,24 +1162,20 @@ def __init__( self.retry = retry self.gcp_conn_id = gcp_conn_id self.impersonation_chain = impersonation_chain + raise AirflowException( + "Operator AutoMLDeployModelOperator has been deprecated due to shutdown of " + "a legacy version of AutoML Natural Language, Vision, Video Intelligence " + "on March 31, 2024. " + "For additional information see: https://cloud.google.com/vision/automl/docs/deprecations. " + "Please use DeployModelOperator from Vertex AI instead." + ) def execute(self, context: Context): hook = CloudAutoMLHook( gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain, ) - model = hook.get_model( - model_id=self.model_id, - location=self.location, - project_id=self.project_id, - retry=self.retry, - timeout=self.timeout, - metadata=self.metadata, - ) - if not hasattr(model, "translation_model_metadata"): - _raise_exception_for_deprecated_operator(self.__class__.__name__, "DeployModelOperator") self.log.info("Deploying model_id %s", self.model_id) - operation = hook.deploy_model( model_id=self.model_id, location=self.location, @@ -1191,6 +1193,10 @@ class AutoMLTablesListTableSpecsOperator(GoogleCloudBaseOperator): """ Lists table specs in a dataset. + Operator AutoMLTablesListTableSpecsOperator has been deprecated due to shutdown of + a legacy version of AutoML Tables on March 31, 2024. For additional information + see: https://cloud.google.com/automl-tables/docs/deprecations. + .. seealso:: For more information on how to use this operator, take a look at the guide: :ref:`howto/operator:AutoMLTablesListTableSpecsOperator` @@ -1256,6 +1262,11 @@ def __init__( self.retry = retry self.gcp_conn_id = gcp_conn_id self.impersonation_chain = impersonation_chain + raise AirflowException( + "Operator AutoMLTablesListTableSpecsOperator has been deprecated due to shutdown of " + "a legacy version of AutoML Tables on March 31, 2024. 
" + "For additional information see: https://cloud.google.com/automl-tables/docs/deprecations. " + ) def execute(self, context: Context): hook = CloudAutoMLHook( diff --git a/scripts/in_container/run_provider_yaml_files_check.py b/scripts/in_container/run_provider_yaml_files_check.py index d3cf8b1e2e7069..95978a04ddd09b 100755 --- a/scripts/in_container/run_provider_yaml_files_check.py +++ b/scripts/in_container/run_provider_yaml_files_check.py @@ -57,6 +57,8 @@ KNOWN_DEPRECATED_CLASSES = [ "airflow.providers.google.cloud.links.dataproc.DataprocLink", + "airflow.providers.google.cloud.operators.automl.AutoMLTablesListColumnSpecsOperator", + "airflow.providers.google.cloud.operators.automl.AutoMLTablesListTableSpecsOperator", "airflow.providers.google.cloud.operators.automl.AutoMLTablesUpdateDatasetOperator", "airflow.providers.google.cloud.operators.automl.AutoMLDeployModelOperator", ] diff --git a/tests/always/test_project_structure.py b/tests/always/test_project_structure.py index ba4a4673515325..11af794670e165 100644 --- a/tests/always/test_project_structure.py +++ b/tests/always/test_project_structure.py @@ -363,6 +363,8 @@ class TestGoogleProviderProjectStructure(ExampleCoverageTest, AssetsCoverageTest ".CloudDataTransferServiceS3ToGCSOperator", "airflow.providers.google.cloud.operators.cloud_storage_transfer_service" ".CloudDataTransferServiceGCSToGCSOperator", + "airflow.providers.google.cloud.operators.automl.AutoMLTablesListColumnSpecsOperator", + "airflow.providers.google.cloud.operators.automl.AutoMLTablesListTableSpecsOperator", "airflow.providers.google.cloud.operators.automl.AutoMLTablesUpdateDatasetOperator", "airflow.providers.google.cloud.operators.automl.AutoMLDeployModelOperator", "airflow.providers.google.cloud.operators.dataproc.DataprocSubmitHadoopJobOperator", diff --git a/tests/providers/google/cloud/operators/test_automl.py b/tests/providers/google/cloud/operators/test_automl.py index f7bef5452193fb..985c5ef0aa991e 100644 --- 
a/tests/providers/google/cloud/operators/test_automl.py +++ b/tests/providers/google/cloud/operators/test_automl.py @@ -28,7 +28,7 @@ from google.api_core.gapic_v1.method import DEFAULT from google.cloud.automl_v1beta1 import BatchPredictResult, Dataset, Model, PredictResponse -from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning +from airflow.exceptions import AirflowException from airflow.providers.google.cloud.hooks.automl import CloudAutoMLHook from airflow.providers.google.cloud.hooks.vertex_ai.prediction_service import PredictionServiceHook from airflow.providers.google.cloud.operators.automl import ( @@ -393,66 +393,60 @@ def test_templating(self, create_task_instance_of_operator): assert task.impersonation_chain == "impersonation-chain" -class TestAutoMLListColumnsSpecsOperator: +class TestAutoMLTablesListColumnsSpecsOperator: + expected_exception_string = ( + "Operator AutoMLTablesListColumnSpecsOperator has been deprecated due to shutdown of " + "a legacy version of AutoML Tables on March 31, 2024. " + "For additional information see: https://cloud.google.com/automl-tables/docs/deprecations." 
+ ) + @mock.patch("airflow.providers.google.cloud.operators.automl.CloudAutoMLHook") def test_execute(self, mock_hook): table_spec = "table_spec_id" filter_ = "filter" page_size = 42 - op = AutoMLTablesListColumnSpecsOperator( - dataset_id=DATASET_ID, - table_spec_id=table_spec, - location=GCP_LOCATION, - project_id=GCP_PROJECT_ID, - field_mask=MASK, - filter_=filter_, - page_size=page_size, - task_id=TASK_ID, - ) - op.execute(context=mock.MagicMock()) - mock_hook.return_value.list_column_specs.assert_called_once_with( - dataset_id=DATASET_ID, - field_mask=MASK, - filter_=filter_, - location=GCP_LOCATION, - metadata=(), - page_size=page_size, - project_id=GCP_PROJECT_ID, - retry=DEFAULT, - table_spec_id=table_spec, - timeout=None, - ) + with pytest.raises(AirflowException, match=self.expected_exception_string): + _ = AutoMLTablesListColumnSpecsOperator( + dataset_id=DATASET_ID, + table_spec_id=table_spec, + location=GCP_LOCATION, + project_id=GCP_PROJECT_ID, + field_mask=MASK, + filter_=filter_, + page_size=page_size, + task_id=TASK_ID, + ) + mock_hook.assert_not_called() @pytest.mark.db_test def test_templating(self, create_task_instance_of_operator): - ti = create_task_instance_of_operator( - AutoMLTablesListColumnSpecsOperator, - # Templated fields - dataset_id="{{ 'dataset-id' }}", - table_spec_id="{{ 'table-spec-id' }}", - field_mask="{{ 'field-mask' }}", - filter_="{{ 'filter-' }}", - location="{{ 'location' }}", - project_id="{{ 'project-id' }}", - impersonation_chain="{{ 'impersonation-chain' }}", - # Other parameters - dag_id="test_template_body_templating_dag", - task_id="test_template_body_templating_task", - execution_date=timezone.datetime(2024, 2, 1, tzinfo=timezone.utc), - ) - ti.render_templates() - task: AutoMLTablesListColumnSpecsOperator = ti.task - assert task.dataset_id == "dataset-id" - assert task.table_spec_id == "table-spec-id" - assert task.field_mask == "field-mask" - assert task.filter_ == "filter-" - assert task.location == "location" - 
assert task.project_id == "project-id" - assert task.impersonation_chain == "impersonation-chain" + with pytest.raises(AirflowException, match=self.expected_exception_string): + _ = create_task_instance_of_operator( + AutoMLTablesListColumnSpecsOperator, + # Templated fields + dataset_id="{{ 'dataset-id' }}", + table_spec_id="{{ 'table-spec-id' }}", + field_mask="{{ 'field-mask' }}", + filter_="{{ 'filter-' }}", + location="{{ 'location' }}", + project_id="{{ 'project-id' }}", + impersonation_chain="{{ 'impersonation-chain' }}", + # Other parameters + dag_id="test_template_body_templating_dag", + task_id="test_template_body_templating_task", + execution_date=timezone.datetime(2024, 2, 1, tzinfo=timezone.utc), + ) + +class TestAutoMLTablesUpdateDatasetOperator: + expected_exception_string = ( + "Operator AutoMLTablesUpdateDatasetOperator has been deprecated due to shutdown of " + "a legacy version of AutoML Tables on March 31, 2024. " + "For additional information see: https://cloud.google.com/automl-tables/docs/deprecations. " + "Please use UpdateDatasetOperator from Vertex AI instead." + ) -class TestAutoMLUpdateDatasetOperator: @mock.patch("airflow.providers.google.cloud.operators.automl.CloudAutoMLHook") def test_execute(self, mock_hook): mock_hook.return_value.update_dataset.return_value = Dataset(name=DATASET_PATH) @@ -460,11 +454,7 @@ def test_execute(self, mock_hook): dataset = copy.deepcopy(DATASET) dataset["name"] = DATASET_ID - expected_exception_str = ( - r"Call to deprecated class AutoMLTablesUpdateDatasetOperator. 
\(Class " - r"`AutoMLTablesUpdateDatasetOperator` has been deprecated and no longer available" - ) - with pytest.raises(AirflowProviderDeprecationWarning, match=expected_exception_str): + with pytest.raises(AirflowException, match=self.expected_exception_string): AutoMLTablesUpdateDatasetOperator( dataset=dataset, update_mask=MASK, @@ -475,7 +465,7 @@ def test_execute(self, mock_hook): @pytest.mark.db_test def test_templating(self, create_task_instance_of_operator): - with pytest.raises(AirflowProviderDeprecationWarning) as err: + with pytest.raises(AirflowException, match=self.expected_exception_string): create_task_instance_of_operator( AutoMLTablesUpdateDatasetOperator, # Templated fields @@ -488,10 +478,6 @@ def test_templating(self, create_task_instance_of_operator): task_id="test_template_body_templating_task", execution_date=timezone.datetime(2024, 2, 1, tzinfo=timezone.utc), ) - assert str(err.value).startswith( - "Call to deprecated class AutoMLTablesUpdateDatasetOperator. " - "(Class `AutoMLTablesUpdateDatasetOperator` has been deprecated and no longer available" - ) class TestAutoMLGetModelOperator: @@ -635,15 +621,19 @@ def test_templating(self, create_task_instance_of_operator): class TestAutoMLDeployModelOperator: + expected_exception_string = ( + "Operator AutoMLDeployModelOperator has been deprecated due to shutdown of " + "a legacy version of AutoML Natural Language, Vision, Video Intelligence " + "on March 31, 2024. " + "For additional information see: https://cloud.google.com/vision/automl/docs/deprecations. " + "Please use DeployModelOperator from Vertex AI instead." + ) + @mock.patch("airflow.providers.google.cloud.operators.automl.CloudAutoMLHook") def test_execute(self, mock_hook): image_detection_metadata = {} - expected_exception_str = ( - r"Call to deprecated class AutoMLDeployModelOperator. 
\(Class `AutoMLDeployModelOperator` has " - r"been deprecated and no longer available" - ) - with pytest.raises(AirflowProviderDeprecationWarning, match=expected_exception_str): + with pytest.raises(AirflowException, match=self.expected_exception_string): AutoMLDeployModelOperator( model_id=MODEL_ID, image_detection_metadata=image_detection_metadata, @@ -656,7 +646,7 @@ def test_execute(self, mock_hook): @pytest.mark.db_test def test_templating(self, create_task_instance_of_operator): - with pytest.raises(AirflowProviderDeprecationWarning) as err: + with pytest.raises(AirflowException, match=self.expected_exception_string): create_task_instance_of_operator( AutoMLDeployModelOperator, # Templated fields @@ -670,11 +660,6 @@ def test_templating(self, create_task_instance_of_operator): execution_date=timezone.datetime(2024, 2, 1, tzinfo=timezone.utc), ) - assert str(err.value).startswith( - "Call to deprecated class AutoMLDeployModelOperator. " - "(Class `AutoMLDeployModelOperator` has been deprecated and no longer available" - ) - class TestAutoMLDatasetImportOperator: @mock.patch("airflow.providers.google.cloud.operators.automl.CloudAutoMLHook") @@ -751,53 +736,44 @@ def test_templating(self, create_task_instance_of_operator): class TestAutoMLTablesListTableSpecsOperator: + expected_exception_string = ( + "Operator AutoMLTablesListTableSpecsOperator has been deprecated due to shutdown of " + "a legacy version of AutoML Tables on March 31, 2024. " + "For additional information see: https://cloud.google.com/automl-tables/docs/deprecations. 
" + ) + @mock.patch("airflow.providers.google.cloud.operators.automl.CloudAutoMLHook") def test_execute(self, mock_hook): filter_ = "filter" page_size = 42 - op = AutoMLTablesListTableSpecsOperator( - dataset_id=DATASET_ID, - location=GCP_LOCATION, - project_id=GCP_PROJECT_ID, - filter_=filter_, - page_size=page_size, - task_id=TASK_ID, - ) - op.execute(context=mock.MagicMock()) - mock_hook.return_value.list_table_specs.assert_called_once_with( - dataset_id=DATASET_ID, - filter_=filter_, - location=GCP_LOCATION, - metadata=(), - page_size=page_size, - project_id=GCP_PROJECT_ID, - retry=DEFAULT, - timeout=None, - ) + with pytest.raises(AirflowException, match=self.expected_exception_string): + _ = AutoMLTablesListTableSpecsOperator( + dataset_id=DATASET_ID, + location=GCP_LOCATION, + project_id=GCP_PROJECT_ID, + filter_=filter_, + page_size=page_size, + task_id=TASK_ID, + ) + mock_hook.assert_not_called() @pytest.mark.db_test def test_templating(self, create_task_instance_of_operator): - ti = create_task_instance_of_operator( - AutoMLTablesListTableSpecsOperator, - # Templated fields - dataset_id="{{ 'dataset-id' }}", - filter_="{{ 'filter-' }}", - location="{{ 'location' }}", - project_id="{{ 'project-id' }}", - impersonation_chain="{{ 'impersonation-chain' }}", - # Other parameters - dag_id="test_template_body_templating_dag", - task_id="test_template_body_templating_task", - execution_date=timezone.datetime(2024, 2, 1, tzinfo=timezone.utc), - ) - ti.render_templates() - task: AutoMLTablesListTableSpecsOperator = ti.task - assert task.dataset_id == "dataset-id" - assert task.filter_ == "filter-" - assert task.location == "location" - assert task.project_id == "project-id" - assert task.impersonation_chain == "impersonation-chain" + with pytest.raises(AirflowException, match=self.expected_exception_string): + _ = create_task_instance_of_operator( + AutoMLTablesListTableSpecsOperator, + # Templated fields + dataset_id="{{ 'dataset-id' }}", + filter_="{{ 'filter-' 
}}", + location="{{ 'location' }}", + project_id="{{ 'project-id' }}", + impersonation_chain="{{ 'impersonation-chain' }}", + # Other parameters + dag_id="test_template_body_templating_dag", + task_id="test_template_body_templating_task", + execution_date=timezone.datetime(2024, 2, 1, tzinfo=timezone.utc), + ) class TestAutoMLDatasetListOperator: diff --git a/tests/system/providers/google/cloud/automl/example_automl_dataset.py b/tests/system/providers/google/cloud/automl/example_automl_dataset.py index 50950d761473d3..f90caf32df0c4a 100644 --- a/tests/system/providers/google/cloud/automl/example_automl_dataset.py +++ b/tests/system/providers/google/cloud/automl/example_automl_dataset.py @@ -16,59 +16,52 @@ # specific language governing permissions and limitations # under the License. -""" -Example Airflow DAG for Google AutoML service testing dataset operations. -""" +"""Example Airflow DAG for Google AutoML service testing dataset operations.""" from __future__ import annotations import os from datetime import datetime +from google.cloud import storage # type: ignore[attr-defined] + +from airflow.decorators import task from airflow.models.dag import DAG -from airflow.providers.google.cloud.hooks.automl import CloudAutoMLHook from airflow.providers.google.cloud.operators.automl import ( AutoMLCreateDatasetOperator, AutoMLDeleteDatasetOperator, AutoMLImportDataOperator, AutoMLListDatasetOperator, - AutoMLTablesListColumnSpecsOperator, - AutoMLTablesListTableSpecsOperator, ) from airflow.providers.google.cloud.operators.gcs import ( GCSCreateBucketOperator, GCSDeleteBucketOperator, - GCSSynchronizeBucketsOperator, ) +from airflow.providers.google.cloud.transfers.gcs_to_gcs import GCSToGCSOperator from airflow.utils.trigger_rule import TriggerRule ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID", "default") -DAG_ID = "example_automl_dataset" +DAG_ID = "automl_dataset" GCP_PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT", "default") GCP_AUTOML_LOCATION = 
"us-central1" RESOURCE_DATA_BUCKET = "airflow-system-tests-resources" DATA_SAMPLE_GCS_BUCKET_NAME = f"bucket_{DAG_ID}_{ENV_ID}".replace("_", "-") -DATASET_NAME = f"ds_tabular_{ENV_ID}".replace("-", "_") +DATASET_NAME = f"ds_{DAG_ID}_{ENV_ID}".replace("-", "_") DATASET = { "display_name": DATASET_NAME, - "tables_dataset_metadata": {"target_column_spec_id": ""}, + "translation_dataset_metadata": { + "source_language_code": "en", + "target_language_code": "es", + }, } -AUTOML_DATASET_BUCKET = f"gs://{DATA_SAMPLE_GCS_BUCKET_NAME}/automl/tabular-classification.csv" -IMPORT_INPUT_CONFIG = {"gcs_source": {"input_uris": [AUTOML_DATASET_BUCKET]}} -extract_object_id = CloudAutoMLHook.extract_object_id - - -def get_target_column_spec(columns_specs: list[dict], column_name: str) -> str: - """ - Using column name returns spec of the column. - """ - for column in columns_specs: - if column["display_name"] == column_name: - return extract_object_id(column) - raise Exception(f"Unknown target column: {column_name}") +CSV_FILE_NAME = "en-es.csv" +TSV_FILE_NAME = "en-es.tsv" +GCS_FILE_PATH = f"automl/datasets/translate/{CSV_FILE_NAME}" +AUTOML_DATASET_BUCKET = f"gs://{DATA_SAMPLE_GCS_BUCKET_NAME}/automl/{CSV_FILE_NAME}" +IMPORT_INPUT_CONFIG = {"gcs_source": {"input_uris": [AUTOML_DATASET_BUCKET]}} with DAG( @@ -77,11 +70,6 @@ def get_target_column_spec(columns_specs: list[dict], column_name: str) -> str: start_date=datetime(2021, 1, 1), catchup=False, tags=["example", "automl", "dataset"], - user_defined_macros={ - "get_target_column_spec": get_target_column_spec, - "target": "Class", - "extract_object_id": extract_object_id, - }, ) as dag: create_bucket = GCSCreateBucketOperator( task_id="create_bucket", @@ -90,13 +78,32 @@ def get_target_column_spec(columns_specs: list[dict], column_name: str) -> str: location=GCP_AUTOML_LOCATION, ) - move_dataset_file = GCSSynchronizeBucketsOperator( - task_id="move_dataset_to_bucket", + @task + def upload_updated_csv_file_to_gcs(): + # download 
file into memory + storage_client = storage.Client() + bucket = storage_client.bucket(RESOURCE_DATA_BUCKET, GCP_PROJECT_ID) + blob = bucket.blob(GCS_FILE_PATH) + contents = blob.download_as_string().decode() + + # update file content + updated_contents = contents.replace("template-bucket", DATA_SAMPLE_GCS_BUCKET_NAME) + + # upload updated content to bucket + destination_bucket = storage_client.bucket(DATA_SAMPLE_GCS_BUCKET_NAME) + destination_blob = destination_bucket.blob(f"automl/{CSV_FILE_NAME}") + destination_blob.upload_from_string(updated_contents) + + # AutoML requires a .csv file with links to .tsv/.tmx files containing translation training data + upload_csv_dataset_file = upload_updated_csv_file_to_gcs() + + # The .tsv file contains training data with translated language pairs + copy_tsv_dataset_file = GCSToGCSOperator( + task_id="copy_dataset_file", source_bucket=RESOURCE_DATA_BUCKET, - source_object="automl/datasets/tabular", + source_object=f"automl/datasets/translate/{TSV_FILE_NAME}", destination_bucket=DATA_SAMPLE_GCS_BUCKET_NAME, - destination_object="automl", - recursive=True, + destination_object=f"automl/{TSV_FILE_NAME}", ) # [START howto_operator_automl_create_dataset] @@ -118,25 +125,6 @@ def get_target_column_spec(columns_specs: list[dict], column_name: str) -> str: ) # [END howto_operator_automl_import_data] - # [START howto_operator_automl_specs] - list_tables_spec = AutoMLTablesListTableSpecsOperator( - task_id="list_tables_spec", - dataset_id=dataset_id, - location=GCP_AUTOML_LOCATION, - project_id=GCP_PROJECT_ID, - ) - # [END howto_operator_automl_specs] - - # [START howto_operator_automl_column_specs] - list_columns_spec = AutoMLTablesListColumnSpecsOperator( - task_id="list_columns_spec", - dataset_id=dataset_id, - table_spec_id="{{ extract_object_id(task_instance.xcom_pull('list_tables_spec_task')[0]) }}", - location=GCP_AUTOML_LOCATION, - project_id=GCP_PROJECT_ID, - ) - # [END howto_operator_automl_column_specs] - # [START 
howto_operator_list_dataset] list_datasets = AutoMLListDatasetOperator( task_id="list_datasets", @@ -151,20 +139,23 @@ def get_target_column_spec(columns_specs: list[dict], column_name: str) -> str: dataset_id=dataset_id, location=GCP_AUTOML_LOCATION, project_id=GCP_PROJECT_ID, + trigger_rule=TriggerRule.ALL_DONE, ) # [END howto_operator_delete_dataset] delete_bucket = GCSDeleteBucketOperator( - task_id="delete_bucket", bucket_name=DATA_SAMPLE_GCS_BUCKET_NAME, trigger_rule=TriggerRule.ALL_DONE + task_id="delete_bucket", + bucket_name=DATA_SAMPLE_GCS_BUCKET_NAME, + trigger_rule=TriggerRule.ALL_DONE, ) ( # TEST SETUP - [create_bucket >> move_dataset_file, create_dataset] + [create_bucket >> upload_csv_dataset_file >> copy_tsv_dataset_file] + # create_bucket + >> create_dataset # TEST BODY >> import_dataset - >> list_tables_spec - >> list_columns_spec >> list_datasets # TEST TEARDOWN >> delete_dataset diff --git a/tests/system/providers/google/cloud/automl/example_automl_model.py b/tests/system/providers/google/cloud/automl/example_automl_model.py index d6c3ee9598c11e..16035956000236 100644 --- a/tests/system/providers/google/cloud/automl/example_automl_model.py +++ b/tests/system/providers/google/cloud/automl/example_automl_model.py @@ -16,9 +16,7 @@ # specific language governing permissions and limitations # under the License. -""" -Example Airflow DAG for Google AutoML service testing model operations. 
-""" +"""Example Airflow DAG for Google AutoML service testing model operations.""" from __future__ import annotations @@ -28,7 +26,6 @@ from google.protobuf.struct_pb2 import Value from airflow.models.dag import DAG -from airflow.providers.google.cloud.hooks.automl import CloudAutoMLHook from airflow.providers.google.cloud.operators.automl import ( AutoMLBatchPredictOperator, AutoMLCreateDatasetOperator, @@ -37,8 +34,6 @@ AutoMLGetModelOperator, AutoMLImportDataOperator, AutoMLPredictOperator, - AutoMLTablesListColumnSpecsOperator, - AutoMLTablesListTableSpecsOperator, AutoMLTrainModelOperator, ) from airflow.providers.google.cloud.operators.gcs import ( @@ -49,7 +44,7 @@ from airflow.utils.trigger_rule import TriggerRule ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID", "default") -DAG_ID = "example_automl_model" +DAG_ID = "automl_model" GCP_PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT", "default") GCP_AUTOML_LOCATION = "us-central1" @@ -57,7 +52,7 @@ DATA_SAMPLE_GCS_BUCKET_NAME = f"bucket_{DAG_ID}_{ENV_ID}".replace("_", "-") RESOURCE_DATA_BUCKET = "airflow-system-tests-resources" -DATASET_NAME = f"md_tabular_{ENV_ID}".replace("-", "_") +DATASET_NAME = f"ds_{DAG_ID}_{ENV_ID}".replace("-", "_") DATASET = { "display_name": DATASET_NAME, "tables_dataset_metadata": {"target_column_spec_id": ""}, @@ -69,7 +64,7 @@ } # change the name here -MODEL_NAME = f"md_tabular_{ENV_ID}".replace("-", "_") +MODEL_NAME = f"md_{DAG_ID}_{ENV_ID}".replace("-", "_") MODEL = { "display_name": MODEL_NAME, "tables_model_metadata": {"train_budget_milli_node_hours": 1000}, @@ -95,29 +90,12 @@ Value(string_value="unknown"), ] -extract_object_id = CloudAutoMLHook.extract_object_id - - -def get_target_column_spec(columns_specs: list[dict], column_name: str) -> str: - """ - Using column name returns spec of the column. 
- """ - for column in columns_specs: - if column["display_name"] == column_name: - return extract_object_id(column) - raise Exception(f"Unknown target column: {column_name}") - with DAG( dag_id=DAG_ID, schedule="@once", start_date=datetime(2021, 1, 1), catchup=False, - user_defined_macros={ - "get_target_column_spec": get_target_column_spec, - "target": "Deposit", - "extract_object_id": extract_object_id, - }, tags=["example", "automl", "model"], ) as dag: create_bucket = GCSCreateBucketOperator( @@ -152,21 +130,6 @@ def get_target_column_spec(columns_specs: list[dict], column_name: str) -> str: input_config=IMPORT_INPUT_CONFIG, ) - list_tables_spec = AutoMLTablesListTableSpecsOperator( - task_id="list_tables_spec", - dataset_id=dataset_id, - location=GCP_AUTOML_LOCATION, - project_id=GCP_PROJECT_ID, - ) - - list_columns_spec = AutoMLTablesListColumnSpecsOperator( - task_id="list_columns_spec", - dataset_id=dataset_id, - table_spec_id="{{ extract_object_id(task_instance.xcom_pull('list_tables_spec')[0]) }}", - location=GCP_AUTOML_LOCATION, - project_id=GCP_PROJECT_ID, - ) - # [START howto_operator_automl_create_model] create_model = AutoMLTrainModelOperator( task_id="create_model", @@ -229,15 +192,15 @@ def get_target_column_spec(columns_specs: list[dict], column_name: str) -> str: ) delete_bucket = GCSDeleteBucketOperator( - task_id="delete_bucket", bucket_name=DATA_SAMPLE_GCS_BUCKET_NAME, trigger_rule=TriggerRule.ALL_DONE + task_id="delete_bucket", + bucket_name=DATA_SAMPLE_GCS_BUCKET_NAME, + trigger_rule=TriggerRule.ALL_DONE, ) ( # TEST SETUP [create_bucket >> move_dataset_file, create_dataset] >> import_dataset - >> list_tables_spec - >> list_columns_spec # TEST BODY >> create_model >> get_model diff --git a/tests/system/providers/google/cloud/automl/example_automl_translation.py b/tests/system/providers/google/cloud/automl/example_automl_translation.py index ba36f556c427de..41acdf764b8ecb 100644 --- 
a/tests/system/providers/google/cloud/automl/example_automl_translation.py +++ b/tests/system/providers/google/cloud/automl/example_automl_translation.py @@ -15,9 +15,8 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -""" -Example Airflow DAG that uses Google AutoML services. -""" + +"""Example Airflow DAG that uses Google AutoML Translation services.""" from __future__ import annotations @@ -31,7 +30,6 @@ from airflow.decorators import task from airflow.models.dag import DAG from airflow.models.xcom_arg import XComArg -from airflow.providers.google.cloud.hooks.automl import CloudAutoMLHook from airflow.providers.google.cloud.operators.automl import ( AutoMLCreateDatasetOperator, AutoMLDeleteDatasetOperator, @@ -43,7 +41,7 @@ from airflow.providers.google.cloud.transfers.gcs_to_gcs import GCSToGCSOperator from airflow.utils.trigger_rule import TriggerRule -DAG_ID = "example_automl_translate" +DAG_ID = "automl_translate" GCP_PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT", "default") ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID", "default") GCP_AUTOML_LOCATION = "us-central1" @@ -57,7 +55,7 @@ "translation_model_metadata": {}, } -DATASET_NAME = f"ds_translate_{ENV_ID}".replace("-", "_") +DATASET_NAME = f"ds_{DAG_ID}_{ENV_ID}".replace("-", "_") DATASET = { "display_name": DATASET_NAME, "translation_dataset_metadata": { @@ -72,8 +70,6 @@ AUTOML_DATASET_BUCKET = f"gs://{DATA_SAMPLE_GCS_BUCKET_NAME}/automl/{CSV_FILE_NAME}" IMPORT_INPUT_CONFIG = {"gcs_source": {"input_uris": [AUTOML_DATASET_BUCKET]}} -extract_object_id = CloudAutoMLHook.extract_object_id - # Example DAG for AutoML Translation with DAG( @@ -81,7 +77,6 @@ schedule="@once", start_date=datetime(2021, 1, 1), catchup=False, - user_defined_macros={"extract_object_id": extract_object_id}, tags=["example", "automl", "translate"], ) as dag: create_bucket = GCSCreateBucketOperator(