From eb3c65a2068c95470f03f747162a71182edbd37c Mon Sep 17 00:00:00 2001
From: Anthony
Date: Tue, 12 May 2020 21:48:01 -0700
Subject: [PATCH] automl: tables code sample clean-up
 [(#3571)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/3571)

* delete unused tables_dataset samples
* delete args code associated with unused automl_tables samples
* delete tests associated with unused automl_tables samples
* restore get_dataset method/args without region tagging
* restore update_dataset methods without region tagging

Co-authored-by: Takashi Matsuo
Co-authored-by: Leah E. Cole <6719667+leahecole@users.noreply.github.com>
---
 samples/tables/automl_tables_dataset.py | 330 ------------------------
 samples/tables/dataset_test.py          |  35 ---
 2 files changed, 365 deletions(-)

diff --git a/samples/tables/automl_tables_dataset.py b/samples/tables/automl_tables_dataset.py
index d9970510..144f2ee6 100644
--- a/samples/tables/automl_tables_dataset.py
+++ b/samples/tables/automl_tables_dataset.py
@@ -116,84 +116,8 @@ def list_datasets(project_id, compute_region, filter_=None):
     return result
 
 
-def list_table_specs(
-    project_id, compute_region, dataset_display_name, filter_=None
-):
-    """List all table specs."""
-    result = []
-    # [START automl_tables_list_specs]
-    # TODO(developer): Uncomment and set the following variables
-    # project_id = 'PROJECT_ID_HERE'
-    # compute_region = 'COMPUTE_REGION_HERE'
-    # dataset_display_name = 'DATASET_DISPLAY_NAME_HERE'
-    # filter_ = 'filter expression here'
-
-    from google.cloud import automl_v1beta1 as automl
-
-    client = automl.TablesClient(project=project_id, region=compute_region)
-
-    # List all the table specs in the dataset by applying filter.
-    response = client.list_table_specs(
-        dataset_display_name=dataset_display_name, filter_=filter_
-    )
-
-    print("List of table specs:")
-    for table_spec in response:
-        # Display the table_spec information.
-        print("Table spec name: {}".format(table_spec.name))
-        print("Table spec id: {}".format(table_spec.name.split("/")[-1]))
-        print(
-            "Table spec time column spec id: {}".format(
-                table_spec.time_column_spec_id
-            )
-        )
-        print("Table spec row count: {}".format(table_spec.row_count))
-        print("Table spec column count: {}".format(table_spec.column_count))
-
-        # [END automl_tables_list_specs]
-        result.append(table_spec)
-
-    return result
-
-
-def list_column_specs(
-    project_id, compute_region, dataset_display_name, filter_=None
-):
-    """List all column specs."""
-    result = []
-    # [START automl_tables_list_column_specs]
-    # TODO(developer): Uncomment and set the following variables
-    # project_id = 'PROJECT_ID_HERE'
-    # compute_region = 'COMPUTE_REGION_HERE'
-    # dataset_display_name = 'DATASET_DISPLAY_NAME_HERE'
-    # filter_ = 'filter expression here'
-
-    from google.cloud import automl_v1beta1 as automl
-
-    client = automl.TablesClient(project=project_id, region=compute_region)
-
-    # List all the table specs in the dataset by applying filter.
-    response = client.list_column_specs(
-        dataset_display_name=dataset_display_name, filter_=filter_
-    )
-
-    print("List of column specs:")
-    for column_spec in response:
-        # Display the column_spec information.
-        print("Column spec name: {}".format(column_spec.name))
-        print("Column spec id: {}".format(column_spec.name.split("/")[-1]))
-        print("Column spec display name: {}".format(column_spec.display_name))
-        print("Column spec data type: {}".format(column_spec.data_type))
-
-        # [END automl_tables_list_column_specs]
-        result.append(column_spec)
-
-    return result
-
-
 def get_dataset(project_id, compute_region, dataset_display_name):
     """Get the dataset."""
-    # [START automl_tables_get_dataset]
     # TODO(developer): Uncomment and set the following variables
     # project_id = 'PROJECT_ID_HERE'
     # compute_region = 'COMPUTE_REGION_HERE'
     # dataset_display_name = 'DATASET_DISPLAY_NAME_HERE'
@@ -217,83 +141,9 @@ def get_dataset(project_id, compute_region, dataset_display_name):
     print("\tseconds: {}".format(dataset.create_time.seconds))
     print("\tnanos: {}".format(dataset.create_time.nanos))
 
-    # [END automl_tables_get_dataset]
-
     return dataset
 
 
-def get_table_spec(project_id, compute_region, dataset_id, table_spec_id):
-    """Get the table spec."""
-    # [START automl_tables_get_table_spec]
-    # TODO(developer): Uncomment and set the following variables
-    # project_id = 'PROJECT_ID_HERE'
-    # compute_region = 'COMPUTE_REGION_HERE'
-    # dataset_id = 'DATASET_ID_HERE'
-    # table_spec_id = 'TABLE_SPEC_ID_HERE'
-
-    from google.cloud import automl_v1beta1 as automl
-
-    client = automl.TablesClient(project=project_id, region=compute_region)
-
-    # Get the full path of the table spec.
-    table_spec_name = client.auto_ml_client.table_spec_path(
-        project_id, compute_region, dataset_id, table_spec_id
-    )
-
-    # Get complete detail of the table spec.
-    table_spec = client.get_table_spec(table_spec_name)
-
-    # Display the table spec information.
-    print("Table spec name: {}".format(table_spec.name))
-    print("Table spec id: {}".format(table_spec.name.split("/")[-1]))
-    print(
-        "Table spec time column spec id: {}".format(
-            table_spec.time_column_spec_id
-        )
-    )
-    print("Table spec row count: {}".format(table_spec.row_count))
-    print("Table spec column count: {}".format(table_spec.column_count))
-
-    # [END automl_tables_get_table_spec]
-
-
-def get_column_spec(
-    project_id, compute_region, dataset_id, table_spec_id, column_spec_id
-):
-    """Get the column spec."""
-    # [START automl_tables_get_column_spec]
-    # TODO(developer): Uncomment and set the following variables
-    # project_id = 'PROJECT_ID_HERE'
-    # compute_region = 'COMPUTE_REGION_HERE'
-    # dataset_id = 'DATASET_ID_HERE'
-    # table_spec_id = 'TABLE_SPEC_ID_HERE'
-    # column_spec_id = 'COLUMN_SPEC_ID_HERE'
-
-    from google.cloud import automl_v1beta1 as automl
-
-    client = automl.TablesClient(project=project_id, region=compute_region)
-
-    # Get the full path of the column spec.
-    column_spec_name = client.auto_ml_client.column_spec_path(
-        project_id, compute_region, dataset_id, table_spec_id, column_spec_id
-    )
-
-    # Get complete detail of the column spec.
-    column_spec = client.get_column_spec(column_spec_name)
-
-    # Display the column spec information.
-    print("Column spec name: {}".format(column_spec.name))
-    print("Column spec id: {}".format(column_spec.name.split("/")[-1]))
-    print("Column spec display name: {}".format(column_spec.display_name))
-    print("Column spec data type: {}".format(column_spec.data_type))
-    print("Column spec data stats: {}".format(column_spec.data_stats))
-    print("Column spec top correlated columns\n")
-    for column_correlation in column_spec.top_correlated_columns:
-        print(column_correlation)
-
-    # [END automl_tables_get_column_spec]
-
-
 def import_data(project_id, compute_region, dataset_display_name, path):
     """Import structured data."""
     # [START automl_tables_import_data]
@@ -327,32 +177,6 @@ def import_data(project_id, compute_region, dataset_display_name, path):
     # [END automl_tables_import_data]
 
 
-def export_data(project_id, compute_region, dataset_display_name, gcs_uri):
-    """Export a dataset to a Google Cloud Storage bucket."""
-    # [START automl_tables_export_data]
-    # TODO(developer): Uncomment and set the following variables
-    # project_id = 'PROJECT_ID_HERE'
-    # compute_region = 'COMPUTE_REGION_HERE'
-    # dataset_display_name = 'DATASET_DISPLAY_NAME_HERE'
-    # gcs_uri: 'GCS_URI_HERE'
-
-    from google.cloud import automl_v1beta1 as automl
-
-    client = automl.TablesClient(project=project_id, region=compute_region)
-
-    # Export the dataset to the output URI.
-    response = client.export_data(
-        dataset_display_name=dataset_display_name,
-        gcs_output_uri_prefix=gcs_uri,
-    )
-
-    print("Processing export...")
-    # synchronous check of operation status.
-    print("Data exported. {}".format(response.result()))
-
-    # [END automl_tables_export_data]
-
-
 def update_dataset(
     project_id,
     compute_region,
@@ -362,7 +186,6 @@ def update_dataset(
     test_train_column_spec_name=None,
 ):
     """Update dataset."""
-    # [START automl_tables_update_dataset]
     # TODO(developer): Uncomment and set the following variables
     # project_id = 'PROJECT_ID_HERE'
     # compute_region = 'COMPUTE_REGION_HERE'
@@ -394,71 +217,6 @@ def update_dataset(
     )
     print("Test/train column updated. {}".format(response))
 
-    # [END automl_tables_update_dataset]
-
-
-def update_table_spec(
-    project_id,
-    compute_region,
-    dataset_display_name,
-    time_column_spec_display_name,
-):
-    """Update table spec."""
-    # [START automl_tables_update_table_spec]
-    # TODO(developer): Uncomment and set the following variables
-    # project_id = 'PROJECT_ID_HERE'
-    # compute_region = 'COMPUTE_REGION_HERE'
-    # dataset_display_name = 'DATASET_DISPLAY_NAME_HERE'
-    # time_column_spec_display_name = 'time_column_spec_display_name_HERE'
-
-    from google.cloud import automl_v1beta1 as automl
-
-    client = automl.TablesClient(project=project_id, region=compute_region)
-
-    response = client.set_time_column(
-        dataset_display_name=dataset_display_name,
-        column_spec_display_name=time_column_spec_display_name,
-    )
-
-    # synchronous check of operation status.
-    print("Table spec updated. {}".format(response))
-    # [END automl_tables_update_table_spec]
-
-
-def update_column_spec(
-    project_id,
-    compute_region,
-    dataset_display_name,
-    column_spec_display_name,
-    type_code,
-    nullable=None,
-):
-    """Update column spec."""
-    # [START automl_tables_update_column_spec]
-    # TODO(developer): Uncomment and set the following variables
-    # project_id = 'PROJECT_ID_HERE'
-    # compute_region = 'COMPUTE_REGION_HERE'
-    # dataset_display_name = 'DATASET_DISPLAY_NAME_HERE'
-    # column_spec_display_name = 'COLUMN_SPEC_DISPLAY_NAME_HERE'
-    # type_code = 'TYPE_CODE_HERE'
-    # nullable = 'NULLABLE_HERE' or None if unchanged
-
-    from google.cloud import automl_v1beta1 as automl
-
-    client = automl.TablesClient(project=project_id, region=compute_region)
-
-    # Update the column spec.
-    response = client.update_column_spec(
-        dataset_display_name=dataset_display_name,
-        column_spec_display_name=column_spec_display_name,
-        type_code=type_code,
-        nullable=nullable,
-    )
-
-    # synchronous check of operation status.
-    print("Table spec updated. {}".format(response))
-    # [END automl_tables_update_column_spec]
-
 
 def delete_dataset(project_id, compute_region, dataset_display_name):
     """Delete a dataset"""
@@ -497,48 +255,17 @@ def delete_dataset(project_id, compute_region, dataset_display_name):
     )
     list_datasets_parser.add_argument("--filter_")
 
-    list_table_specs_parser = subparsers.add_parser(
-        "list_table_specs", help=list_table_specs.__doc__
-    )
-    list_table_specs_parser.add_argument("--dataset_display_name")
-    list_table_specs_parser.add_argument("--filter_")
-
-    list_column_specs_parser = subparsers.add_parser(
-        "list_column_specs", help=list_column_specs.__doc__
-    )
-    list_column_specs_parser.add_argument("--dataset_display_name")
-    list_column_specs_parser.add_argument("--filter_")
-
     get_dataset_parser = subparsers.add_parser(
         "get_dataset", help=get_dataset.__doc__
     )
     get_dataset_parser.add_argument("--dataset_display_name")
 
-    get_table_spec_parser = subparsers.add_parser(
-        "get_table_spec", help=get_table_spec.__doc__
-    )
-    get_table_spec_parser.add_argument("--dataset_id")
-    get_table_spec_parser.add_argument("--table_spec_id")
-
-    get_column_spec_parser = subparsers.add_parser(
-        "get_column_spec", help=get_column_spec.__doc__
-    )
-    get_column_spec_parser.add_argument("--dataset_id")
-    get_column_spec_parser.add_argument("--table_spec_id")
-    get_column_spec_parser.add_argument("--column_spec_id")
-
     import_data_parser = subparsers.add_parser(
         "import_data", help=import_data.__doc__
    )
     import_data_parser.add_argument("--dataset_display_name")
     import_data_parser.add_argument("--path")
 
-    export_data_parser = subparsers.add_parser(
-        "export_data", help=export_data.__doc__
-    )
-    export_data_parser.add_argument("--dataset_display_name")
-    export_data_parser.add_argument("--gcs_uri")
-
     update_dataset_parser = subparsers.add_parser(
         "update_dataset", help=update_dataset.__doc__
     )
@@ -547,20 +274,6 @@ def delete_dataset(project_id, compute_region, dataset_display_name):
     update_dataset_parser.add_argument("--weight_column_spec_name")
     update_dataset_parser.add_argument("--ml_use_column_spec_name")
 
-    update_table_spec_parser = subparsers.add_parser(
-        "update_table_spec", help=update_table_spec.__doc__
-    )
-    update_table_spec_parser.add_argument("--dataset_display_name")
-    update_table_spec_parser.add_argument("--time_column_spec_display_name")
-
-    update_column_spec_parser = subparsers.add_parser(
-        "update_column_spec", help=update_column_spec.__doc__
-    )
-    update_column_spec_parser.add_argument("--dataset_display_name")
-    update_column_spec_parser.add_argument("--column_spec_display_name")
-    update_column_spec_parser.add_argument("--type_code")
-    update_column_spec_parser.add_argument("--nullable", type=bool)
-
     delete_dataset_parser = subparsers.add_parser(
         "delete_dataset", help=delete_dataset.__doc__
     )
@@ -574,39 +287,12 @@ def delete_dataset(project_id, compute_region, dataset_display_name):
         create_dataset(project_id, compute_region, args.dataset_name)
     if args.command == "list_datasets":
         list_datasets(project_id, compute_region, args.filter_)
-    if args.command == "list_table_specs":
-        list_table_specs(
-            project_id, compute_region, args.dataset_display_name, args.filter_
-        )
-    if args.command == "list_column_specs":
-        list_column_specs(
-            project_id, compute_region, args.dataset_display_name, args.filter_
-        )
     if args.command == "get_dataset":
         get_dataset(project_id, compute_region, args.dataset_display_name)
-    if args.command == "get_table_spec":
-        get_table_spec(
-            project_id,
-            compute_region,
-            args.dataset_display_name,
-            args.table_spec_id,
-        )
-    if args.command == "get_column_spec":
-        get_column_spec(
-            project_id,
-            compute_region,
-            args.dataset_display_name,
-            args.table_spec_id,
-            args.column_spec_id,
-        )
     if args.command == "import_data":
         import_data(
             project_id, compute_region, args.dataset_display_name, args.path
         )
-    if args.command == "export_data":
-        export_data(
-            project_id, compute_region, args.dataset_display_name, args.gcs_uri
-        )
     if args.command == "update_dataset":
         update_dataset(
             project_id,
@@ -616,21 +302,5 @@ def delete_dataset(project_id, compute_region, dataset_display_name):
             args.weight_column_spec_name,
             args.ml_use_column_spec_name,
         )
-    if args.command == "update_table_spec":
-        update_table_spec(
-            project_id,
-            compute_region,
-            args.dataset_display_name,
-            args.time_column_spec_display_name,
-        )
-    if args.command == "update_column_spec":
-        update_column_spec(
-            project_id,
-            compute_region,
-            args.dataset_display_name,
-            args.column_spec_display_name,
-            args.type_code,
-            args.nullable,
-        )
     if args.command == "delete_dataset":
         delete_dataset(project_id, compute_region, args.dataset_display_name)
diff --git a/samples/tables/dataset_test.py b/samples/tables/dataset_test.py
index 9a1e6aa0..ea25c3f1 100644
--- a/samples/tables/dataset_test.py
+++ b/samples/tables/dataset_test.py
@@ -96,21 +96,6 @@ def test_dataset_update(capsys):
     assert "Weight column updated." in out
 
 
-def test_column_update(capsys):
-    dataset = ensure_dataset_ready()
-    automl_tables_dataset.update_column_spec(
-        PROJECT,
-        REGION,
-        dataset.display_name,
-        column_spec_display_name="Job",
-        type_code="CATEGORY",
-        nullable=False,
-    )
-
-    out, _ = capsys.readouterr()
-    assert "Table spec updated." in out
-
-
 def test_list_datasets():
     ensure_dataset_ready()
     assert (
@@ -124,23 +109,3 @@ def test_list_datasets():
         )
         is not None
     )
-
-
-def test_list_table_specs():
-    dataset = ensure_dataset_ready()
-    ts = automl_tables_dataset.list_table_specs(
-        PROJECT, REGION, dataset.display_name
-    )
-    assert len(ts) > 0
-    for t in ts:
-        assert t.name.startswith(dataset.name)
-
-
-def test_list_column_specs():
-    dataset = ensure_dataset_ready()
-    cs = automl_tables_dataset.list_column_specs(
-        PROJECT, REGION, dataset.display_name
-    )
-    assert len(cs) > 0
-    for c in cs:
-        assert c.name.startswith(dataset.name)
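
Reviewer note, not part of the diff above: the retained get_dataset and update_dataset samples keep the same TablesClient pattern used throughout this file. A minimal sketch of exercising the retained get_dataset path directly; the project, region, and dataset values below are placeholders for illustration, not values taken from this patch:

    from google.cloud import automl_v1beta1 as automl

    # Placeholder identifiers; substitute your own project, region, and dataset.
    project_id = "my-project"
    compute_region = "us-central1"

    client = automl.TablesClient(project=project_id, region=compute_region)

    # Look the dataset up by display name, as the retained sample does.
    dataset = client.get_dataset(dataset_display_name="my_dataset")
    print("Dataset name: {}".format(dataset.name))

The same call is reachable through the sample's CLI, e.g. `python automl_tables_dataset.py get_dataset --dataset_display_name=my_dataset`, assuming the script's top-level project and region flags, which sit outside the hunks shown here.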