From d02d73c74575d34adf4f1e48ab82a42f1ff286da Mon Sep 17 00:00:00 2001 From: kirk Date: Mon, 29 Jul 2024 20:07:00 -0400 Subject: [PATCH 01/10] janky connection test POC for bigquery --- src/fides/api/util/connection_util.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/fides/api/util/connection_util.py b/src/fides/api/util/connection_util.py index d4b8b9da90..c8158101a4 100644 --- a/src/fides/api/util/connection_util.py +++ b/src/fides/api/util/connection_util.py @@ -43,6 +43,7 @@ from fides.api.schemas.connection_configuration.connection_secrets import ( TestStatusMessage, ) +from fides.api.schemas.connection_configuration.connection_secrets_bigquery import BigQuerySchema from fides.api.schemas.connection_configuration.connection_secrets_saas import ( validate_saas_secrets_external_references, ) @@ -279,7 +280,6 @@ def get_connection_config_or_error( ) -> ConnectionConfig: """Helper to load the ConnectionConfig object or throw a 404""" connection_config = ConnectionConfig.get_by(db, field="key", value=connection_key) - logger.info("Finding connection configuration with key '{}'", connection_key) if not connection_config: raise HTTPException( status_code=HTTP_404_NOT_FOUND, @@ -323,7 +323,25 @@ def connection_status( ) -> TestStatusMessage: """Connect, verify with a trivial query or API request, and report the status.""" - connector = get_connector(connection_config) + # test if this is a Bigquery connection + if connection_config.connection_type == ConnectionType.bigquery: + # run a different connection test where we pull all projects + from google.cloud.bigquery import Client as BigQueryClient + connector = get_connector(connection_config) + secrets = connector.configuration.secrets or {} + keyfile_creds = secrets.get("keyfile_creds", {}) + client = BigQueryClient.from_service_account_info(keyfile_creds) + dbs = [project for project in client.list_projects()] + if dbs: + status = ConnectionTestStatus.succeeded + else: + status = ConnectionTestStatus.failed + msg = "No projects found in BigQuery, connection test failed." + + return TestStatusMessage( + msg=msg, + test_status=status, + ) try: status: Optional[ConnectionTestStatus] = connector.test_connection() From 08e3263b58c0d2e2ae8507a6a14efdfe5d9515c3 Mon Sep 17 00:00:00 2001 From: kirk Date: Mon, 29 Jul 2024 20:08:22 -0400 Subject: [PATCH 02/10] add back in --- src/fides/api/util/connection_util.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/fides/api/util/connection_util.py b/src/fides/api/util/connection_util.py index c8158101a4..9a7e99a9ad 100644 --- a/src/fides/api/util/connection_util.py +++ b/src/fides/api/util/connection_util.py @@ -280,6 +280,7 @@ def get_connection_config_or_error( ) -> ConnectionConfig: """Helper to load the ConnectionConfig object or throw a 404""" connection_config = ConnectionConfig.get_by(db, field="key", value=connection_key) + logger.info("Finding connection configuration with key '{}'", connection_key) if not connection_config: raise HTTPException( status_code=HTTP_404_NOT_FOUND, @@ -343,6 +344,8 @@ def connection_status( test_status=status, ) + connector = get_connector(connection_config) + try: status: Optional[ConnectionTestStatus] = connector.test_connection() From 4e7a8b268b57a85e455f1aff8dc98cdde311ecfc Mon Sep 17 00:00:00 2001 From: kirk Date: Tue, 30 Jul 2024 09:46:09 -0400 Subject: [PATCH 03/10] move to overriding the BigQueryConnector test connection method --- .../connection_secrets_bigquery.py | 4 ++++ .../api/service/connectors/fides_connector.py | 1 + .../api/service/connectors/sql_connector.py | 18 ++++++++++++++-- src/fides/api/util/connection_util.py | 21 ------------------- src/fides/connectors/models.py | 1 + 5 files changed, 22 insertions(+), 23 deletions(-) diff --git a/src/fides/api/schemas/connection_configuration/connection_secrets_bigquery.py b/src/fides/api/schemas/connection_configuration/connection_secrets_bigquery.py index 9c634f96f8..086c91f817 100644 --- a/src/fides/api/schemas/connection_configuration/connection_secrets_bigquery.py +++ b/src/fides/api/schemas/connection_configuration/connection_secrets_bigquery.py @@ -3,6 +3,7 @@ from pydantic import EmailStr, Field, parse_obj_as, validator from pydantic.main import BaseModel +from google.cloud.bigquery import Client as BigQueryClient from fides.api.schemas.base_class import NoValidationSchema from fides.api.schemas.connection_configuration.connection_secrets import ( @@ -47,6 +48,9 @@ def parse_keyfile_creds(cls, v: Union[str, dict]) -> KeyfileCreds: v = json.loads(v) return parse_obj_as(KeyfileCreds, v) + def get_client(self) -> BigQueryClient: + return BigQueryClient.from_service_account_info(self.keyfile_creds.dict()) + class BigQueryDocsSchema(BigQuerySchema, NoValidationSchema): """BigQuery Secrets Schema for API Docs""" diff --git a/src/fides/api/service/connectors/fides_connector.py b/src/fides/api/service/connectors/fides_connector.py index e4027a4344..0db38e197d 100644 --- a/src/fides/api/service/connectors/fides_connector.py +++ b/src/fides/api/service/connectors/fides_connector.py @@ -73,6 +73,7 @@ def test_connection(self) -> Optional[ConnectionTestStatus]: by attempting an authorized API call and ensuring success """ log.info("Starting test connection to {}", self.configuration.key) + log.info("INSIDE FIDES CONN") try: client: FidesClient = self.client() client.request_status() diff --git a/src/fides/api/service/connectors/sql_connector.py b/src/fides/api/service/connectors/sql_connector.py index 20abfa93af..9b7ec4f6e1 100644 --- a/src/fides/api/service/connectors/sql_connector.py +++ b/src/fides/api/service/connectors/sql_connector.py @@ -11,6 +11,7 @@ from cryptography.hazmat.backends import default_backend from cryptography.hazmat.primitives import serialization from google.cloud.sql.connector import Connector +from google.cloud.bigquery import Client as BigQueryClient from google.oauth2 import service_account from loguru import logger from snowflake.sqlalchemy import URL as Snowflake_URL @@ -32,7 +33,7 @@ SSHTunnelConfigNotFoundException, ) from fides.api.graph.execution import ExecutionNode -from fides.api.models.connectionconfig import ConnectionConfig, ConnectionTestStatus +from fides.api.models.connectionconfig import ConnectionConfig, ConnectionTestStatus, ConnectionType from fides.api.models.policy import Policy from fides.api.models.privacy_request import PrivacyRequest, RequestTask from fides.api.schemas.connection_configuration import ( @@ -125,7 +126,6 @@ def query_config(self, node: ExecutionNode) -> SQLQueryConfig: def test_connection(self) -> Optional[ConnectionTestStatus]: """Connects to the SQL DB and makes a trivial query.""" - logger.info("Starting test connection to {}", self.configuration.key) try: engine = self.client() @@ -510,6 +510,20 @@ def query_config(self, node: ExecutionNode) -> BigQueryQueryConfig: """Query wrapper corresponding to the input execution_node.""" return BigQueryQueryConfig(node) + # Overrides SQLConnector.test_connection + def test_connection(self) -> Optional[ConnectionTestStatus]: + try: + bq_schema = BigQuerySchema(**self.configuration.secrets) + client = bq_schema.get_client() + all_datasets = [dataset for dataset in client.list_datasets()] + print(f"all_datasets: {all_datasets}") + if all_datasets: + return ConnectionTestStatus.succeeded + else: + raise ConnectionException("No datasets found with the provided credentials.") + except Exception as e: + raise ConnectionException(f"Connection error: {e}") + def mask_data( self, node: ExecutionNode, diff --git a/src/fides/api/util/connection_util.py b/src/fides/api/util/connection_util.py index 9a7e99a9ad..d4b8b9da90 100644 --- a/src/fides/api/util/connection_util.py +++ b/src/fides/api/util/connection_util.py @@ -43,7 +43,6 @@ from fides.api.schemas.connection_configuration.connection_secrets import ( TestStatusMessage, ) -from fides.api.schemas.connection_configuration.connection_secrets_bigquery import BigQuerySchema from fides.api.schemas.connection_configuration.connection_secrets_saas import ( validate_saas_secrets_external_references, ) @@ -324,26 +323,6 @@ def connection_status( ) -> TestStatusMessage: """Connect, verify with a trivial query or API request, and report the status.""" - # test if this is a Bigquery connection - if connection_config.connection_type == ConnectionType.bigquery: - # run a different connection test where we pull all projects - from google.cloud.bigquery import Client as BigQueryClient - connector = get_connector(connection_config) - secrets = connector.configuration.secrets or {} - keyfile_creds = secrets.get("keyfile_creds", {}) - client = BigQueryClient.from_service_account_info(keyfile_creds) - dbs = [project for project in client.list_projects()] - if dbs: - status = ConnectionTestStatus.succeeded - else: - status = ConnectionTestStatus.failed - msg = "No projects found in BigQuery, connection test failed." - - return TestStatusMessage( - msg=msg, - test_status=status, - ) - connector = get_connector(connection_config) try: diff --git a/src/fides/connectors/models.py b/src/fides/connectors/models.py index cc65d97106..6adbcdbfb2 100644 --- a/src/fides/connectors/models.py +++ b/src/fides/connectors/models.py @@ -2,6 +2,7 @@ # pylint: disable=C0115,C0116, E0213 from typing import List, Optional +from google.cloud.bigquery import Client as BigQueryClient from pydantic import BaseModel From 292da459a29af787153de9eb1ded2a5d65664358 Mon Sep 17 00:00:00 2001 From: kirk Date: Tue, 30 Jul 2024 09:48:10 -0400 Subject: [PATCH 04/10] revert unneeded --- src/fides/api/service/connectors/fides_connector.py | 1 - src/fides/connectors/models.py | 1 - 2 files changed, 2 deletions(-) diff --git a/src/fides/api/service/connectors/fides_connector.py b/src/fides/api/service/connectors/fides_connector.py index 0db38e197d..e4027a4344 100644 --- a/src/fides/api/service/connectors/fides_connector.py +++ b/src/fides/api/service/connectors/fides_connector.py @@ -73,7 +73,6 @@ def test_connection(self) -> Optional[ConnectionTestStatus]: by attempting an authorized API call and ensuring success """ log.info("Starting test connection to {}", self.configuration.key) - log.info("INSIDE FIDES CONN") try: client: FidesClient = self.client() client.request_status() diff --git a/src/fides/connectors/models.py b/src/fides/connectors/models.py index 6adbcdbfb2..cc65d97106 100644 --- a/src/fides/connectors/models.py +++ b/src/fides/connectors/models.py @@ -2,7 +2,6 @@ # pylint: disable=C0115,C0116, E0213 from typing import List, Optional -from google.cloud.bigquery import Client as BigQueryClient from pydantic import BaseModel From 338f772ce38086b8b33d6b3ec1704ee2bf20ab2f Mon Sep 17 00:00:00 2001 From: kirk Date: Tue, 30 Jul 2024 09:48:29 -0400 Subject: [PATCH 05/10] revert unneeded --- src/fides/api/service/connectors/sql_connector.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/fides/api/service/connectors/sql_connector.py b/src/fides/api/service/connectors/sql_connector.py index 9b7ec4f6e1..9482e0b028 100644 --- a/src/fides/api/service/connectors/sql_connector.py +++ b/src/fides/api/service/connectors/sql_connector.py @@ -33,7 +33,7 @@ SSHTunnelConfigNotFoundException, ) from fides.api.graph.execution import ExecutionNode -from fides.api.models.connectionconfig import ConnectionConfig, ConnectionTestStatus, ConnectionType +from fides.api.models.connectionconfig import ConnectionConfig, ConnectionTestStatus from fides.api.models.policy import Policy from fides.api.models.privacy_request import PrivacyRequest, RequestTask from fides.api.schemas.connection_configuration import ( @@ -126,6 +126,7 @@ def query_config(self, node: ExecutionNode) -> SQLQueryConfig: def test_connection(self) -> Optional[ConnectionTestStatus]: """Connects to the SQL DB and makes a trivial query.""" + logger.info("Starting test connection to {}", self.configuration.key) try: engine = self.client() From f694506b394afcac9b7fe3b4a87c0c4b2820c2c8 Mon Sep 17 00:00:00 2001 From: kirk Date: Tue, 30 Jul 2024 09:51:48 -0400 Subject: [PATCH 06/10] projects, not datasets --- src/fides/api/service/connectors/sql_connector.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/fides/api/service/connectors/sql_connector.py b/src/fides/api/service/connectors/sql_connector.py index 9482e0b028..4604a82fa8 100644 --- a/src/fides/api/service/connectors/sql_connector.py +++ b/src/fides/api/service/connectors/sql_connector.py @@ -516,9 +516,8 @@ def test_connection(self) -> Optional[ConnectionTestStatus]: try: bq_schema = BigQuerySchema(**self.configuration.secrets) client = bq_schema.get_client() - all_datasets = [dataset for dataset in client.list_datasets()] - print(f"all_datasets: {all_datasets}") - if all_datasets: + all_projects = [project for project in client.list_projects()] + if all_projects: return ConnectionTestStatus.succeeded else: raise ConnectionException("No datasets found with the provided credentials.") From 591456b5adb4ba5f78f46310cf1b609f1ae76b63 Mon Sep 17 00:00:00 2001 From: kirk Date: Tue, 30 Jul 2024 10:46:25 -0400 Subject: [PATCH 07/10] isort --- .../connection_configuration/connection_secrets_bigquery.py | 2 +- src/fides/api/service/connectors/sql_connector.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fides/api/schemas/connection_configuration/connection_secrets_bigquery.py b/src/fides/api/schemas/connection_configuration/connection_secrets_bigquery.py index 086c91f817..38c03a74e7 100644 --- a/src/fides/api/schemas/connection_configuration/connection_secrets_bigquery.py +++ b/src/fides/api/schemas/connection_configuration/connection_secrets_bigquery.py @@ -1,9 +1,9 @@ import json from typing import List, Optional, Union +from google.cloud.bigquery import Client as BigQueryClient from pydantic import EmailStr, Field, parse_obj_as, validator from pydantic.main import BaseModel -from google.cloud.bigquery import Client as BigQueryClient from fides.api.schemas.base_class import NoValidationSchema from fides.api.schemas.connection_configuration.connection_secrets import ( diff --git a/src/fides/api/service/connectors/sql_connector.py b/src/fides/api/service/connectors/sql_connector.py index 4604a82fa8..73a8becb13 100644 --- a/src/fides/api/service/connectors/sql_connector.py +++ b/src/fides/api/service/connectors/sql_connector.py @@ -10,8 +10,8 @@ from aiohttp.client_exceptions import ClientResponseError from cryptography.hazmat.backends import default_backend from cryptography.hazmat.primitives import serialization -from google.cloud.sql.connector import Connector from google.cloud.bigquery import Client as BigQueryClient +from google.cloud.sql.connector import Connector from google.oauth2 import service_account from loguru import logger from snowflake.sqlalchemy import URL as Snowflake_URL From 16d65db4c965c945833221f7d03c3fb705a31073 Mon Sep 17 00:00:00 2001 From: kirk Date: Wed, 31 Jul 2024 11:23:40 -0400 Subject: [PATCH 08/10] docstring and log update --- src/fides/api/service/connectors/sql_connector.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/fides/api/service/connectors/sql_connector.py b/src/fides/api/service/connectors/sql_connector.py index 73a8becb13..e2c158efab 100644 --- a/src/fides/api/service/connectors/sql_connector.py +++ b/src/fides/api/service/connectors/sql_connector.py @@ -513,6 +513,13 @@ def query_config(self, node: ExecutionNode) -> BigQueryQueryConfig: # Overrides SQLConnector.test_connection def test_connection(self) -> Optional[ConnectionTestStatus]: + """ + Overrides SQLConnector.test_connection with a BigQuery-specific connection test. + + The connection is tested using the native python client for BigQuery, since that is what's used + by the detection and discovery workflows/codepaths. + TODO: migrate the rest of this class, used for DSR execution, to also make use of the native bigquery client. + """ try: bq_schema = BigQuerySchema(**self.configuration.secrets) client = bq_schema.get_client() @@ -520,8 +527,14 @@ def test_connection(self) -> Optional[ConnectionTestStatus]: if all_projects: return ConnectionTestStatus.succeeded else: - raise ConnectionException("No datasets found with the provided credentials.") + logger.error( + f"No Bigquery Projects found with the provided credentials." + ) + raise ConnectionException( + "No Bigquery Projects found with the provided credentials." + ) except Exception as e: + logger.exception(f"Error testing connection to remote BigQuery {str(e)}") raise ConnectionException(f"Connection error: {e}") def mask_data( From 71f7613b88947abc58fd365d0501c0d0f954041f Mon Sep 17 00:00:00 2001 From: kirk Date: Wed, 31 Jul 2024 13:30:04 -0400 Subject: [PATCH 09/10] mypy & pylint fixes --- src/fides/api/service/connectors/sql_connector.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/fides/api/service/connectors/sql_connector.py b/src/fides/api/service/connectors/sql_connector.py index e2c158efab..e9e01953a0 100644 --- a/src/fides/api/service/connectors/sql_connector.py +++ b/src/fides/api/service/connectors/sql_connector.py @@ -10,7 +10,6 @@ from aiohttp.client_exceptions import ClientResponseError from cryptography.hazmat.backends import default_backend from cryptography.hazmat.primitives import serialization -from google.cloud.bigquery import Client as BigQueryClient from google.cloud.sql.connector import Connector from google.oauth2 import service_account from loguru import logger @@ -521,18 +520,15 @@ def test_connection(self) -> Optional[ConnectionTestStatus]: TODO: migrate the rest of this class, used for DSR execution, to also make use of the native bigquery client. """ try: - bq_schema = BigQuerySchema(**self.configuration.secrets) + bq_schema = BigQuerySchema(**self.configuration.secrets or {}) client = bq_schema.get_client() all_projects = [project for project in client.list_projects()] if all_projects: return ConnectionTestStatus.succeeded - else: - logger.error( - f"No Bigquery Projects found with the provided credentials." - ) - raise ConnectionException( - "No Bigquery Projects found with the provided credentials." - ) + logger.error("No Bigquery Projects found with the provided credentials.") + raise ConnectionException( + "No Bigquery Projects found with the provided credentials." + ) except Exception as e: logger.exception(f"Error testing connection to remote BigQuery {str(e)}") raise ConnectionException(f"Connection error: {e}") From 7256029c54116ebbbfcaf2d1cebbae72f91d70a3 Mon Sep 17 00:00:00 2001 From: kirk Date: Wed, 31 Jul 2024 14:20:00 -0400 Subject: [PATCH 10/10] run connection test --- tests/fixtures/bigquery_fixtures.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/fixtures/bigquery_fixtures.py b/tests/fixtures/bigquery_fixtures.py index 14f8e63c8e..5cafbc44ec 100644 --- a/tests/fixtures/bigquery_fixtures.py +++ b/tests/fixtures/bigquery_fixtures.py @@ -163,6 +163,7 @@ def bigquery_test_engine() -> Generator: connector: BigQueryConnector = get_connector(connection_config) engine = connector.client() + connector.test_connection() yield engine engine.dispose()