diff --git a/ingestion/setup.py b/ingestion/setup.py index 804f864e4e7e..f2dd6fb58d69 100644 --- a/ingestion/setup.py +++ b/ingestion/setup.py @@ -143,6 +143,7 @@ VERSIONS["pyarrow"], "sqlalchemy-bigquery>=1.2.2", }, + "bigtable": {"google-cloud-bigtable>=2.0.0", VERSIONS["pandas"]}, "clickhouse": {"clickhouse-driver~=0.2", "clickhouse-sqlalchemy~=0.2"}, "dagster": { VERSIONS["pymysql"], diff --git a/ingestion/src/metadata/examples/workflows/bigtable.yaml b/ingestion/src/metadata/examples/workflows/bigtable.yaml new file mode 100644 index 000000000000..652345d38bd1 --- /dev/null +++ b/ingestion/src/metadata/examples/workflows/bigtable.yaml @@ -0,0 +1,32 @@ +source: + type: bigtable + serviceName: local_bigtable + serviceConnection: + config: + type: BigTable + credentials: + gcpConfig: + type: service_account + projectId: project_id + privateKeyId: private_key_id + privateKey: private_key + clientEmail: gcpuser@project_id.iam.gserviceaccount.com + clientId: client_id + authUri: https://accounts.google.com/o/oauth2/auth + tokenUri: https://oauth2.googleapis.com/token + authProviderX509CertUrl: https://www.googleapis.com/oauth2/v1/certs + clientX509CertUrl: https://www.googleapis.com/oauth2/v1/certs + + sourceConfig: + config: + type: DatabaseMetadata +sink: + type: metadata-rest + config: {} +workflowConfig: + loggerLevel: DEBUG + openMetadataServerConfig: + hostPort: http://localhost:8585/api + authProvider: openmetadata + securityConfig: + jwtToken: "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg" diff --git a/ingestion/src/metadata/ingestion/source/database/bigtable/__init__.py b/ingestion/src/metadata/ingestion/source/database/bigtable/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/ingestion/src/metadata/ingestion/source/database/bigtable/client.py b/ingestion/src/metadata/ingestion/source/database/bigtable/client.py new file mode 100644 index 000000000000..acb7e1883662 --- /dev/null +++ b/ingestion/src/metadata/ingestion/source/database/bigtable/client.py @@ -0,0 +1,62 @@ +# Copyright 2024 Collate +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""A client for Google Cloud Bigtable that supports multiple projects.""" +from functools import partial +from typing import List, Optional, Type + +from google import auth +from google.cloud.bigtable import Client + +NoProject = object() + + +class MultiProjectClient: + """Google Cloud Client does not support ad-hoc project switching. This class wraps the client and allows + switching between projects. 
If no project is specified, the client will not have a project set and will try
+    to resolve it from ADC.
+    Example usage:
+    ```
+    from google.cloud.bigtable import Client
+    client = MultiProjectClient(Client, project_ids=["project1", "project2"])
+    instances_project1 = client.list_instances("project1")
+    instances_project2 = client.list_instances("project2")
+    ```
+    """
+
+    def __init__(
+        self,
+        client_class: Type[Client],
+        project_ids: Optional[List[str]] = None,
+        **client_kwargs,
+    ):
+        if project_ids:
+            self.clients = {
+                project_id: client_class(project=project_id, **client_kwargs)
+                for project_id in project_ids
+            }
+        else:
+            self.clients = {NoProject: client_class(**client_kwargs)}
+
+    def project_ids(self):
+        if NoProject in self.clients:
+            _, project_id = auth.default()
+            return [project_id]
+        return list(self.clients.keys())
+
+    def __getattr__(self, client_method):
+        """Return the underlying client method as a partial function so we can inject the project_id."""
+        return partial(self._call, client_method)
+
+    def _call(self, method, project_id, *args, **kwargs):
+        """Call the method on the client for the given project_id. The args and kwargs are passed through."""
+        client = self.clients.get(project_id, self.clients.get(NoProject))
+        if not client:
+            raise ValueError(f"Project {project_id} not found")
+        return getattr(client, method)(*args, **kwargs)
diff --git a/ingestion/src/metadata/ingestion/source/database/bigtable/connection.py b/ingestion/src/metadata/ingestion/source/database/bigtable/connection.py
new file mode 100644
index 000000000000..754424d53270
--- /dev/null
+++ b/ingestion/src/metadata/ingestion/source/database/bigtable/connection.py
@@ -0,0 +1,116 @@
+# Copyright 2024 Collate
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""BigTable connection""" +from typing import List, Optional + +from google.cloud.bigtable import Client + +from metadata.generated.schema.entity.automations.workflow import ( + Workflow as AutomationWorkflow, +) +from metadata.generated.schema.entity.services.connections.database.bigTableConnection import ( + BigTableConnection, +) +from metadata.generated.schema.security.credentials.gcpValues import ( + GcpCredentialsValues, + SingleProjectId, +) +from metadata.ingestion.connections.test_connections import ( + SourceConnectionException, + test_connection_steps, +) +from metadata.ingestion.ometa.ometa_api import OpenMetadata +from metadata.ingestion.source.database.bigtable.client import MultiProjectClient +from metadata.utils.credentials import set_google_credentials +from metadata.utils.logger import ingestion_logger + +logger = ingestion_logger() + + +def get_connection(connection: BigTableConnection): + set_google_credentials(connection.credentials) + project_ids = None + if isinstance(connection.credentials.gcpConfig, GcpCredentialsValues): + project_ids = ( + [connection.credentials.gcpConfig.projectId.__root__] + if isinstance(connection.credentials.gcpConfig.projectId, SingleProjectId) + else connection.credentials.gcpConfig.projectId.__root__ + ) + # admin=True is required to list instances and tables + return MultiProjectClient(client_class=Client, project_ids=project_ids, admin=True) + + +def get_nested_index(lst: list, index: List[int], default=None): + try: + for i in index: + lst = lst[i] + return lst + except IndexError: + return default + + +class Tester: + """ + A wrapper class that holds state. We need it because the different testing stages + are not independent of each other. For example, we need to list instances before we can list + """ + + def __init__(self, client: MultiProjectClient): + self.client = client + self.project_id = None + self.instance = None + self.table = None + + def list_instances(self): + self.project_id = list(self.client.clients.keys())[0] + instances = list(self.client.list_instances(project_id=self.project_id)) + self.instance = get_nested_index(instances, [0, 0]) + + def list_tables(self): + if not self.instance: + raise SourceConnectionException( + f"No instances found in project {self.project_id}" + ) + tables = list(self.instance.list_tables()) + self.table = tables[0] + + def get_row(self): + if not self.table: + raise SourceConnectionException( + f"No tables found in project {self.instance.project_id} and instance {self.instance.instance_id}" + ) + self.table.read_rows(limit=1) + + +def test_connection( + metadata: OpenMetadata, + client: MultiProjectClient, + service_connection: BigTableConnection, + automation_workflow: Optional[AutomationWorkflow] = None, +) -> None: + """ + Test connection. 
This can be executed either as part
+    of a metadata workflow or during an Automation Workflow
+    """
+    tester = Tester(client)
+
+    test_fn = {
+        "GetInstances": tester.list_instances,
+        "GetTables": tester.list_tables,
+        "GetRows": tester.get_row,
+    }
+
+    test_connection_steps(
+        metadata=metadata,
+        test_fn=test_fn,
+        service_type=service_connection.type.value,
+        automation_workflow=automation_workflow,
+    )
diff --git a/ingestion/src/metadata/ingestion/source/database/bigtable/metadata.py b/ingestion/src/metadata/ingestion/source/database/bigtable/metadata.py
new file mode 100644
index 000000000000..a2a072db53f0
--- /dev/null
+++ b/ingestion/src/metadata/ingestion/source/database/bigtable/metadata.py
@@ -0,0 +1,224 @@
+# Copyright 2024 Collate
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Bigtable source methods.
+"""
+import traceback
+from typing import Dict, Iterable, List, Optional, Union
+
+from google.cloud.bigtable import row_filters
+from google.cloud.bigtable.instance import Instance
+from google.cloud.bigtable.table import Table
+
+from metadata.generated.schema.entity.data.table import (
+    ConstraintType,
+    TableConstraint,
+    TableType,
+)
+from metadata.generated.schema.entity.services.connections.database.bigTableConnection import (
+    BigTableConnection,
+)
+from metadata.generated.schema.metadataIngestion.workflow import (
+    Source as WorkflowSource,
+)
+from metadata.ingestion.api.steps import InvalidSourceException
+from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.ingestion.source.database.bigtable.client import MultiProjectClient
+from metadata.ingestion.source.database.bigtable.models import Row
+from metadata.ingestion.source.database.common_nosql_source import (
+    SAMPLE_SIZE as GLOBAL_SAMPLE_SIZE,
+)
+from metadata.ingestion.source.database.common_nosql_source import CommonNoSQLSource
+from metadata.ingestion.source.database.multi_db_source import MultiDBSource
+from metadata.utils.logger import ingestion_logger
+
+logger = ingestion_logger()
+
+# BigTable groups its columns into column families. We assume that, even if a table has a large number of
+# column families, sampling the first 100 of them is enough.
+MAX_COLUMN_FAMILIES = 100
+SAMPLES_PER_COLUMN_FAMILY = 100
+
+ProjectId = str
+InstanceId = str
+TableId = str
+
+
+class BigtableSource(CommonNoSQLSource, MultiDBSource):
+    """
+    Implements the necessary methods to extract database metadata from the Google BigTable source.
+    BigTable is a NoSQL database service for handling large amounts of data. The mapping is as follows:
+      project -> instance -> table -> column_family.column
+      (database)  (schema)
+    For more info about BigTable: https://cloud.google.com/bigtable/?hl=en
+    All data types are registered as bytes.
+    """
+
+    def __init__(self, config: WorkflowSource, metadata: OpenMetadata):
+        super().__init__(config, metadata)
+        self.client: MultiProjectClient = self.connection_obj
+
+        # The instances and tables are cached to avoid making redundant requests to the API.
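+        # Cache layout: instances[project_id][instance_id] -> Instance and
+        # tables[project_id][instance_id][table_id] -> Table, matching the type hints below.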
+ self.instances: Dict[ProjectId, Dict[InstanceId, Instance]] = {} + self.tables: Dict[ProjectId, Dict[InstanceId, Dict[TableId, Table]]] = {} + + @classmethod + def create(cls, config_dict, metadata: OpenMetadata): + config: WorkflowSource = WorkflowSource.parse_obj(config_dict) + connection: BigTableConnection = config.serviceConnection.__root__.config + if not isinstance(connection, BigTableConnection): + raise InvalidSourceException( + f"Expected BigTableConnection, but got {connection}" + ) + return cls(config, metadata) + + def get_configured_database(self) -> Optional[str]: + """ + This connector uses "virtual databases" in the form of GCP projects. + The concept of a default project for the GCP client is not useful here because the project ID + is always an explicit part of the connection. Therefore, this method returns None and the databases + are resolved using `self.get_database_names`. + """ + return None + + def get_database_names(self) -> Iterable[str]: + return self.get_database_names_raw() + + def get_database_names_raw(self) -> Iterable[str]: + yield from self.client.project_ids() + + def get_schema_name_list(self) -> List[str]: + project_id = self.context.database + try: + # the first element is a list of instances + # the second element is another collection (seems empty) and I do not know what is its purpose + instances, _ = self.client.list_instances(project_id=project_id) + self.instances[project_id] = { + instance.instance_id: instance for instance in instances + } + return list(self.instances[project_id].keys()) + except Exception as err: + logger.debug(traceback.format_exc()) + logger.error( + f"Failed to list BigTable instances in project {project_id}: {err}" + ) + raise + + def get_table_name_list(self, schema_name: str) -> List[str]: + project_id = self.context.database + try: + instance = self._get_instance(project_id, schema_name) + if instance is None: + raise RuntimeError(f"Instance {project_id}/{schema_name} not found.") + tables = instance.list_tables() + for table in tables: + self._set_nested( + self.tables, + [project_id, instance.instance_id, table.table_id], + table, + ) + return list(self.tables[project_id][schema_name].keys()) + except Exception as err: + logger.debug(traceback.format_exc()) + # add context to the error message + logger.error( + f"Failed to list BigTable table names in {project_id}.{schema_name}: {err}" + ) + return [] + + def get_table_constraints( + self, db_name: str, schema_name: str, table_name: str + ) -> List[TableConstraint]: + return [ + TableConstraint( + constraintType=ConstraintType.PRIMARY_KEY, columns=["row_key"] + ) + ] + + def get_table_columns_dict( + self, schema_name: str, table_name: str + ) -> Union[List[Dict], Dict]: + project_id = self.context.database + try: + table = self._get_table(project_id, schema_name, table_name) + if table is None: + raise RuntimeError( + f"Table {project_id}/{schema_name}/{table_name} not found." + ) + column_families = table.list_column_families() + # all BigTable tables have a "row_key" column. Even if there are no records in the table. + records = [{"row_key": b"row_key"}] + # In order to get a "good" sample of data, we try to distribute the sampling + # across multiple column families. 
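+            # At most MAX_COLUMN_FAMILIES * SAMPLES_PER_COLUMN_FAMILY row reads are issued per table,
+            # and we stop early once GLOBAL_SAMPLE_SIZE records have been collected.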
+ for cf in list(column_families.keys())[:MAX_COLUMN_FAMILIES]: + records.extend( + self._get_records_for_column_family( + table, cf, SAMPLES_PER_COLUMN_FAMILY + ) + ) + if len(records) >= GLOBAL_SAMPLE_SIZE: + break + return records + except Exception as err: + logger.debug(traceback.format_exc()) + logger.warning( + f"Failed to read BigTable rows for [{project_id}.{schema_name}.{table_name}]: {err}" + ) + return [] + + def get_source_url( + self, + database_name: Optional[str] = None, + schema_name: Optional[str] = None, + table_name: Optional[str] = None, + table_type: Optional[TableType] = None, + ) -> Optional[str]: + """ + Method to get the source url for a BigTable table + """ + try: + if schema_name and table_name: + return ( + "https://console.cloud.google.com/bigtable/instances/" + f"{schema_name}/tables/{table_name}/overview?project={database_name}" + ) + except Exception as exc: + logger.debug(traceback.format_exc()) + logger.error(f"Unable to get source url: {exc}") + return None + + @staticmethod + def _set_nested(dct: dict, keys: List[str], value: any) -> None: + for key in keys[:-1]: + dct = dct.setdefault(key, {}) + dct[keys[-1]] = value + + @staticmethod + def _get_records_for_column_family( + table: Table, column_family: str, limit: int + ) -> List[Dict]: + filter_ = row_filters.ColumnRangeFilter(column_family_id=column_family) + rows = table.read_rows(limit=limit, filter_=filter_) + return [Row.from_partial_row(row).to_record() for row in rows] + + def _get_table( + self, project_id: str, schema_name: str, table_name: str + ) -> Optional[Table]: + try: + return self.tables[project_id][schema_name][table_name] + except KeyError: + return None + + def _get_instance(self, project_id: str, schema_name: str) -> Optional[Instance]: + try: + return self.instances[project_id][schema_name] + except KeyError: + return None diff --git a/ingestion/src/metadata/ingestion/source/database/bigtable/models.py b/ingestion/src/metadata/ingestion/source/database/bigtable/models.py new file mode 100644 index 000000000000..f8da387c8a55 --- /dev/null +++ b/ingestion/src/metadata/ingestion/source/database/bigtable/models.py @@ -0,0 +1,60 @@ +# Copyright 2024 Collate +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Bigtable source models. 
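+These are thin wrappers around the rows returned by the BigTable client, used to flatten
+sampled rows into records keyed as "column_family.column" with byte values.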
+""" +from typing import Dict, List + +from google.cloud.bigtable.row import PartialRowData +from pydantic import BaseModel + + +class Value(BaseModel): + """A Bigtable cell value.""" + + timestamp: int + value: bytes + + +class Cell(BaseModel): + """A Bigtable cell.""" + + values: List[Value] + + +class Row(BaseModel): + """A Bigtable row.""" + + cells: Dict[str, Dict[bytes, Cell]] + row_key: bytes + + @classmethod + def from_partial_row(cls, row: PartialRowData): + cells = {} + for cf, cf_cells in row.cells.items(): + cells.setdefault(cf, {}) + for column, cell in cf_cells.items(): + cells[cf][column] = Cell( + values=[Value(timestamp=c.timestamp, value=c.value) for c in cell] + ) + return cls(cells=cells, row_key=row.row_key) + + def to_record(self) -> Dict[str, bytes]: + record = {} + for cf, cells in self.cells.items(): + for column, cell in cells.items(): + # Since each cell can have multiple values and the API returns them in descending order + # from latest to oldest, we only take the latest value. This probably does not matter since + # all we care about is data types and all data stored in BigTable is of type `bytes`. + record[f"{cf}.{column.decode()}"] = cell.values[0].value + record["row_key"] = self.row_key + + return record diff --git a/ingestion/src/metadata/ingestion/source/database/common_nosql_source.py b/ingestion/src/metadata/ingestion/source/database/common_nosql_source.py index e9f394243ff2..8956abb27ddc 100644 --- a/ingestion/src/metadata/ingestion/source/database/common_nosql_source.py +++ b/ingestion/src/metadata/ingestion/source/database/common_nosql_source.py @@ -28,7 +28,11 @@ from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest from metadata.generated.schema.entity.data.database import Database from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema -from metadata.generated.schema.entity.data.table import Table, TableType +from metadata.generated.schema.entity.data.table import ( + Table, + TableConstraint, + TableType, +) from metadata.generated.schema.entity.services.ingestionPipelines.status import ( StackTraceError, ) @@ -203,6 +207,15 @@ def get_table_columns_dict( need to be overridden by sources """ + def get_table_constraints( + self, + db_name: str, + schema_name: str, + table_name: str, + ) -> Optional[List[TableConstraint]]: + # pylint: disable=unused-argument + return None + def yield_table( self, table_name_and_type: Tuple[str, str] ) -> Iterable[Either[CreateTableRequest]]: @@ -223,7 +236,11 @@ def yield_table( name=table_name, tableType=table_type, columns=columns, - tableConstraints=None, + tableConstraints=self.get_table_constraints( + schema_name=schema_name, + table_name=table_name, + db_name=self.context.database, + ), databaseSchema=fqn.build( metadata=self.metadata, entity_type=DatabaseSchema, diff --git a/ingestion/src/metadata/utils/datalake/datalake_utils.py b/ingestion/src/metadata/utils/datalake/datalake_utils.py index 369b1e3e0e21..e067443090f9 100644 --- a/ingestion/src/metadata/utils/datalake/datalake_utils.py +++ b/ingestion/src/metadata/utils/datalake/datalake_utils.py @@ -169,6 +169,7 @@ class GenericDataFrameColumnParser: ["datetime64", "timedelta[ns]", "datetime64[ns]"], DataType.DATETIME ), "str": DataType.STRING, + "bytes": DataType.BYTES, } def __init__(self, data_frame: "DataFrame"): @@ -247,8 +248,13 @@ def fetch_col_types(cls, data_frame, column_name): data_type = "string" data_type = cls._data_formats.get( - data_type or data_frame[column_name].dtypes.name, 
DataType.STRING
+                data_type or data_frame[column_name].dtypes.name,
             )
+            if not data_type:
+                logger.debug(
+                    f"Unknown data type {data_frame[column_name].dtypes.name}. Resolving to string."
+                )
+                data_type = data_type or DataType.STRING
         except Exception as err:
             logger.warning(
                 f"Failed to distinguish data type for column {column_name}, Falling back to {data_type}, exc: {err}"
diff --git a/ingestion/tests/unit/topology/database/test_bigtable.py b/ingestion/tests/unit/topology/database/test_bigtable.py
new file mode 100644
index 000000000000..6ca32b5f8475
--- /dev/null
+++ b/ingestion/tests/unit/topology/database/test_bigtable.py
@@ -0,0 +1,290 @@
+# Copyright 2021 Collate
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Test BigTable using the topology
+"""
+
+import json
+from pathlib import Path
+from unittest import TestCase
+from unittest.mock import Mock, patch
+
+import pytest
+
+from metadata.generated.schema.api.data.createTable import CreateTableRequest
+from metadata.generated.schema.entity.data.database import Database
+from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
+from metadata.generated.schema.entity.data.table import (
+    Column,
+    ConstraintType,
+    DataType,
+    TableConstraint,
+    TableType,
+)
+from metadata.generated.schema.entity.services.databaseService import (
+    DatabaseConnection,
+    DatabaseService,
+    DatabaseServiceType,
+)
+from metadata.generated.schema.metadataIngestion.workflow import (
+    OpenMetadataWorkflowConfig,
+)
+from metadata.generated.schema.type.basic import SourceUrl
+from metadata.generated.schema.type.entityReference import EntityReference
+from metadata.ingestion.ometa.ometa_api import OpenMetadata
+from metadata.ingestion.source.database.bigtable.metadata import BigtableSource
+
+mock_file_path = (
+    Path(__file__).parent.parent.parent / "resources/datasets/glue_db_dataset.json"
+)
+with open(mock_file_path) as file:
+    mock_data: dict = json.load(file)
+
+mock_bigtable_config = {
+    "source": {
+        "type": "bigtable",
+        "serviceName": "local_bigtable",
+        "serviceConnection": {
+            "config": {
+                "type": "BigTable",
+                "credentials": {
+                    "gcpConfig": {
+                        "type": "service_account",
+                        "projectId": "my-gcp-project",
+                        "privateKeyId": "private_key_id",
+                        # this is a valid key that was generated on a local machine and is not used for any real project
+                        "privateKey": "-----BEGIN RSA PRIVATE 
KEY-----\nMIIEpQIBAAKCAQEAw3vHG9fDIkcYB0xi2Mv4fS2gUzKR9ZRrcVNeKkqGFTT71AVB\nOzgIqYVe8b2aWODuNye6sipcrqTqOt05Esj+sxhk5McM9bE2RlxXC5QH/Bp9zxMP\n/Yksv9Ov7fdDt/loUk7sTXvI+7LDJfmRYU6MtVjyyLs7KpQIB2xBWEToU1xZY+v0\ndRC1NA+YWc+FjXbAiFAf9d4gXkYO8VmU5meixVh4C8nsjokEXk0T/HEItpZCxadk\ndZ7LKUE/HDmWCO2oNG6sCf4ET2crjSdYIfXuREopX1aQwnk7KbI4/YIdlRz1I369\nAz3+Hxlf9lLJVH3+itN4GXrR9yWWKWKDnwDPbQIDAQABAoIBAQC3X5QuTR7SN8iV\niBUtc2D84+ECSmza5shG/UJW/6N5n0Mf53ICgBS4GNEwiYCRISa0/ILIgK6CcVb7\nsuvH8F3kWNzEMui4TO0x4YsR5GH9HkioCCS224frxkLBQnL20HIIy9ok8Rpe6Zjg\nNZUnp4yczPyqSeA9l7FUbTt69uDM2Cx61m8REOpFukpnYLyZGbmNPYmikEO+rq9r\nwNID5dkSeVuQYo4MQdRavOGFUWvUYXzkEQ0A6vPyraVBfolESX8WaLNVjic7nIa3\nujdSNojnJqGJ3gslntcmN1d4JOfydc4bja4/NdNlcOHpWDGLzY1QnaDe0Koxn8sx\nLT9MVD2NAoGBAPy7r726bKVGWcwqTzUuq1OWh5c9CAc4N2zWBBldSJyUdllUq52L\nWTyva6GRoRzCcYa/dKLLSM/k4eLf9tpxeIIfTOMsvzGtbAdm257ndMXNvfYpxCfU\nK/gUFfAUGHZ3MucTHRY6DTkJg763Sf6PubA2fqv3HhVZDK/1HGDtHlTPAoGBAMYC\npdV7O7lAyXS/d9X4PQZ4BM+P8MbXEdGBbPPlzJ2YIb53TEmYfSj3z41u9+BNnhGP\n4uzUyAR/E4sxrA2+Ll1lPSCn+KY14WWiVGfWmC5j1ftdpkbrXstLN8NpNYzrKZwx\njdR0ZkwvZ8B5+kJ1hK96giwWS+SJxJR3TohcQ18DAoGAJSfmv2r//BBqtURnHrd8\nwq43wvlbC8ytAVg5hA0d1r9Q4vM6w8+vz+cuWLOTTyobDKdrG1/tlXrd5r/sh9L0\n15SIdkGm3kPTxQbPNP5sQYRs8BrV1tEvoao6S3B45DnEBwrdVN42AXOvpcNGoqE4\nuHpahyeuiY7s+ZV8lZdmxSsCgYEAolr5bpmk1rjwdfGoaKEqKGuwRiBX5DHkQkxE\n8Zayt2VOBcX7nzyRI05NuEIMrLX3rZ61CktN1aH8fF02He6aRaoE/Qm9L0tujM8V\nNi8WiLMDeR/Ifs3u4/HAv1E8v1byv0dCa7klR8J257McJ/ID4X4pzcxaXgE4ViOd\nGOHNu9ECgYEApq1zkZthEQymTUxs+lSFcubQpaXyf5ZC61cJewpWkqGDtSC+8DxE\nF/jydybWuoNHXymnvY6QywxuIooivbuib6AlgpEJeybmnWlDOZklFOD0abNZ+aNO\ndUk7XVGffCakXQ0jp1kmZA4lGsYK1h5dEU5DgXqu4UYJ88Vttax2W+Y=\n-----END RSA PRIVATE KEY-----\n", + "clientEmail": "gcpuser@project_id.iam.gserviceaccount.com", + "clientId": "client_id", + "authUri": "https://accounts.google.com/o/oauth2/auth", + "tokenUri": "https://oauth2.googleapis.com/token", + "authProviderX509CertUrl": "https://www.googleapis.com/oauth2/v1/certs", + "clientX509CertUrl": "https://www.googleapis.com/oauth2/v1/certs", + } + }, + }, + }, + "sourceConfig": { + "config": { + "type": "DatabaseMetadata", + "schemaFilterPattern": {"includes": ["my_instance"]}, + "tableFilterPattern": {"includes": ["random_table"]}, + } + }, + }, + "sink": {"type": "metadata-rest", "config": {}}, + "workflowConfig": { + "openMetadataServerConfig": { + "hostPort": "http://localhost:8585/api", + "authProvider": "openmetadata", + "securityConfig": { + "jwtToken": "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg" + }, + } + }, +} + +MOCK_DATABASE_SERVICE = DatabaseService( + id="85811038-099a-11ed-861d-0242ac120002", + name="local_bigtable", + connection=DatabaseConnection(), + serviceType=DatabaseServiceType.Glue, +) + +MOCK_DATABASE = Database( + id="2aaa012e-099a-11ed-861d-0242ac120002", + name="my-gcp-project", + fullyQualifiedName="local_bigtable.my-gcp-project", + displayName="my-gcp-project", + description="", + service=EntityReference( + id="85811038-099a-11ed-861d-0242ac120002", + type="databaseService", + ), +) 
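+
+# The project-level and instance-level mocks mirror the connector's mapping: the GCP project
+# acts as the database and the BigTable instance acts as the database schema.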
+ +MOCK_DATABASE_SCHEMA = DatabaseSchema( + id="2aaa012e-099a-11ed-861d-0242ac120056", + name="my_instance", + fullyQualifiedName="local_bigtable.my-gcp-project.my_instance", + displayName="default", + description="", + database=EntityReference( + id="2aaa012e-099a-11ed-861d-0242ac120002", + type="database", + ), + service=EntityReference( + id="85811038-099a-11ed-861d-0242ac120002", + type="databaseService", + ), +) + + +MOCK_CREATE_TABLE = CreateTableRequest( + name="random_table", + tableType=TableType.Regular, + columns=[ + Column( + name="row_key", + displayName="row_key", + dataType=DataType.BYTES, + dataTypeDisplay=DataType.BYTES.value, + ), + Column( + name="cf1.col1", + displayName="cf1.col1", + dataType=DataType.BYTES, + dataTypeDisplay=DataType.BYTES.value, + ), + Column( + name="cf2.col2", + displayName="cf2.col2", + dataType=DataType.BYTES, + dataTypeDisplay=DataType.BYTES.value, + ), + ], + tableConstraints=[ + TableConstraint(constraintType=ConstraintType.PRIMARY_KEY, columns=["row_key"]) + ], + databaseSchema="local_bigtable.my-gcp-project.my_instance", + sourceUrl=SourceUrl( + __root__="https://console.cloud.google.com/bigtable/instances/my_instance/tables/random_table/overview?project=my-gcp-project" + ), +) + + +EXPECTED_DATABASE_NAMES = ["my-gcp-project"] + +EXPECTED_DATABASE_SCHEMA_NAMES = [ + "my_instance", +] + +MOCK_DATABASE_SCHEMA_NAMES = [ + "my_instance", + "random1_schema", +] + +EXPECTED_TABLE_NAMES = [ + ("random_table", TableType.Regular), +] + + +def custom_column_compare(self, other): + return ( + self.name == other.name + and self.description == other.description + and self.children == other.children + ) + + +@pytest.fixture +def mock_bigtable_row(): + mock = Mock() + cell = Mock() + cell.value = b"cell_value" + cell.timestamp = 1234567890 + mock.cells = {"cf1": {b"col1": [cell]}, "cf2": {b"col2": [cell]}} + mock.row_key = b"row_key" + yield mock + + +@pytest.fixture +def mock_bigtable_table(mock_bigtable_row): + mock = Mock() + mock.table_id = "random_table" + mock.list_column_families.return_value = {"cf1": None, "cf2": None} + mock.read_rows.return_value = [mock_bigtable_row] + yield mock + + +@pytest.fixture +def mock_bigtable_instance(mock_bigtable_table): + mock = Mock() + mock.instance_id = "my_instance" + mock.project_id = "my-gcp-project" + mock.list_tables.return_value = [mock_bigtable_table] + yield mock + + +@pytest.fixture +def mock_google_cloud_client(mock_bigtable_instance): + with patch("google.cloud.bigtable.Client") as mock_client: + mock_client.list_instances.return_value = [[], []] + mock_client().list_instances.return_value = [[mock_bigtable_instance], []] + yield mock_client + + +@pytest.fixture +def mock_test_connection(): + with patch.object(BigtableSource, "test_connection") as mock_test_connection: + mock_test_connection.return_value = True + yield mock_test_connection + + +class BigTableUnitTest(TestCase): + @pytest.fixture(autouse=True) + def setup( + self, + monkeypatch, + mock_google_cloud_client, + mock_test_connection, + mock_bigtable_instance, + mock_bigtable_table, + ): + self.config = OpenMetadataWorkflowConfig.parse_obj(mock_bigtable_config) + self.bigtable_source = BigtableSource.create( + mock_bigtable_config["source"], + OpenMetadata(self.config.workflowConfig.openMetadataServerConfig), + ) + self.bigtable_source.context.__dict__[ + "database_service" + ] = MOCK_DATABASE_SERVICE.name.__root__ + self.bigtable_source.context.__dict__["database"] = MOCK_DATABASE.name.__root__ + self.bigtable_source.context.__dict__[ + 
"database_schema" + ] = MOCK_DATABASE_SCHEMA.name.__root__ + self.bigtable_source.instances = { + "my-gcp-project": { + mock_bigtable_instance.instance_id: mock_bigtable_instance + } + } + self.bigtable_source.tables = { + "my-gcp-project": { + mock_bigtable_instance.instance_id: { + mock_bigtable_table.table_id: mock_bigtable_table + } + } + } + + def test_database_names(self): + assert ( + list(self.bigtable_source.get_database_names()) == EXPECTED_DATABASE_NAMES + ) + + def test_database_schema_names(self): + assert ( + list(self.bigtable_source.get_database_schema_names()) + == EXPECTED_DATABASE_SCHEMA_NAMES + ) + + def test_table_names(self): + assert ( + list(self.bigtable_source.get_tables_name_and_type()) + == EXPECTED_TABLE_NAMES + ) + + def test_yield_tables(self): + Column.__eq__ = custom_column_compare + result = next(self.bigtable_source.yield_table(EXPECTED_TABLE_NAMES[0])) + assert result.left is None + assert result.right.name.__root__ == "random_table" + assert result.right == MOCK_CREATE_TABLE diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/secrets/converter/BigTableConnectionClassConverter.java b/openmetadata-service/src/main/java/org/openmetadata/service/secrets/converter/BigTableConnectionClassConverter.java new file mode 100644 index 000000000000..b26383645d1b --- /dev/null +++ b/openmetadata-service/src/main/java/org/openmetadata/service/secrets/converter/BigTableConnectionClassConverter.java @@ -0,0 +1,37 @@ +/* + * Copyright 2024 Collate + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.openmetadata.service.secrets.converter; + +import java.util.List; +import org.openmetadata.schema.security.credentials.GCPCredentials; +import org.openmetadata.schema.services.connections.database.BigTableConnection; +import org.openmetadata.service.util.JsonUtils; + +/** Converter class to get an `BigTableConnection` object. 
*/ +public class BigTableConnectionClassConverter extends ClassConverter { + + public BigTableConnectionClassConverter() { + super(BigTableConnection.class); + } + + @Override + public Object convert(Object object) { + BigTableConnection connection = (BigTableConnection) JsonUtils.convertValue(object, this.clazz); + + tryToConvertOrFail(connection.getCredentials(), List.of(GCPCredentials.class)) + .ifPresent(obj -> connection.setCredentials((GCPCredentials) obj)); + + return connection; + } +} diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/secrets/converter/ClassConverterFactory.java b/openmetadata-service/src/main/java/org/openmetadata/service/secrets/converter/ClassConverterFactory.java index c428b4b5acad..0da02d5a894d 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/secrets/converter/ClassConverterFactory.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/secrets/converter/ClassConverterFactory.java @@ -24,12 +24,7 @@ import org.openmetadata.schema.services.connections.dashboard.LookerConnection; import org.openmetadata.schema.services.connections.dashboard.SupersetConnection; import org.openmetadata.schema.services.connections.dashboard.TableauConnection; -import org.openmetadata.schema.services.connections.database.BigQueryConnection; -import org.openmetadata.schema.services.connections.database.DatalakeConnection; -import org.openmetadata.schema.services.connections.database.IcebergConnection; -import org.openmetadata.schema.services.connections.database.MysqlConnection; -import org.openmetadata.schema.services.connections.database.PostgresConnection; -import org.openmetadata.schema.services.connections.database.TrinoConnection; +import org.openmetadata.schema.services.connections.database.*; import org.openmetadata.schema.services.connections.database.datalake.GCSConfig; import org.openmetadata.schema.services.connections.database.iceberg.IcebergFileSystem; import org.openmetadata.schema.services.connections.pipeline.AirflowConnection; @@ -49,6 +44,7 @@ private ClassConverterFactory() { Map.ofEntries( Map.entry(AirflowConnection.class, new AirflowConnectionClassConverter()), Map.entry(BigQueryConnection.class, new BigQueryConnectionClassConverter()), + Map.entry(BigTableConnection.class, new BigTableConnectionClassConverter()), Map.entry(DatalakeConnection.class, new DatalakeConnectionClassConverter()), Map.entry(MysqlConnection.class, new MysqlConnectionClassConverter()), Map.entry(TrinoConnection.class, new TrinoConnectionClassConverter()), diff --git a/openmetadata-service/src/main/resources/json/data/testConnections/database/bigtable.json b/openmetadata-service/src/main/resources/json/data/testConnections/database/bigtable.json new file mode 100644 index 000000000000..86ca0bb17ac2 --- /dev/null +++ b/openmetadata-service/src/main/resources/json/data/testConnections/database/bigtable.json @@ -0,0 +1,29 @@ +{ + "name": "BigTable", + "displayName": "BigTable Test Connection", + "description": "This Test Connection validates the access against the database and basic metadata extraction of schemas and tables.", + "steps": [ + { + "name": "GetInstances", + "description": "Validate that we can get the instances with the given credentials.", + "errorMessage": "Failed to get BigTable instances, please validate to the credentials of service account", + "shortCircuit": true, + "mandatory": true + }, + { + "name": "GetTables", + "description": "Validate that we can get tables with the given credentials.", + 
"errorMessage": "Failed to get BigTable tables, please validate to the credentials of service account", + "shortCircuit": true, + "mandatory": true + }, + { + "name": "ReadRows", + "description": "Validate that we can read rows with the given credentials.", + "errorMessage": "Failed to read rows from BigTable, please validate to the credentials of service account" + "shortCircuit": true, + "mandatory": true + } + ] + } + diff --git a/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/bigTableConnection.json b/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/bigTableConnection.json new file mode 100644 index 000000000000..513c04232a47 --- /dev/null +++ b/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/bigTableConnection.json @@ -0,0 +1,51 @@ +{ + "$id": "https://open-metadata.org/schema/entity/services/connections/database/bigTableConnection.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "BigTableConnection", + "description": "Google BigTable Connection Config", + "type": "object", + "javaType": "org.openmetadata.schema.services.connections.database.BigTableConnection", + "definitions": { + "bigtableType": { + "description": "Service type.", + "type": "string", + "enum": [ + "BigTable" + ], + "default": "BigTable" + } + }, + "properties": { + "type": { + "title": "Service Type", + "description": "Service Type", + "$ref": "#/definitions/bigtableType", + "default": "BigTable" + }, + "credentials": { + "title": "GCP Credentials", + "description": "GCP Credentials", + "$ref": "../../../../security/credentials/gcpCredentials.json" + }, + "connectionOptions": { + "title": "Connection Options", + "$ref": "../connectionBasicType.json#/definitions/connectionOptions" + }, + "connectionArguments": { + "title": "Connection Arguments", + "$ref": "../connectionBasicType.json#/definitions/connectionArguments" + }, + "supportsMetadataExtraction": { + "title": "Supports Metadata Extraction", + "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" + }, + "supportsDatabase": { + "title": "Supports Database", + "$ref": "../connectionBasicType.json#/definitions/supportsDatabase" + } + }, + "additionalProperties": false, + "required": [ + "credentials" + ] +} diff --git a/openmetadata-spec/src/main/resources/json/schema/entity/services/databaseService.json b/openmetadata-spec/src/main/resources/json/schema/entity/services/databaseService.json index 9b87a31c033b..b03d412e02fb 100644 --- a/openmetadata-spec/src/main/resources/json/schema/entity/services/databaseService.json +++ b/openmetadata-spec/src/main/resources/json/schema/entity/services/databaseService.json @@ -16,6 +16,7 @@ "type": "string", "enum": [ "BigQuery", + "BigTable", "Mysql", "Redshift", "Snowflake", @@ -59,6 +60,9 @@ { "name": "BigQuery" }, + { + "name": "BigTable" + }, { "name": "Mysql" }, @@ -188,6 +192,9 @@ { "$ref": "./connections/database/bigQueryConnection.json" }, + { + "$ref": "./connections/database/bigTableConnection.json" + }, { "$ref": "./connections/database/athenaConnection.json" }, diff --git a/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Database/BigTable.md b/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Database/BigTable.md new file mode 100644 index 000000000000..3dd8b696f0ce --- /dev/null +++ b/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Database/BigTable.md @@ -0,0 +1,146 @@ +# BigTable + +In this section, we provide guides 
and references to use the BigTable connector. + +## Requirements + +We need to enable the BigTable API and use an account with a specific set of minimum permissions: + +### BigTable API Permissions + +Click on `Enable API`, which will enable the APs on the selected project: + +- [Cloud Bigtable Admin API ](https://console.cloud.google.com/apis/api/bigtableadmin.googleapis.com) +- [Cloud Bigtable API](https://console.cloud.google.com/apis/library/bigtable.googleapis.com) + +### GCP Permissions + +To execute the metadata extraction and Usage workflow successfully, the user or the service account should have enough permissions to fetch required data: + +- `bigtable.instances.get` +- `bigtable.instances.list` +- `bigtable.tables.get` +- `bigtable.tables.list` +- `bigtable.tables.readRows` + +You can visit [this](https://docs.open-metadata.org/connectors/database/bigtable/roles) documentation on how you can create a custom role in GCP and assign the above permissions to the role & service account! + +You can find further information on the BigTable connector in the [docs](https://docs.open-metadata.org/connectors/database/bigtable). + +## Connection Details + +$$section +### Scheme $(id="scheme") + +SQLAlchemy driver scheme options. +$$ + +$$section +### Host Port $(id="hostPort") + +BigQuery APIs URL. By default, the API URL is `bigquery.googleapis.com`. You can modify this if you have custom implementation of BigQuery. +$$ + +$$section +### GCP Credentials Configuration $(id="gcpConfig") + +You can authenticate with your BigQuery instance using either `GCP Credentials Path` where you can specify the file path of the service account key, or you can pass the values directly by choosing the `GCP Credentials Values` from the service account key file. + +You can check [this](https://cloud.google.com/iam/docs/keys-create-delete#iam-service-account-keys-create-console) documentation on how to create the service account keys and download it. + +If you want to use [ADC authentication](https://cloud.google.com/docs/authentication#adc) for BigQuery you can just leave the GCP credentials empty. + +$$ + +$$section +### Credentials Type $(id="type") + +Credentials Type is the type of the account, for a service account the value of this field is `service_account`. To fetch this key, look for the value associated with the `type` key in the service account key file. +$$ + +$$section +### Project ID $(id="projectId") + +A project ID is a unique string used to differentiate your project from all others in Google Cloud. To fetch this key, look for the value associated with the `project_id` key in the service account key file. +$$ + +$$section +### Private Key ID $(id="privateKeyId") + +This is a unique identifier for the private key associated with the service account. To fetch this key, look for the value associated with the `private_key_id` key in the service account file. +$$ + +$$section +### Private Key $(id="privateKey") + +This is the private key associated with the service account that is used to authenticate and authorize access to GCP. To fetch this key, look for the value associated with the `private_key` key in the service account file. + +Make sure you are passing the key in a correct format. If your private key looks like this: + +``` +-----BEGIN ENCRYPTED PRIVATE KEY----- +MII.. +MBQ... +CgU.. +8Lt.. +... 
+h+4= +-----END ENCRYPTED PRIVATE KEY----- +``` + +You will have to replace new lines with `\n` and the final private key that you need to pass should look like this: + +``` +-----BEGIN ENCRYPTED PRIVATE KEY-----\nMII..\nMBQ...\nCgU..\n8Lt..\n...\nh+4=\n-----END ENCRYPTED PRIVATE KEY-----\n +``` +$$ + +$$section +### Client Email $(id="clientEmail") + +This is the email address associated with the service account. To fetch this key, look for the value associated with the `client_email` key in the service account key file. +$$ + +$$section +### Client ID $(id="clientId") + +This is a unique identifier for the service account. To fetch this key, look for the value associated with the `client_id` key in the service account key file. +$$ + +$$section +### Auth URI $(id="authUri") + +This is the URI for the authorization server. To fetch this key, look for the value associated with the `auth_uri` key in the service account key file. +$$ + +$$section +### Token URI $(id="tokenUri") + +The Google Cloud Token URI is a specific endpoint used to obtain an OAuth 2.0 access token from the Google Cloud IAM service. This token allows you to authenticate and access various Google Cloud resources and APIs that require authorization. + +To fetch this key, look for the value associated with the `token_uri` key in the service account credentials file. +$$ + +$$section +### Auth Provider X509Cert URL $(id="authProviderX509CertUrl") + +This is the URL of the certificate that verifies the authenticity of the authorization server. To fetch this key, look for the value associated with the `auth_provider_x509_cert_url` key in the service account key file. +$$ + +$$section +### Client X509Cert URL $(id="clientX509CertUrl") + +This is the URL of the certificate that verifies the authenticity of the service account. To fetch this key, look for the value associated with the `client_x509_cert_url` key in the service account key file. +$$ + +$$section +### Connection Options $(id="connectionOptions") + +Additional connection options to build the URL that can be sent to service during the connection. +$$ + +$$section +### Connection Arguments $(id="connectionArguments") + +Additional connection arguments such as security or protocol configs that can be sent to service during connection. 
+$$ diff --git a/openmetadata-ui/src/main/resources/ui/src/assets/img/service-icon-bigtable.png b/openmetadata-ui/src/main/resources/ui/src/assets/img/service-icon-bigtable.png new file mode 100644 index 000000000000..19f9da21e847 Binary files /dev/null and b/openmetadata-ui/src/main/resources/ui/src/assets/img/service-icon-bigtable.png differ diff --git a/openmetadata-ui/src/main/resources/ui/src/constants/Services.constant.ts b/openmetadata-ui/src/main/resources/ui/src/constants/Services.constant.ts index bd064f4b249f..08087a72676f 100644 --- a/openmetadata-ui/src/main/resources/ui/src/constants/Services.constant.ts +++ b/openmetadata-ui/src/main/resources/ui/src/constants/Services.constant.ts @@ -23,6 +23,7 @@ import amundsen from '../assets/img/service-icon-amundsen.png'; import athena from '../assets/img/service-icon-athena.png'; import atlas from '../assets/img/service-icon-atlas.svg'; import azuresql from '../assets/img/service-icon-azuresql.png'; +import bigtable from '../assets/img/service-icon-bigtable.png'; import clickhouse from '../assets/img/service-icon-clickhouse.png'; import couchbase from '../assets/img/service-icon-couchbase.svg'; import dagster from '../assets/img/service-icon-dagster.png'; @@ -122,6 +123,7 @@ export const SQLITE = sqlite; export const MSSQL = mssql; export const REDSHIFT = redshift; export const BIGQUERY = query; +export const BIGTABLE = bigtable; export const HIVE = hive; export const IMPALA = impala; export const POSTGRES = postgres; diff --git a/openmetadata-ui/src/main/resources/ui/src/utils/DatabaseServiceUtils.ts b/openmetadata-ui/src/main/resources/ui/src/utils/DatabaseServiceUtils.ts index 31c016818187..bf348fcd5916 100644 --- a/openmetadata-ui/src/main/resources/ui/src/utils/DatabaseServiceUtils.ts +++ b/openmetadata-ui/src/main/resources/ui/src/utils/DatabaseServiceUtils.ts @@ -17,6 +17,7 @@ import { DatabaseServiceType } from '../generated/entity/services/databaseServic import athenaConnection from '../jsons/connectionSchemas/connections/database/athenaConnection.json'; import azureSQLConnection from '../jsons/connectionSchemas/connections/database/azureSQLConnection.json'; import bigQueryConnection from '../jsons/connectionSchemas/connections/database/bigQueryConnection.json'; +import bigTableConnection from '../jsons/connectionSchemas/connections/database/bigTableConnection.json'; import clickhouseConnection from '../jsons/connectionSchemas/connections/database/clickhouseConnection.json'; import couchbaseConnection from '../jsons/connectionSchemas/connections/database/couchbaseConnection.json'; import customDatabaseConnection from '../jsons/connectionSchemas/connections/database/customDatabaseConnection.json'; @@ -71,6 +72,11 @@ export const getDatabaseConfig = (type: DatabaseServiceType) => { break; } + case DatabaseServiceType.BigTable: { + schema = bigTableConnection; + + break; + } case DatabaseServiceType.Clickhouse: { schema = clickhouseConnection; diff --git a/openmetadata-ui/src/main/resources/ui/src/utils/ServiceUtilClassBase.ts b/openmetadata-ui/src/main/resources/ui/src/utils/ServiceUtilClassBase.ts index b6af88398ad3..5fee6fdf94ff 100644 --- a/openmetadata-ui/src/main/resources/ui/src/utils/ServiceUtilClassBase.ts +++ b/openmetadata-ui/src/main/resources/ui/src/utils/ServiceUtilClassBase.ts @@ -22,6 +22,7 @@ import { ATLAS, AZURESQL, BIGQUERY, + BIGTABLE, CLICKHOUSE, COMMON_UI_SCHEMA, COUCHBASE, @@ -160,6 +161,9 @@ class ServiceUtilClassBase { case DatabaseServiceType.BigQuery: return BIGQUERY; + case 
DatabaseServiceType.BigTable: + return BIGTABLE; + case DatabaseServiceType.Hive: return HIVE;