-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge remote-tracking branch 'upstream/main' into issue_15742
- Loading branch information
Showing
4,358 changed files
with
73,872 additions
and
63,417 deletions.
The diff you're trying to view is too large. We only load the first 3000 changed files.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
source: | ||
type: cassandra | ||
serviceName: local_cassandra | ||
serviceConnection: | ||
config: | ||
type: Cassandra | ||
databaseName: custom_database_name | ||
username: cassandra | ||
authType: | ||
password: cassandra | ||
# cloudConfig: | ||
# secureConnectBundle: <SCB File Path> | ||
# token: <Token String> | ||
# requestTimeout: <Timeout in seconds> | ||
# connectTimeout: <Timeout in seconds> | ||
hostPort: localhost:9042 | ||
sourceConfig: | ||
config: | ||
type: DatabaseMetadata | ||
includeTables: true | ||
sink: | ||
type: metadata-rest | ||
config: {} | ||
workflowConfig: | ||
# loggerLevel: INFO # DEBUG, INFO, WARN or ERROR | ||
openMetadataServerConfig: | ||
hostPort: http://localhost:8585/api | ||
authProvider: openmetadata | ||
securityConfig: | ||
jwtToken: "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
130 changes: 130 additions & 0 deletions
130
ingestion/src/metadata/ingestion/source/database/cassandra/connection.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
# Copyright 2021 Collate | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
""" | ||
Source connection handler | ||
""" | ||
from functools import partial | ||
from typing import Optional | ||
|
||
from cassandra.auth import PlainTextAuthProvider | ||
from cassandra.cluster import ( | ||
EXEC_PROFILE_DEFAULT, | ||
Cluster, | ||
ExecutionProfile, | ||
ProtocolVersion, | ||
) | ||
from cassandra.cluster import Session as CassandraSession | ||
from pydantic import BaseModel | ||
|
||
from metadata.generated.schema.entity.automations.workflow import ( | ||
Workflow as AutomationWorkflow, | ||
) | ||
from metadata.generated.schema.entity.services.connections.database.cassandraConnection import ( | ||
CassandraConnection, | ||
) | ||
from metadata.generated.schema.entity.services.connections.testConnectionResult import ( | ||
TestConnectionResult, | ||
) | ||
from metadata.ingestion.connections.test_connections import test_connection_steps | ||
from metadata.ingestion.ometa.ometa_api import OpenMetadata | ||
from metadata.ingestion.source.database.cassandra.queries import ( | ||
CASSANDRA_GET_KEYSPACE_MATERIALIZED_VIEWS, | ||
CASSANDRA_GET_KEYSPACE_TABLES, | ||
CASSANDRA_GET_KEYSPACES, | ||
CASSANDRA_GET_RELEASE_VERSION, | ||
) | ||
from metadata.utils.constants import THREE_MIN | ||
|
||
|
||
def get_connection(connection: CassandraConnection): | ||
""" | ||
Create connection | ||
""" | ||
|
||
cluster_config = {} | ||
if hasattr(connection.authType, "cloudConfig"): | ||
cloud_config = connection.authType.cloudConfig | ||
cluster_cloud_config = { | ||
"connect_timeout": cloud_config.connectTimeout, | ||
"use_default_tempdir": True, | ||
"secure_connect_bundle": cloud_config.secureConnectBundle, | ||
} | ||
profile = ExecutionProfile(request_timeout=cloud_config.requestTimeout) | ||
auth_provider = PlainTextAuthProvider("token", cloud_config.token) | ||
cluster_config.update( | ||
{ | ||
"cloud": cluster_cloud_config, | ||
"auth_provider": auth_provider, | ||
"execution_profiles": {EXEC_PROFILE_DEFAULT: profile}, | ||
"protocol_version": ProtocolVersion.V4, | ||
} | ||
) | ||
else: | ||
host, port = connection.hostPort.split(":") | ||
cluster_config.update({"contact_points": [host], "port": port}) | ||
if connection.username and getattr(connection.authType, "password", None): | ||
cluster_config["auth_provider"] = PlainTextAuthProvider( | ||
username=connection.username, | ||
password=connection.authType.password.get_secret_value(), | ||
) | ||
|
||
cluster = Cluster(**cluster_config) | ||
session = cluster.connect() | ||
|
||
return session | ||
|
||
|
||
def test_connection( | ||
metadata: OpenMetadata, | ||
session: CassandraSession, | ||
service_connection: CassandraConnection, | ||
automation_workflow: Optional[AutomationWorkflow] = None, | ||
timeout_seconds: Optional[int] = THREE_MIN, | ||
) -> TestConnectionResult: | ||
""" | ||
Test connection. This can be executed either as part | ||
of a metadata workflow or during an Automation Workflow | ||
""" | ||
|
||
class SchemaHolder(BaseModel): | ||
schema: Optional[str] = None | ||
|
||
holder = SchemaHolder() | ||
|
||
def test_get_release_version(session: CassandraConnection): | ||
session.execute(CASSANDRA_GET_RELEASE_VERSION) | ||
|
||
def test_get_schemas(session: CassandraSession, holder_: SchemaHolder): | ||
for keyspace in session.execute(CASSANDRA_GET_KEYSPACES): | ||
holder_.schema = keyspace.keyspace_name | ||
break | ||
|
||
def test_get_tables(session: CassandraSession, holder_: SchemaHolder): | ||
session.execute(CASSANDRA_GET_KEYSPACE_TABLES, [holder_.schema]) | ||
|
||
def test_get_views(session: CassandraSession, holder_: SchemaHolder): | ||
session.execute(CASSANDRA_GET_KEYSPACE_MATERIALIZED_VIEWS, [holder_.schema]) | ||
|
||
test_fn = { | ||
"CheckAccess": partial(test_get_release_version, session), | ||
"GetSchemas": partial(test_get_schemas, session, holder), | ||
"GetTables": partial(test_get_tables, session, holder), | ||
"GetViews": partial(test_get_views, session, holder), | ||
} | ||
|
||
return test_connection_steps( | ||
metadata=metadata, | ||
test_fn=test_fn, | ||
service_type=service_connection.type.value, | ||
automation_workflow=automation_workflow, | ||
timeout_seconds=timeout_seconds, | ||
) |
101 changes: 101 additions & 0 deletions
101
ingestion/src/metadata/ingestion/source/database/cassandra/helpers.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
# Copyright 2021 Collate | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
""" | ||
Iceberg source helpers. | ||
""" | ||
from __future__ import annotations | ||
|
||
from metadata.generated.schema.entity.data.table import Column, DataType | ||
|
||
|
||
class CassandraColumnParser: | ||
""" | ||
Responsible for containing the logic to parse a column from Cassandra to OpenMetadata | ||
""" | ||
|
||
datatype_mapping = { | ||
"ascii": DataType.STRING, | ||
"bigint": DataType.BIGINT, | ||
"blob": DataType.BLOB, | ||
"boolean": DataType.BOOLEAN, | ||
"date": DataType.DATE, | ||
"decimal": DataType.DECIMAL, | ||
"double": DataType.DOUBLE, | ||
"duration": DataType.INTERVAL, | ||
"float": DataType.FLOAT, | ||
"uuid": DataType.UUID, | ||
"inet": DataType.INET, | ||
"int": DataType.INT, | ||
"list": DataType.ARRAY, | ||
"map": DataType.MAP, | ||
"set": DataType.SET, | ||
"smallint": DataType.SMALLINT, | ||
"text": DataType.TEXT, | ||
"time": DataType.TIME, | ||
"timestamp": DataType.TIMESTAMP, | ||
"timeuuid": DataType.UUID, | ||
"tinyint": DataType.TINYINT, | ||
"tuple": DataType.TUPLE, | ||
"varint": DataType.STRING, | ||
"struct": DataType.STRUCT, | ||
} | ||
|
||
@classmethod | ||
def parse(cls, field) -> Column: | ||
""" | ||
Parses a Cassandra table column into an OpenMetadata column. | ||
""" | ||
|
||
data_type = None | ||
array_data_type = None | ||
raw_data_type = "" | ||
for letter in field.type: | ||
if letter == "<": | ||
if raw_data_type in ("", "frozen"): | ||
raw_data_type = "" | ||
continue | ||
|
||
if not data_type: | ||
data_type = cls.datatype_mapping.get( | ||
raw_data_type.lower(), DataType.UNKNOWN | ||
) | ||
elif not array_data_type: | ||
array_data_type = cls.datatype_mapping.get( | ||
raw_data_type.lower(), DataType.UNKNOWN | ||
) | ||
raw_data_type = "" | ||
if data_type != DataType.ARRAY: | ||
break | ||
|
||
elif letter != ">": | ||
raw_data_type += letter | ||
|
||
elif letter == ">": | ||
if not array_data_type and data_type: | ||
array_data_type = cls.datatype_mapping.get( | ||
raw_data_type.lower(), DataType.UNKNOWN | ||
) | ||
break | ||
else: | ||
if not data_type: | ||
data_type = cls.datatype_mapping.get( | ||
field.type.lower(), DataType.UNKNOWN | ||
) | ||
|
||
column_def = { | ||
"name": field.column_name, | ||
"dataTypeDisplay": field.type, | ||
"dataType": data_type, | ||
} | ||
if array_data_type: | ||
column_def["arrayDataType"] = array_data_type | ||
|
||
return Column(**column_def) |
Oops, something went wrong.