diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py
index 62976213dab68..d91016fd0af0c 100644
--- a/metadata-ingestion/setup.py
+++ b/metadata-ingestion/setup.py
@@ -198,29 +198,29 @@ def get_long_description():
     "console_scripts": ["datahub = datahub.entrypoints:main"],
     "datahub.ingestion.source.plugins": [
         "file = datahub.ingestion.source.file:GenericFileSource",
-        "sqlalchemy = datahub.ingestion.source.sql_generic:SQLAlchemyGenericSource",
-        "athena = datahub.ingestion.source.athena:AthenaSource",
-        "bigquery = datahub.ingestion.source.bigquery:BigQuerySource",
-        "bigquery-usage = datahub.ingestion.source.bigquery_usage:BigQueryUsageSource",
+        "sqlalchemy = datahub.ingestion.source.sql.sql_generic:SQLAlchemyGenericSource",
+        "athena = datahub.ingestion.source.sql.athena:AthenaSource",
+        "bigquery = datahub.ingestion.source.sql.bigquery:BigQuerySource",
+        "bigquery-usage = datahub.ingestion.source.usage.bigquery_usage:BigQueryUsageSource",
         "dbt = datahub.ingestion.source.dbt:DBTSource",
-        "druid = datahub.ingestion.source.druid:DruidSource",
+        "druid = datahub.ingestion.source.sql.druid:DruidSource",
         "feast = datahub.ingestion.source.feast:FeastSource",
-        "glue = datahub.ingestion.source.glue:GlueSource",
-        "sagemaker = datahub.ingestion.source.sagemaker:SagemakerSource",
-        "hive = datahub.ingestion.source.hive:HiveSource",
+        "glue = datahub.ingestion.source.aws.glue:GlueSource",
+        "sagemaker = datahub.ingestion.source.aws.sagemaker:SagemakerSource",
+        "hive = datahub.ingestion.source.sql.hive:HiveSource",
         "kafka = datahub.ingestion.source.kafka:KafkaSource",
         "kafka-connect = datahub.ingestion.source.kafka_connect:KafkaConnectSource",
         "ldap = datahub.ingestion.source.ldap:LDAPSource",
         "looker = datahub.ingestion.source.looker:LookerDashboardSource",
         "lookml = datahub.ingestion.source.lookml:LookMLSource",
         "mongodb = datahub.ingestion.source.mongodb:MongoDBSource",
-        "mssql = datahub.ingestion.source.mssql:SQLServerSource",
-        "mysql = datahub.ingestion.source.mysql:MySQLSource",
-        "oracle = datahub.ingestion.source.oracle:OracleSource",
-        "postgres = datahub.ingestion.source.postgres:PostgresSource",
-        "redshift = datahub.ingestion.source.redshift:RedshiftSource",
-        "snowflake = datahub.ingestion.source.snowflake:SnowflakeSource",
-        "snowflake-usage = datahub.ingestion.source.snowflake_usage:SnowflakeUsageSource",
+        "mssql = datahub.ingestion.source.sql.mssql:SQLServerSource",
+        "mysql = datahub.ingestion.source.sql.mysql:MySQLSource",
+        "oracle = datahub.ingestion.source.sql.oracle:OracleSource",
+        "postgres = datahub.ingestion.source.sql.postgres:PostgresSource",
+        "redshift = datahub.ingestion.source.sql.redshift:RedshiftSource",
+        "snowflake = datahub.ingestion.source.sql.snowflake:SnowflakeSource",
+        "snowflake-usage = datahub.ingestion.source.usage.snowflake_usage:SnowflakeUsageSource",
         "superset = datahub.ingestion.source.superset:SupersetSource",
     ],
     "datahub.ingestion.sink.plugins": [
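Only the right-hand side of each entry point changes here; the recipe-facing plugin names stay stable. As a hedged illustration (not DataHub's actual registry code, and assuming the Python 3.10+ `importlib.metadata` API), this is roughly how a source name resolves to its relocated class:

```python
# Illustrative only: DataHub has its own plugin registry; this just shows how the
# "name = module:Class" entries above are consumed via setuptools entry points.
from importlib.metadata import entry_points


def load_source_class(name: str):
    for ep in entry_points(group="datahub.ingestion.source.plugins"):  # Python 3.10+ API
        if ep.name == name:
            # e.g. "mysql" now loads datahub.ingestion.source.sql.mysql:MySQLSource
            return ep.load()
    raise KeyError(f"no ingestion source registered as {name!r}")
```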
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sagemaker_processors/__init__.py b/metadata-ingestion/src/datahub/ingestion/source/aws/__init__.py
similarity index 100%
rename from metadata-ingestion/src/datahub/ingestion/source/sagemaker_processors/__init__.py
rename to metadata-ingestion/src/datahub/ingestion/source/aws/__init__.py
diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws_common.py b/metadata-ingestion/src/datahub/ingestion/source/aws/aws_common.py
similarity index 100%
rename from metadata-ingestion/src/datahub/ingestion/source/aws_common.py
rename to metadata-ingestion/src/datahub/ingestion/source/aws/aws_common.py
diff --git a/metadata-ingestion/src/datahub/ingestion/source/glue.py b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py
similarity index 99%
rename from metadata-ingestion/src/datahub/ingestion/source/glue.py
rename to metadata-ingestion/src/datahub/ingestion/source/aws/glue.py
index 6f614121c8783..0a73030d399fe 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/glue.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py
@@ -10,7 +10,7 @@
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.api.source import Source, SourceReport
 from datahub.ingestion.api.workunit import MetadataWorkUnit
-from datahub.ingestion.source.aws_common import AwsSourceConfig, make_s3_urn
+from datahub.ingestion.source.aws.aws_common import AwsSourceConfig, make_s3_urn
 from datahub.metadata.com.linkedin.pegasus2avro.common import Status
 from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot
 from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
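Because only module paths moved, existing recipes keep working unchanged. A minimal sketch of driving the relocated Glue source through the normal pipeline API (the `aws_region` value and file sink path below are placeholder values, assumed rather than taken from this diff):

```python
# Hedged sketch: the recipe-facing source type "glue" is unchanged by the move to
# datahub.ingestion.source.aws.glue; only the entry point target behind it differs.
from datahub.ingestion.run.pipeline import Pipeline

pipeline = Pipeline.create(
    {
        "source": {"type": "glue", "config": {"aws_region": "us-west-2"}},
        "sink": {"type": "file", "config": {"filename": "./glue_mces.json"}},
    }
)
pipeline.run()
pipeline.raise_from_status()
```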
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sagemaker.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker.py
similarity index 88%
rename from metadata-ingestion/src/datahub/ingestion/source/sagemaker.py
rename to metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker.py
index f8f060cbcf016..28b40d947b0a8 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sagemaker.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker.py
@@ -4,20 +4,20 @@
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.api.source import Source
 from datahub.ingestion.api.workunit import MetadataWorkUnit
-from datahub.ingestion.source.sagemaker_processors.common import (
+from datahub.ingestion.source.aws.sagemaker_processors.common import (
     SagemakerSourceConfig,
     SagemakerSourceReport,
 )
-from datahub.ingestion.source.sagemaker_processors.feature_groups import (
+from datahub.ingestion.source.aws.sagemaker_processors.feature_groups import (
     FeatureGroupProcessor,
 )
-from datahub.ingestion.source.sagemaker_processors.jobs import (
+from datahub.ingestion.source.aws.sagemaker_processors.jobs import (
     JobKey,
     JobProcessor,
     ModelJob,
 )
-from datahub.ingestion.source.sagemaker_processors.lineage import LineageProcessor
-from datahub.ingestion.source.sagemaker_processors.models import ModelProcessor
+from datahub.ingestion.source.aws.sagemaker_processors.lineage import LineageProcessor
+from datahub.ingestion.source.aws.sagemaker_processors.models import ModelProcessor
 class SagemakerSource(Source):
diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/__init__.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sagemaker_processors/common.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/common.py
similarity index 94%
rename from metadata-ingestion/src/datahub/ingestion/source/sagemaker_processors/common.py
rename to metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/common.py
index 13a82033ea7b8..9272f3fd81907 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sagemaker_processors/common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/common.py
@@ -2,7 +2,7 @@
 from typing import Dict, Optional, Union
 from datahub.ingestion.api.source import SourceReport
-from datahub.ingestion.source.aws_common import AwsSourceConfig
+from datahub.ingestion.source.aws.aws_common import AwsSourceConfig
 class SagemakerSourceConfig(AwsSourceConfig):
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sagemaker_processors/feature_groups.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py
similarity index 99%
rename from metadata-ingestion/src/datahub/ingestion/source/sagemaker_processors/feature_groups.py
rename to metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py
index a23a1c7678596..ad2c5d92f32f9 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sagemaker_processors/feature_groups.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py
@@ -3,7 +3,9 @@
 import datahub.emitter.mce_builder as builder
 from datahub.ingestion.api.workunit import MetadataWorkUnit
-from datahub.ingestion.source.sagemaker_processors.common import SagemakerSourceReport
+from datahub.ingestion.source.aws.sagemaker_processors.common import (
+    SagemakerSourceReport,
+)
 from datahub.metadata.com.linkedin.pegasus2avro.common import MLFeatureDataType
 from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import (
     MLFeatureSnapshot,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sagemaker_processors/jobs.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/jobs.py
similarity index 99%
rename from metadata-ingestion/src/datahub/ingestion/source/sagemaker_processors/jobs.py
rename to metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/jobs.py
index 440c712b94ad8..9e1667bfc3bee 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sagemaker_processors/jobs.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/jobs.py
@@ -16,8 +16,10 @@
 from datahub.emitter import mce_builder
 from datahub.ingestion.api.workunit import MetadataWorkUnit
-from datahub.ingestion.source.aws_common import make_s3_urn
-from datahub.ingestion.source.sagemaker_processors.common import SagemakerSourceReport
+from datahub.ingestion.source.aws.aws_common import make_s3_urn
+from datahub.ingestion.source.aws.sagemaker_processors.common import (
+    SagemakerSourceReport,
+)
 from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot
 from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
 from datahub.metadata.schema_classes import (
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sagemaker_processors/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/lineage.py
similarity index 98%
rename from metadata-ingestion/src/datahub/ingestion/source/sagemaker_processors/lineage.py
rename to metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/lineage.py
index d9e4d178cd7ee..92f17e37b94bf 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sagemaker_processors/lineage.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/lineage.py
@@ -2,7 +2,9 @@
 from dataclasses import dataclass, field
 from typing import Any, DefaultDict, Dict, List, Set
-from datahub.ingestion.source.sagemaker_processors.common import SagemakerSourceReport
+from datahub.ingestion.source.aws.sagemaker_processors.common import (
+    SagemakerSourceReport,
+)
 @dataclass
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sagemaker_processors/models.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py
similarity index 98%
rename from metadata-ingestion/src/datahub/ingestion/source/sagemaker_processors/models.py
rename to metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py
index e72252b13e3e1..365d5f53c1744 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sagemaker_processors/models.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py
@@ -5,13 +5,15 @@
 import datahub.emitter.mce_builder as builder
 from datahub.ingestion.api.workunit import MetadataWorkUnit
-from datahub.ingestion.source.sagemaker_processors.common import SagemakerSourceReport
-from datahub.ingestion.source.sagemaker_processors.jobs import (
+from datahub.ingestion.source.aws.sagemaker_processors.common import (
+    SagemakerSourceReport,
+)
+from datahub.ingestion.source.aws.sagemaker_processors.jobs import (
     JobDirection,
     JobKey,
     ModelJob,
 )
-from datahub.ingestion.source.sagemaker_processors.lineage import LineageInfo
+from datahub.ingestion.source.aws.sagemaker_processors.lineage import LineageInfo
 from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import (
     MLModelDeploymentSnapshot,
     MLModelGroupSnapshot,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt.py b/metadata-ingestion/src/datahub/ingestion/source/dbt.py
index d641585c17dc0..3971bc0b9d63a 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/dbt.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/dbt.py
@@ -11,7 +11,7 @@
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.api.source import Source, SourceReport
 from datahub.ingestion.api.workunit import MetadataWorkUnit
-from datahub.ingestion.source.dbt_types import (
+from datahub.ingestion.source.sql.sql_types import (
     POSTGRES_TYPES_MAP,
     SNOWFLAKE_TYPES_MAP,
     resolve_postgres_modified_type,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/lookml.py b/metadata-ingestion/src/datahub/ingestion/source/lookml.py
index f04d9828f2dfc..6ecc61a712783 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/lookml.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/lookml.py
@@ -24,6 +24,11 @@
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.api.source import Source, SourceReport
 from datahub.ingestion.api.workunit import MetadataWorkUnit
+from datahub.ingestion.source.sql.sql_types import (
+    POSTGRES_TYPES_MAP,
+    SNOWFLAKE_TYPES_MAP,
+    resolve_postgres_modified_type,
+)
 from datahub.metadata.com.linkedin.pegasus2avro.common import Status
 from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
     DatasetLineageTypeClass,
@@ -420,6 +425,46 @@ def get_including_extends(
         return None
+field_type_mapping = {
+    **POSTGRES_TYPES_MAP,
+    **SNOWFLAKE_TYPES_MAP,
+    "date": DateTypeClass,
+    "date_time": TimeTypeClass,
+    "date_millisecond": TimeTypeClass,
+    "date_minute": TimeTypeClass,
+    "date_raw": TimeTypeClass,
+    "date_week": TimeTypeClass,
+    "duration_day": TimeTypeClass,
+    "distance": NumberTypeClass,
+    "duration": NumberTypeClass,
+    "location": UnionTypeClass,
+    "number": NumberTypeClass,
+    "string": StringTypeClass,
+    "tier": EnumTypeClass,
+    "time": TimeTypeClass,
+    "unquoted": StringTypeClass,
+    "yesno": BooleanTypeClass,
+    "zipcode": EnumTypeClass,
+    "int": NumberTypeClass,
+    "average": NumberTypeClass,
+    "average_distinct": NumberTypeClass,
+    "count": NumberTypeClass,
+    "count_distinct": NumberTypeClass,
+    "list": ArrayTypeClass,
+    "max": NumberTypeClass,
+    "median": NumberTypeClass,
+    "median_distinct": NumberTypeClass,
+    "min": NumberTypeClass,
+    "percent_of_previous": NumberTypeClass,
+    "percent_of_total": NumberTypeClass,
+    "percentile": NumberTypeClass,
+    "percentile_distinct": NumberTypeClass,
+    "running_total": NumberTypeClass,
+    "sum": NumberTypeClass,
+    "sum_distinct": NumberTypeClass,
+}
+
+
 class LookMLSource(Source):
     source_config: LookMLSourceConfig
     reporter: LookMLSourceReport
@@ -485,46 +530,22 @@ def _get_upstream_lineage(self, looker_view: LookerView) -> UpstreamLineage:
         return upstream_lineage
     def _get_field_type(self, native_type: str) -> SchemaFieldDataType:
-        field_type_mapping = {
-            "date": DateTypeClass,
-            "date_time": TimeTypeClass,
-            "distance": NumberTypeClass,
-            "duration": NumberTypeClass,
-            "location": UnionTypeClass,
-            "number": NumberTypeClass,
-            "string": StringTypeClass,
-            "tier": EnumTypeClass,
-            "time": TimeTypeClass,
-            "unquoted": StringTypeClass,
-            "yesno": BooleanTypeClass,
-            "zipcode": EnumTypeClass,
-            "int": NumberTypeClass,
-            "average": NumberTypeClass,
-            "average_distinct": NumberTypeClass,
-            "count": NumberTypeClass,
-            "count_distinct": NumberTypeClass,
-            "list": ArrayTypeClass,
-            "max": NumberTypeClass,
-            "median": NumberTypeClass,
-            "median_distinct": NumberTypeClass,
-            "min": NumberTypeClass,
-            "percent_of_previous": NumberTypeClass,
-            "percent_of_total": NumberTypeClass,
-            "percentile": NumberTypeClass,
-            "percentile_distinct": NumberTypeClass,
-            "running_total": NumberTypeClass,
-            "sum": NumberTypeClass,
-            "sum_distinct": NumberTypeClass,
-        }
-
-        if native_type in field_type_mapping:
-            type_class = field_type_mapping[native_type]
-        else:
+
+        type_class = field_type_mapping.get(native_type)
+
+        if type_class is None:
+
+            # attempt Postgres modified type
+            type_class = resolve_postgres_modified_type(native_type)
+
+        # if still not found, report a warning
+        if type_class is None:
             self.reporter.report_warning(
                 native_type,
                 f"The type '{native_type}' is not recognized for field type, setting as NullTypeClass.",
             )
             type_class = NullTypeClass
+
         data_type = SchemaFieldDataType(type=type_class())
         return data_type
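The LookML field-type lookup above now reuses the shared SQL type maps and falls back to `resolve_postgres_modified_type`. A minimal sketch of that lookup order, using only names imported in the hunk (the example type string is illustrative):

```python
# Sketch of the lookup order in LookMLSource._get_field_type after this change.
from datahub.ingestion.source.sql.sql_types import (
    POSTGRES_TYPES_MAP,
    SNOWFLAKE_TYPES_MAP,
    resolve_postgres_modified_type,
)


def lookup(native_type: str):
    # 1. exact match against the merged LookML + Postgres + Snowflake names
    type_class = {**POSTGRES_TYPES_MAP, **SNOWFLAKE_TYPES_MAP}.get(native_type)
    # 2. fall back to parameterized Postgres types, e.g. "character varying(65)"
    if type_class is None:
        type_class = resolve_postgres_modified_type(native_type)
    # 3. the caller reports a warning and substitutes NullTypeClass if both miss
    return type_class
```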
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/__init__.py b/metadata-ingestion/src/datahub/ingestion/source/sql/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/metadata-ingestion/src/datahub/ingestion/source/athena.py b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py
similarity index 100%
rename from metadata-ingestion/src/datahub/ingestion/source/athena.py
rename to metadata-ingestion/src/datahub/ingestion/source/sql/athena.py
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py
similarity index 100%
rename from metadata-ingestion/src/datahub/ingestion/source/bigquery.py
rename to metadata-ingestion/src/datahub/ingestion/source/sql/bigquery.py
diff --git a/metadata-ingestion/src/datahub/ingestion/source/druid.py b/metadata-ingestion/src/datahub/ingestion/source/sql/druid.py
similarity index 100%
rename from metadata-ingestion/src/datahub/ingestion/source/druid.py
rename to metadata-ingestion/src/datahub/ingestion/source/sql/druid.py
diff --git a/metadata-ingestion/src/datahub/ingestion/source/hive.py b/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py
similarity index 96%
rename from metadata-ingestion/src/datahub/ingestion/source/hive.py
rename to metadata-ingestion/src/datahub/ingestion/source/sql/hive.py
index e01d401004cd2..33256ab6d46b8 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/hive.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py
@@ -2,7 +2,7 @@
 from pyhive import hive  # noqa: F401
 from pyhive.sqlalchemy_hive import HiveDate, HiveDecimal, HiveTimestamp
-from datahub.ingestion.source.sql_common import (
+from datahub.ingestion.source.sql.sql_common import (
     BasicSQLAlchemyConfig,
     SQLAlchemySource,
     register_custom_type,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/mssql.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql.py
similarity index 100%
rename from metadata-ingestion/src/datahub/ingestion/source/mssql.py
rename to metadata-ingestion/src/datahub/ingestion/source/sql/mssql.py
diff --git a/metadata-ingestion/src/datahub/ingestion/source/mysql.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mysql.py
similarity index 100%
rename from metadata-ingestion/src/datahub/ingestion/source/mysql.py
rename to metadata-ingestion/src/datahub/ingestion/source/sql/mysql.py
diff --git a/metadata-ingestion/src/datahub/ingestion/source/oracle.py b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py
similarity index 100%
rename from metadata-ingestion/src/datahub/ingestion/source/oracle.py
rename to metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py
diff --git a/metadata-ingestion/src/datahub/ingestion/source/postgres.py b/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py
similarity index 100%
rename from metadata-ingestion/src/datahub/ingestion/source/postgres.py
rename to metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py
diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift.py b/metadata-ingestion/src/datahub/ingestion/source/sql/redshift.py
similarity index 95%
rename from metadata-ingestion/src/datahub/ingestion/source/redshift.py
rename to metadata-ingestion/src/datahub/ingestion/source/sql/redshift.py
index ae9fe7cd62617..4999c6e2a4701 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/redshift.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/redshift.py
@@ -7,8 +7,8 @@
 from sqlalchemy_redshift.dialect import RedshiftDialect, RelationKey
 from datahub.ingestion.api.common import PipelineContext
-from datahub.ingestion.source.postgres import PostgresConfig
-from datahub.ingestion.source.sql_common import SQLAlchemySource
+from datahub.ingestion.source.sql.postgres import PostgresConfig
+from datahub.ingestion.source.sql.sql_common import SQLAlchemySource
 # TRICKY: it's necessary to import the Postgres source because
 # that module has some side effects that we care about here.
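The hive.py and redshift.py hunks above both depend on import-time registration: the "TRICKY" comment keeps the postgres import alive precisely because the sql.* modules run registration code when imported. A hedged sketch of that pattern, where the `register_custom_type` call signature is assumed from the names imported in the hive.py hunk and is illustrative only:

```python
# Illustrative sketch only: the exact call sites are not shown in this diff.
# Modules under datahub.ingestion.source.sql can register dialect-specific column
# types at module level, so importing them (even just for PostgresConfig, as
# redshift.py does above) is what makes the registrations take effect.
from pyhive.sqlalchemy_hive import HiveDate

from datahub.ingestion.source.sql.sql_common import register_custom_type
from datahub.metadata.schema_classes import DateTypeClass

# Assumed usage: map a SQLAlchemy type to a DataHub schema field type class at import time.
register_custom_type(HiveDate, DateTypeClass)
```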
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake.py b/metadata-ingestion/src/datahub/ingestion/source/sql/snowflake.py
similarity index 100%
rename from metadata-ingestion/src/datahub/ingestion/source/snowflake.py
rename to metadata-ingestion/src/datahub/ingestion/source/sql/snowflake.py
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py
similarity index 100%
rename from metadata-ingestion/src/datahub/ingestion/source/sql_common.py
rename to metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql_generic.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic.py
similarity index 100%
rename from metadata-ingestion/src/datahub/ingestion/source/sql_generic.py
rename to metadata-ingestion/src/datahub/ingestion/source/sql/sql_generic.py
diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt_types.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py
similarity index 100%
rename from metadata-ingestion/src/datahub/ingestion/source/dbt_types.py
rename to metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py
diff --git a/metadata-ingestion/src/datahub/ingestion/source/usage/__init__.py b/metadata-ingestion/src/datahub/ingestion/source/usage/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_usage.py b/metadata-ingestion/src/datahub/ingestion/source/usage/bigquery_usage.py
similarity index 99%
rename from metadata-ingestion/src/datahub/ingestion/source/bigquery_usage.py
rename to metadata-ingestion/src/datahub/ingestion/source/usage/bigquery_usage.py
index 5f7d705d31ccb..076755879f0bf 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_usage.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/usage/bigquery_usage.py
@@ -15,7 +15,7 @@
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.api.source import Source, SourceReport
 from datahub.ingestion.api.workunit import UsageStatsWorkUnit
-from datahub.ingestion.source.usage_common import (
+from datahub.ingestion.source.usage.usage_common import (
     BaseUsageConfig,
     GenericAggregatedDataset,
     get_time_bucket,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake_usage.py b/metadata-ingestion/src/datahub/ingestion/source/usage/snowflake_usage.py
similarity index 98%
rename from metadata-ingestion/src/datahub/ingestion/source/snowflake_usage.py
rename to metadata-ingestion/src/datahub/ingestion/source/usage/snowflake_usage.py
index 629d38a8c729f..0326cf527efe9 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake_usage.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/usage/snowflake_usage.py
@@ -13,8 +13,8 @@
 import datahub.emitter.mce_builder as builder
 from datahub.ingestion.api.source import Source, SourceReport
 from datahub.ingestion.api.workunit import UsageStatsWorkUnit
-from datahub.ingestion.source.snowflake import BaseSnowflakeConfig
-from datahub.ingestion.source.usage_common import (
+from datahub.ingestion.source.sql.snowflake import BaseSnowflakeConfig
+from datahub.ingestion.source.usage.usage_common import (
     BaseUsageConfig,
     GenericAggregatedDataset,
     get_time_bucket,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/usage_common.py b/metadata-ingestion/src/datahub/ingestion/source/usage/usage_common.py
similarity index 100%
rename from metadata-ingestion/src/datahub/ingestion/source/usage_common.py
rename to metadata-ingestion/src/datahub/ingestion/source/usage/usage_common.py
diff --git a/metadata-ingestion/tests/integration/bigquery-usage/test_bigquery_usage.py b/metadata-ingestion/tests/integration/bigquery-usage/test_bigquery_usage.py
index 2fb10c598f30e..a7b5d273d6a3d 100644
--- a/metadata-ingestion/tests/integration/bigquery-usage/test_bigquery_usage.py
+++ b/metadata-ingestion/tests/integration/bigquery-usage/test_bigquery_usage.py
@@ -6,7 +6,7 @@
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.run.pipeline import Pipeline
-from datahub.ingestion.source.bigquery_usage import (
+from datahub.ingestion.source.usage.bigquery_usage import (
     BigQueryUsageConfig,
     BigQueryUsageSource,
 )
@@ -54,7 +54,7 @@ def test_bq_usage_source(pytestconfig, tmp_path):
         logs.write(log_entries)
     with unittest.mock.patch(
-        "datahub.ingestion.source.bigquery_usage.GCPLoggingClient", autospec=True
+        "datahub.ingestion.source.usage.bigquery_usage.GCPLoggingClient", autospec=True
     ) as MockClient:
         # Add mock BigQuery API responses.
         with bigquery_reference_logs_path.open() as logs:
diff --git a/metadata-ingestion/tests/unit/sagemaker/test_sagemaker_source.py b/metadata-ingestion/tests/unit/sagemaker/test_sagemaker_source.py
index 2cad33e033683..fe1518c674030 100644
--- a/metadata-ingestion/tests/unit/sagemaker/test_sagemaker_source.py
+++ b/metadata-ingestion/tests/unit/sagemaker/test_sagemaker_source.py
@@ -4,8 +4,11 @@
 from freezegun import freeze_time
 from datahub.ingestion.api.common import PipelineContext
-from datahub.ingestion.source.sagemaker import SagemakerSource, SagemakerSourceConfig
-from datahub.ingestion.source.sagemaker_processors.jobs import SAGEMAKER_JOB_TYPES
+from datahub.ingestion.source.aws.sagemaker import (
+    SagemakerSource,
+    SagemakerSourceConfig,
+)
+from datahub.ingestion.source.aws.sagemaker_processors.jobs import SAGEMAKER_JOB_TYPES
 from tests.test_helpers import mce_helpers
 from tests.unit.test_sagemaker_source_stubs import (
     describe_endpoint_response_1,
diff --git a/metadata-ingestion/tests/unit/test_druid_source.py b/metadata-ingestion/tests/unit/test_druid_source.py
index 60afacf08fb43..d726ac501ecab 100644
--- a/metadata-ingestion/tests/unit/test_druid_source.py
+++ b/metadata-ingestion/tests/unit/test_druid_source.py
@@ -3,7 +3,7 @@
 @pytest.mark.integration
 def test_druid_uri():
-    from datahub.ingestion.source.druid import DruidConfig
+    from datahub.ingestion.source.sql.druid import DruidConfig
     config = DruidConfig.parse_obj({"host_port": "localhost:8082"})
diff --git a/metadata-ingestion/tests/unit/test_glue_source.py b/metadata-ingestion/tests/unit/test_glue_source.py
index 77f71c6647c5a..6bed9f953ec64 100644
--- a/metadata-ingestion/tests/unit/test_glue_source.py
+++ b/metadata-ingestion/tests/unit/test_glue_source.py
@@ -4,7 +4,11 @@
 from freezegun import freeze_time
 from datahub.ingestion.api.common import PipelineContext
-from datahub.ingestion.source.glue import GlueSource, GlueSourceConfig, get_column_type
+from datahub.ingestion.source.aws.glue import (
+    GlueSource,
+    GlueSourceConfig,
+    get_column_type,
+)
 from datahub.metadata.com.linkedin.pegasus2avro.schema import (
     ArrayTypeClass,
     MapTypeClass,
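The remaining test updates are mechanical: `unittest.mock.patch` resolves its target string by import path, so every patch target has to track the new package layout. A minimal sketch of the pattern, reusing the patched symbol from the BigQuery usage test above:

```python
import unittest.mock

# The target must name the module where the symbol is looked up at runtime,
# which after this change lives under the usage.* package.
with unittest.mock.patch(
    "datahub.ingestion.source.usage.bigquery_usage.GCPLoggingClient", autospec=True
) as MockClient:
    ...  # configure mocked log entries here, as the integration test does
```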
diff --git a/metadata-ingestion/tests/unit/test_hive_source.py b/metadata-ingestion/tests/unit/test_hive_source.py
index 3888f3fa559c4..365b39c8cec83 100644
--- a/metadata-ingestion/tests/unit/test_hive_source.py
+++ b/metadata-ingestion/tests/unit/test_hive_source.py
@@ -3,7 +3,7 @@
 @pytest.mark.integration
 def test_hive_configuration_get_identifier_with_database():
-    from datahub.ingestion.source.hive import HiveConfig
+    from datahub.ingestion.source.sql.hive import HiveConfig
     test_db_name = "test_database"
     test_schema_name = "test_schema"
diff --git a/metadata-ingestion/tests/unit/test_oracle_source.py b/metadata-ingestion/tests/unit/test_oracle_source.py
index a4b8a798da778..18d4e85296e36 100644
--- a/metadata-ingestion/tests/unit/test_oracle_source.py
+++ b/metadata-ingestion/tests/unit/test_oracle_source.py
@@ -3,7 +3,7 @@
 import pytest
 from datahub.ingestion.api.common import PipelineContext
-from datahub.ingestion.source.oracle import OracleConfig, OracleSource
+from datahub.ingestion.source.sql.oracle import OracleConfig, OracleSource
 def test_oracle_config():
@@ -34,7 +34,7 @@ def test_oracle_config():
     )
     with unittest.mock.patch(
-        "datahub.ingestion.source.sql_common.SQLAlchemySource.get_workunits"
+        "datahub.ingestion.source.sql.sql_common.SQLAlchemySource.get_workunits"
     ):
         OracleSource.create(
             {
diff --git a/metadata-ingestion/tests/unit/test_postgres_source.py b/metadata-ingestion/tests/unit/test_postgres_source.py
index a146d13a9a2c4..97b33151f0928 100644
--- a/metadata-ingestion/tests/unit/test_postgres_source.py
+++ b/metadata-ingestion/tests/unit/test_postgres_source.py
@@ -1,4 +1,4 @@
-from datahub.ingestion.source.postgres import PostgresConfig
+from datahub.ingestion.source.sql.postgres import PostgresConfig
 def _base_config():
diff --git a/metadata-ingestion/tests/unit/test_snowflake_source.py b/metadata-ingestion/tests/unit/test_snowflake_source.py
index 3eea25f779a17..d0cebbec87449 100644
--- a/metadata-ingestion/tests/unit/test_snowflake_source.py
+++ b/metadata-ingestion/tests/unit/test_snowflake_source.py
@@ -3,7 +3,7 @@
 @pytest.mark.integration
 def test_snowflake_uri():
-    from datahub.ingestion.source.snowflake import SnowflakeConfig
+    from datahub.ingestion.source.sql.snowflake import SnowflakeConfig
     config = SnowflakeConfig.parse_obj(
         {