diff --git a/ingestion/setup.py b/ingestion/setup.py
index e0d99d733612..c79f4b76380d 100644
--- a/ingestion/setup.py
+++ b/ingestion/setup.py
@@ -370,7 +370,7 @@
     VERSIONS["grpc-tools"],
     VERSIONS["neo4j"],
     "testcontainers==3.7.1;python_version<'3.9'",
-    "testcontainers==4.8.0;python_version>='3.9'",
+    "testcontainers==4.8.1;python_version>='3.9'",
     "minio==7.2.5",
     *plugins["mlflow"],
     *plugins["datalake-s3"],
diff --git a/ingestion/tests/integration/sql_server/conftest.py b/ingestion/tests/integration/sql_server/conftest.py
index 8ff1c63c3ee9..6aac7de8e753 100644
--- a/ingestion/tests/integration/sql_server/conftest.py
+++ b/ingestion/tests/integration/sql_server/conftest.py
@@ -1,10 +1,8 @@
 import os
 import shutil
+import tempfile
 
 import pytest
-from sqlalchemy import create_engine, text
-from testcontainers.mssql import SqlServerContainer
-
 from _openmetadata_testutils.helpers.docker import copy_dir_to_container, try_bind
 from metadata.generated.schema.api.services.createDatabaseService import (
     CreateDatabaseServiceRequest,
@@ -18,28 +16,67 @@
     DatabaseService,
     DatabaseServiceType,
 )
+from sqlalchemy import create_engine, text
+from testcontainers.mssql import SqlServerContainer
 
 from ..conftest import ingestion_config as base_ingestion_config
 
 
 @pytest.fixture(scope="session")
-def mssql_container(tmp_path_factory):
+def db_name():
+    """Return the name of the sample database used by the SQL Server tests."""
+    return "AdventureWorksLT2022"
+
+
+class CustomSqlServerConainer(SqlServerContainer):
+    """SQL Server container that adds an mssql-owned /data directory to its image."""
+
+    def start(self) -> "DbContainer":
+        """Build an image where mssql owns /data, then start the container."""
+        dockerfile = f"""
+            FROM {self.image}
+            USER root
+            RUN mkdir -p /data
+            RUN chown mssql /data
+            USER mssql
+        """
+        temp_dir = os.path.join(tempfile.gettempdir(), "mssql")
+        os.makedirs(temp_dir, exist_ok=True)
+        temp_dockerfile_path = os.path.join(temp_dir, "Dockerfile")
+        with open(temp_dockerfile_path, "w") as temp_dockerfile:
+            temp_dockerfile.write(dockerfile)
+        # Tagging with the base image name presumably makes start() run this build.
+        self.get_docker_client().build(temp_dir, tag=self.image)
+        return super().start()
+
+    def _configure(self) -> None:
+        """Extend the parent configuration with the SQL_SA_PASSWORD variable."""
+        super()._configure()
+        self.with_env("SQL_SA_PASSWORD", self.password)
+
+
+@pytest.fixture(scope="session")
+def mssql_container(tmp_path_factory, db_name):
     container = SqlServerContainer(
-        "mcr.microsoft.com/mssql/server:2022-latest", dbname="AdventureWorks"
+        "mcr.microsoft.com/mssql/server:2022-latest", dbname="master"
     )
     data_dir = tmp_path_factory.mktemp("data")
     shutil.copy(
-        os.path.join(os.path.dirname(__file__), "data", "AdventureWorks2017.bak"),
+        os.path.join(os.path.dirname(__file__), "data", f"{db_name}.bak"),
         str(data_dir),
     )
     with open(data_dir / "install.sql", "w") as f:
         f.write(
-            """
+            f"""
 USE [master]
-RESTORE DATABASE [AdventureWorks]
-    FROM DISK = '/data/AdventureWorks2017.bak'
-        WITH MOVE 'AdventureWorks2017' TO '/var/opt/mssql/data/AdventureWorks.mdf',
-        MOVE 'AdventureWorks2017_log' TO '/var/opt/mssql/data/AdventureWorks_log.ldf'
+RESTORE FILELISTONLY
+    FROM DISK = '/data/{db_name}.bak';
+GO
+
+RESTORE DATABASE [{db_name}]
+    FROM DISK = '/data/{db_name}.bak'
+    WITH MOVE '{db_name}_Data' TO '/var/opt/mssql/data/{db_name}.mdf',
+         MOVE '{db_name}_Log' TO '/var/opt/mssql/data/{db_name}.ldf';
 GO
             """
         )
@@ -49,17 +86,22 @@ def mssql_container(tmp_path_factory):
         copy_dir_to_container(str(data_dir), docker_container, "/data")
         res = docker_container.exec_run(
             [
-                "/opt/mssql-tools/bin/sqlcmd",
-                "-S",
-                "localhost",
-                "-U",
-                container.username,
-                "-P",
-                container.password,
-                "-d",
-                "master",
-                "-i",
-                "/data/install.sql",
+                "bash",
+                "-c",
+                " ".join(
+                    [
+                        "/opt/mssql-tools*/bin/sqlcmd",
+                        "-U",
+                        container.username,
+                        "-P",
+                        f"'{container.password}'",
+                        "-d",
+                        "master",
+                        "-i",
+                        "/data/install.sql",
+                        "-C",
+                    ]
+                ),
             ]
         )
         if res[0] != 0:
@@ -72,7 +114,7 @@ def mssql_container(tmp_path_factory):
             transaciton = conn.begin()
             conn.execute(
                 text(
-                    "SELECT * INTO AdventureWorks.HumanResources.DepartmenCopy FROM AdventureWorks.HumanResources.Department;"
+                    f"SELECT * INTO {db_name}.SalesLT.CustomerCopy FROM {db_name}.SalesLT.Customer;"
                 )
             )
             transaciton.commit()
@@ -91,7 +133,7 @@ def scheme(request):
 
 
 @pytest.fixture(scope="module")
-def create_service_request(mssql_container, scheme, tmp_path_factory):
+def create_service_request(mssql_container, scheme, tmp_path_factory, db_name):
     return CreateDatabaseServiceRequest(
         name="docker_test_" + tmp_path_factory.mktemp("mssql").name + "_" + scheme.name,
         serviceType=DatabaseServiceType.Mssql,
@@ -101,7 +143,7 @@ def create_service_request(mssql_container, scheme, tmp_path_factory):
                 password=mssql_container.password,
                 hostPort="localhost:"
                 + mssql_container.get_exposed_port(mssql_container.port),
-                database="AdventureWorks",
+                database=db_name,
                 scheme=scheme,
                 ingestAllDatabases=True,
                 connectionOptions={
@@ -115,12 +157,17 @@ def create_service_request(mssql_container, scheme, tmp_path_factory):
 
 @pytest.fixture(scope="module")
 def ingestion_config(
-    db_service, tmp_path_factory, workflow_config, sink_config, base_ingestion_config
+    db_service,
+    tmp_path_factory,
+    workflow_config,
+    sink_config,
+    base_ingestion_config,
+    db_name,
 ):
     base_ingestion_config["source"]["sourceConfig"]["config"][
         "databaseFilterPattern"
     ] = {
-        "includes": ["TestDB", "AdventureWorks"],
+        "includes": ["TestDB", db_name],
     }
     return base_ingestion_config
 
diff --git a/ingestion/tests/integration/sql_server/data/AdventureWorks2017.bak b/ingestion/tests/integration/sql_server/data/AdventureWorks2017.bak
deleted file mode 100644
index be86e82f5443..000000000000
Binary files a/ingestion/tests/integration/sql_server/data/AdventureWorks2017.bak and /dev/null differ
diff --git a/ingestion/tests/integration/sql_server/data/AdventureWorksLT2022.bak b/ingestion/tests/integration/sql_server/data/AdventureWorksLT2022.bak
new file mode 100644
index 000000000000..6ef2ef01f9a0
Binary files /dev/null and b/ingestion/tests/integration/sql_server/data/AdventureWorksLT2022.bak differ
diff --git a/ingestion/tests/integration/sql_server/test_lineage.py b/ingestion/tests/integration/sql_server/test_lineage.py
index d54e23920aac..f0309bcb427a 100644
--- a/ingestion/tests/integration/sql_server/test_lineage.py
+++ b/ingestion/tests/integration/sql_server/test_lineage.py
@@ -27,7 +27,7 @@ def language_config(mssql_container, request):
 
 
 @pytest.fixture()
-def lineage_config(language_config, db_service, workflow_config, sink_config):
+def lineage_config(language_config, db_service, workflow_config, sink_config, db_name):
     return {
         "source": {
             "type": "mssql-lineage",
@@ -35,7 +35,7 @@ def lineage_config(language_config, db_service, workflow_config, sink_config):
             "sourceConfig": {
                 "config": {
                     "type": "DatabaseLineage",
-                    "databaseFilterPattern": {"includes": ["TestDB", "AdventureWorks"]},
+                    "databaseFilterPattern": {"includes": ["TestDB", db_name]},
                 },
             },
         },
@@ -52,13 +52,14 @@ def test_lineage(
     lineage_config,
     db_service,
     metadata,
+    db_name,
 ):
     search_cache.clear()
     run_workflow(MetadataWorkflow, ingestion_config)
     run_workflow(MetadataWorkflow, lineage_config)
     department_table = metadata.get_by_name(
         Table,
-        f"{db_service.fullyQualifiedName.root}.AdventureWorks.HumanResources.Department",
+        f"{db_service.fullyQualifiedName.root}.{db_name}.SalesLT.Customer",
         nullable=False,
     )
     lineage = metadata.get_lineage_by_id(Table, department_table.id.root)
diff --git a/ingestion/tests/integration/sql_server/test_metadata.py b/ingestion/tests/integration/sql_server/test_metadata.py
index 945873f4f0b1..37050e78d2fa 100644
--- a/ingestion/tests/integration/sql_server/test_metadata.py
+++ b/ingestion/tests/integration/sql_server/test_metadata.py
@@ -15,11 +15,12 @@ def test_ingest_metadata(
     ingestion_config,
     db_service,
     metadata,
+    db_name,
 ):
     run_workflow(MetadataWorkflow, ingestion_config)
     table: Table = metadata.get_by_name(
         Table,
-        f"{db_service.fullyQualifiedName.root}.AdventureWorks.HumanResources.Department",
+        f"{db_service.fullyQualifiedName.root}.{db_name}.SalesLT.Customer",
     )
     assert table is not None
-    assert table.columns[0].name.root == "DepartmentID"
+    assert table.columns[0].name.root == "CustomerID"
diff --git a/ingestion/tests/integration/sql_server/test_usage.py b/ingestion/tests/integration/sql_server/test_usage.py
index b1eb137c8881..457e1e894e09 100644
--- a/ingestion/tests/integration/sql_server/test_usage.py
+++ b/ingestion/tests/integration/sql_server/test_usage.py
@@ -10,7 +10,7 @@
 
 
 @pytest.fixture()
-def usage_config(db_service, workflow_config):
+def usage_config(db_service, workflow_config, db_name):
     return {
         "source": {
             "type": "mssql-usage",
@@ -19,7 +19,7 @@ def usage_config(db_service, workflow_config):
                 "config": {
                     "queryLogDuration": 2,
                     "resultLimit": 1000,
-                    "databaseFilterPattern": {"includes": ["TestDB", "AdventureWorks"]},
+                    "databaseFilterPattern": {"includes": ["TestDB", db_name]},
                 },
             },
         },