From b2f21fa070f954acd145f35a9999ed6740fabea5 Mon Sep 17 00:00:00 2001 From: Ayush Shah Date: Fri, 6 Sep 2024 12:42:41 +0530 Subject: [PATCH] Fixes #17085: Update Oracle count & unique count function to handle HexByteString/Blob types (#17596) --- .../metadata/profiler/orm/functions/count.py | 9 +++++++ .../profiler/orm/functions/unique_count.py | 6 +++++ ingestion/tests/cli_e2e/test_cli_oracle.py | 26 +++++++++---------- 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/ingestion/src/metadata/profiler/orm/functions/count.py b/ingestion/src/metadata/profiler/orm/functions/count.py index b6272d79be94..5c7a8fb4e3db 100644 --- a/ingestion/src/metadata/profiler/orm/functions/count.py +++ b/ingestion/src/metadata/profiler/orm/functions/count.py @@ -18,6 +18,7 @@ from metadata.profiler.metrics.core import CACHE from metadata.profiler.orm.registry import Dialects +from metadata.profiler.orm.types.custom_hex_byte_string import HexByteString # Keep SQA docs style defining custom constructs # pylint: disable=consider-using-f-string,duplicate-code @@ -36,6 +37,14 @@ def _(element, compiler, **kw): return compiler.process(element.clauses, **kw) +@compiles(CountFn, Dialects.Oracle) +def _(element, compiler, **kw): + col_type = element.clauses.clauses[0].type + if isinstance(col_type, HexByteString): + return f"DBMS_LOB.GETLENGTH({compiler.process(element.clauses, **kw)})" + return compiler.process(element.clauses, **kw) + + @compiles(CountFn, Dialects.MSSQL) def _(element, compiler, **kw): col_type = element.clauses.clauses[0].type diff --git a/ingestion/src/metadata/profiler/orm/functions/unique_count.py b/ingestion/src/metadata/profiler/orm/functions/unique_count.py index 9acb809df817..a464ebd040f5 100644 --- a/ingestion/src/metadata/profiler/orm/functions/unique_count.py +++ b/ingestion/src/metadata/profiler/orm/functions/unique_count.py @@ -53,5 +53,11 @@ def _unique_count_query_mssql(col, session, sample): ) +def _unique_count_query_oracle(col, session, sample): + count_fn = CountFn(col) + return _unique_count_query(count_fn, session, sample) + + _unique_count_query_mapper = defaultdict(lambda: _unique_count_query) _unique_count_query_mapper[Dialects.MSSQL] = _unique_count_query_mssql +_unique_count_query_mapper[Dialects.Oracle] = _unique_count_query_oracle diff --git a/ingestion/tests/cli_e2e/test_cli_oracle.py b/ingestion/tests/cli_e2e/test_cli_oracle.py index f870b17da432..3b9ba6e78ac0 100644 --- a/ingestion/tests/cli_e2e/test_cli_oracle.py +++ b/ingestion/tests/cli_e2e/test_cli_oracle.py @@ -15,8 +15,6 @@ from typing import List -import pytest - from metadata.ingestion.api.status import Status from .base.e2e_types import E2EType @@ -48,14 +46,19 @@ class OracleCliTest(CliCommonDB.TestSuite, SQACommonMethods): insert_data_queries: List[str] = [ """ - INSERT INTO admin.admin_emp (empno, ename, ssn, job, mgr, sal, comm, comments, status) WITH names AS ( -SELECT 1, 'John Doe', 12356789, 'Manager', 121, 5200.0, 5000.0, 'Amazing', 'Active' FROM dual UNION ALL -SELECT 2, 'Jane Doe', 123467189, 'Clerk', 131, 503.0, 5000.0, 'Wow', 'Active' FROM dual UNION ALL -SELECT 3, 'Jon Doe', 123562789, 'Assistant', 141, 5000.0, 5000.0, 'Nice', 'Active' FROM dual UNION ALL -SELECT 4, 'Jon Doe', 13456789, 'Manager', 151, 5050.0, 5000.0, 'Excellent', 'Active' FROM dual + INSERT INTO admin.admin_emp (empno, ename, ssn, job, mgr, sal, comm, comments, status, photo) WITH names AS ( +SELECT 1, 'John Doe', 12356789, 'Manager', 121, 5200.0, 5000.0, 'Amazing', 'Active', EMPTY_BLOB() FROM dual UNION ALL +SELECT 2, 'Jane Doe', 123467189, 'Clerk', 131, 503.0, 5000.0, 'Wow', 'Active', EMPTY_BLOB() FROM dual UNION ALL +SELECT 3, 'Jon Doe', 123562789, 'Assistant', 141, 5000.0, 5000.0, 'Nice', 'Active', EMPTY_BLOB() FROM dual ) SELECT * from names -""" +""", + """ +INSERT INTO admin.admin_emp (empno, ename, ssn, job, mgr, sal, comm, comments, status, photo) WITH names AS ( +SELECT 4, 'Jon Doe', 13456789, 'Manager', 151, 5050.0, 5000.0, 'Excellent', 'Active', UTL_RAW.CAST_TO_RAW('your_binary_data') FROM dual +) +SELECT * from names +""", ] drop_table_query: str = """ @@ -98,11 +101,11 @@ def view_column_lineage_count(self) -> int: @staticmethod def fqn_created_table() -> str: - return "e2e_oracle.default.admin.admin_emp" + return "e2e_oracle.default.admin.ADMIN_EMP" @staticmethod def _fqn_deleted_table() -> str: - return "e2e_oracle.default.admin.admin_emp" + return "e2e_oracle.default.admin.ADMIN_EMP" @staticmethod def get_includes_schemas() -> List[str]: @@ -136,9 +139,6 @@ def expected_filtered_table_excludes() -> int: def expected_filtered_mix() -> int: return 43 - @pytest.mark.xfail( - reason="Issue Raised: https://github.com/open-metadata/OpenMetadata/issues/17085" - ) def test_create_table_with_profiler(self) -> None: # delete table in case it exists self.delete_table_and_view()