From 71a1678d6e45963b39114b45d3582c84f3a3c6b8 Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Fri, 8 Dec 2023 18:28:09 -0500 Subject: [PATCH] docs(ingest/sql-queries): Rearrange sections --- .../sql-queries/{sql-queries.md => sql-queries_pre.md} | 5 +++-- .../src/datahub/ingestion/source/sql_queries.py | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) rename metadata-ingestion/docs/sources/sql-queries/{sql-queries.md => sql-queries_pre.md} (67%) diff --git a/metadata-ingestion/docs/sources/sql-queries/sql-queries.md b/metadata-ingestion/docs/sources/sql-queries/sql-queries_pre.md similarity index 67% rename from metadata-ingestion/docs/sources/sql-queries/sql-queries.md rename to metadata-ingestion/docs/sources/sql-queries/sql-queries_pre.md index e829b4366bb84..2d915f0bcf84d 100644 --- a/metadata-ingestion/docs/sources/sql-queries/sql-queries.md +++ b/metadata-ingestion/docs/sources/sql-queries/sql-queries_pre.md @@ -1,8 +1,9 @@ -### Example Queries File +#### Example Queries File ```json {"query": "SELECT x FROM my_table", "timestamp": 1689232738.051, "user": "user_a", "downstream_tables": [], "upstream_tables": ["my_database.my_schema.my_table"]} {"query": "INSERT INTO my_table VALUES (1, 'a')", "timestamp": 1689232737.669, "user": "user_b", "downstream_tables": ["my_database.my_schema.my_table"], "upstream_tables": []} ``` -Note that this is not a valid standard JSON file, but rather a file containing one JSON object per line. +Note that this file does not represent a single JSON object, but instead newline-delimited JSON, in which +each line is a separate JSON object. diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py b/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py index 58e9682df935e..c3d6657c81fa7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py @@ -93,8 +93,9 @@ def compute_stats(self) -> None: @capability(SourceCapability.LINEAGE_FINE, "Parsed from SQL queries") class SqlQueriesSource(Source): """ - This source reads a specifically-formatted JSON file containing SQL queries and parses them to generate lineage. + This source reads a newline-delimited JSON file containing SQL queries and parses them to generate lineage. + ### Query File Format This file should contain one JSON object per line, with the following fields: - query: string - The SQL query to parse. - timestamp (optional): number - The timestamp of the query, in seconds since the epoch.