From 0d8b66d9ba9e45699f78303fd481d4fed8b3dc1b Mon Sep 17 00:00:00 2001 From: Ross Turk Date: Wed, 25 Jan 2023 20:44:30 -0500 Subject: [PATCH 1/5] Add a new script for stopping docker deployment Signed-off-by: Ross Turk --- docker/down.sh | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100755 docker/down.sh diff --git a/docker/down.sh b/docker/down.sh new file mode 100755 index 0000000000..3bb04863b6 --- /dev/null +++ b/docker/down.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# +# SPDX-License-Identifier: Apache-2.0 + +set -e + +# Print the given text in bold. +title() { + echo -e "\033[1m${1}\033[0m" +} + +# Print the command synopsis and the supported flags. +usage() { + echo "usage: ./$(basename -- "${0}") [--api-port PORT] [--api-admin-port PORT] [--web-port PORT] [--tag TAG]" + echo "A script used to bring down Marquez when run via Docker" + echo + title "ARGUMENTS:" + echo " -a, --api-port int api port (default: 5000)" + echo " -m, --api-admin-port int api admin port (default: 5001)" + echo " -w, --web-port int web port (default: 3000)" + echo " -t, --tag string image tag (default: 0.19.0)" + echo +} + +# Change working directory to project root +project_root=$(git rev-parse --show-toplevel) +cd "${project_root}/" + +compose_files="-f docker-compose.yml" +args="--remove-orphans" + +# Defaults; each one can be overridden by the flags parsed below. +API_PORT=5000 +API_ADMIN_PORT=5001 +WEB_PORT=3000 +TAG=0.19.0 +while [ $# -gt 0 ]; do + case $1 in + -a|'--api-port') + shift + API_PORT="${1}" + ;; + -m|'--api-admin-port') + shift + API_ADMIN_PORT="${1}" + ;; + -w|'--web-port') + shift + WEB_PORT="${1}" + ;; + -t|'--tag') + shift + TAG="${1}" + ;; + -h|'--help') + usage + exit 0 + ;; + *) usage + exit 1 + ;; + esac + shift +done + +# Pass the settings to docker-compose through its environment and run 'down'. +API_PORT=${API_PORT} API_ADMIN_PORT=${API_ADMIN_PORT} WEB_PORT=${WEB_PORT} TAG="${TAG}" docker-compose $compose_files down $args From 79c447285ebe115671acf2fcd3bf1128f5373de4 Mon Sep 17 00:00:00 2001 From: Ross Turk Date: Wed, 25 Jan 2023 20:49:24 -0500 Subject: [PATCH 2/5] initial test to confirm proper facet syntax & validate UI Signed-off-by: Ross Turk ---
docker/metadata.json | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/docker/metadata.json b/docker/metadata.json index e272caf0c1..a89a0bc00f 100644 --- a/docker/metadata.json +++ b/docker/metadata.json @@ -427,6 +427,30 @@ "namespace": "food_delivery", "name": "public.orders_7_days", "facets": { + "columnLineage": { + "_producer": "https://github.com/MarquezProject/marquez/blob/main/docker/metadata.json", + "_schemaURL": "https://openlineage.io/spec/facets/1-0-1/ColumnLineageDatasetFacet.json", + "fields": { + "restaurant_id": { + "inputFields": [ + { + "namespace": "food_delivery", + "name": "public.menus", + "field": "restaurant_id" + } + ] + }, + "menu_id": { + "inputFields": [ + { + "namespace": "file", + "name": "input_dataset", + "field": "c" + } + ] + } + } + }, "documentation": { "_producer": "https://github.com/MarquezProject/marquez/blob/main/docker/metadata.json", "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/DocumentationDatasetFacet.json", From 485ab956c3c2fd179c4cf4a65d25afdd99a429c4 Mon Sep 17 00:00:00 2001 From: Ross Turk Date: Wed, 25 Jan 2023 21:24:56 -0500 Subject: [PATCH 3/5] additional testing Signed-off-by: Ross Turk --- docker/metadata.json | 176 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 172 insertions(+), 4 deletions(-) diff --git a/docker/metadata.json b/docker/metadata.json index a89a0bc00f..f6f312418a 100644 --- a/docker/metadata.json +++ b/docker/metadata.json @@ -431,11 +431,38 @@ "_producer": "https://github.com/MarquezProject/marquez/blob/main/docker/metadata.json", "_schemaURL": "https://openlineage.io/spec/facets/1-0-1/ColumnLineageDatasetFacet.json", "fields": { + "order_id": { + "inputFields": [ + { + "namespace": "food_delivery", + "name": "public.orders", + "field": "id" + } + ] + }, + "placed_on": { + "inputFields": [ + { + "namespace": "food_delivery", + "name": "public.orders", + "field": "placed_on" + } + ] + }, + "discount_id": { + "inputFields": [ + { + "namespace": 
"food_delivery", + "name": "public.orders", + "field": "discount_id" + } + ] + }, "restaurant_id": { "inputFields": [ { "namespace": "food_delivery", - "name": "public.menus", + "name": "public.menu", "field": "restaurant_id" } ] @@ -443,9 +470,27 @@ "menu_id": { "inputFields": [ { - "namespace": "file", - "name": "input_dataset", - "field": "c" + "namespace": "food_delivery", + "name": "public.menu", + "field": "id" + } + ] + }, + "menu_item_id": { + "inputFields": [ + { + "namespace": "food_delivery", + "name": "public.menu_items", + "field": "id" + } + ] + }, + "category_id": { + "inputFields": [ + { + "namespace": "food_delivery", + "name": "public.categories", + "field": "id" } ] } @@ -1059,6 +1104,129 @@ "namespace": "food_delivery", "name": "public.delivery_7_days", "facets": { + "columnLineage": { + "_producer": "https://github.com/MarquezProject/marquez/blob/main/docker/metadata.json", + "_schemaURL": "https://openlineage.io/spec/facets/1-0-1/ColumnLineageDatasetFacet.json", + "fields": { + "order_id": { + "inputFields": [ + { + "namespace": "food_delivery", + "name": "public.orders_7_days", + "field": "order_id" + } + ] + }, + "order_placed_on": { + "inputFields": [ + { + "namespace": "food_delivery", + "name": "public.orders_7_days", + "field": "placed_on" + } + ] + }, + "order_dispatched_on": { + "inputFields": [ + { + "namespace": "food_delivery", + "name": "public.order_status", + "field": "transitioned_at" + } + ] + }, + "order_delivered_on": { + "inputFields": [ + { + "namespace": "food_delivery", + "name": "public.order_status", + "field": "transitioned_at" + } + ] + }, + "customer_email": { + "inputFields": [ + { + "namespace": "food_delivery", + "name": "public.customers", + "field": "email" + } + ] + }, + "customer_address": { + "inputFields": [ + { + "namespace": "food_delivery", + "name": "public.customers", + "field": "address" + } + ] + }, + "discount_id": { + "inputFields": [ + { + "namespace": "food_delivery", + "name": 
"public.orders_7_days", + "field": "discount_id" + } + ] + }, + "menu_id": { + "inputFields": [ + { + "namespace": "food_delivery", + "name": "public.orders_7_days", + "field": "menu_id" + } + ] + }, + "restaurant_id": { + "inputFields": [ + { + "namespace": "food_delivery", + "name": "public.orders_7_days", + "field": "restaurant_id" + } + ] + }, + "restaurant_address": { + "inputFields": [ + { + "namespace": "food_delivery", + "name": "public.restaurants", + "field": "address" + } + ] + }, + "menu_item_id": { + "inputFields": [ + { + "namespace": "food_delivery", + "name": "public.restaurants", + "field": "menu_item_id" + } + ] + }, + "category_id": { + "inputFields": [ + { + "namespace": "food_delivery", + "name": "public.menu_item_id", + "field": "category_id" + } + ] + }, + "driver_id": { + "inputFields": [ + { + "namespace": "food_delivery", + "name": "public.drivers", + "field": "id" + } + ] + } + } + }, "documentation": { "_producer": "https://github.com/MarquezProject/marquez/blob/main/docker/metadata.json", "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/DocumentationDatasetFacet.json", From efd612c15eb5521d28c9792cc8230c51b2c10720 Mon Sep 17 00:00:00 2001 From: Ross Turk Date: Wed, 25 Jan 2023 22:53:13 -0500 Subject: [PATCH 4/5] Adding additional column-level lineage facet metadata Signed-off-by: Ross Turk --- docker/metadata.json | 192 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 181 insertions(+), 11 deletions(-) diff --git a/docker/metadata.json b/docker/metadata.json index f6f312418a..0b6c656cb6 100644 --- a/docker/metadata.json +++ b/docker/metadata.json @@ -395,7 +395,7 @@ "sql": { "_producer": "https://github.com/MarquezProject/marquez/blob/main/docker/metadata.json", "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/SQLJobFacet.json", - "query": "INSERT INTO orders_7_days (order_id, placed_on, discount_id, restaurant_id, menu_id, menu_item_id, category_id)\n SELECT o.id AS order_id, o.placed_on, o.discount_id, 
m.restaurant_id, m.id AS menu_id, mi.id AS menu_item_id, c.id AS category_id\n FROM orders AS o\n INNER JOIN menu_items AS mi\n ON menu_items.id = o.menu_item_id\n INNER JOIN categories AS c\n ON c.id = mi.category_id\n INNER JOIN menu AS m\n ON m.id = c.menu_id\n WHERE o.placed_on >= NOW() - interval '7 days';" + "query": "INSERT INTO orders_7_days (order_id, placed_on, discount_id, restaurant_id, menu_id, menu_item_id, category_id)\n SELECT o.id AS order_id, o.placed_on, o.discount_id, m.restaurant_id, m.id AS menu_id, mi.id AS menu_item_id, c.id AS category_id\n FROM orders AS o\n INNER JOIN menu_items AS mi\n ON mi.id = o.menu_item_id\n INNER JOIN categories AS c\n ON c.id = mi.category_id\n INNER JOIN menus AS m\n ON m.id = c.menu_id\n WHERE o.placed_on >= NOW() - interval '7 days';" }, "documentation": { "_producer": "https://github.com/MarquezProject/marquez/blob/main/docker/metadata.json", @@ -462,7 +462,7 @@ "inputFields": [ { "namespace": "food_delivery", - "name": "public.menu", + "name": "public.menus", "field": "restaurant_id" } ] @@ -471,8 +471,13 @@ "inputFields": [ { "namespace": "food_delivery", - "name": "public.menu", + "name": "public.menus", "field": "id" + }, + { + "namespace": "food_delivery", + "name": "public.categories", + "field": "menu_id" } ] }, @@ -482,6 +487,11 @@ "namespace": "food_delivery", "name": "public.menu_items", "field": "id" + }, + { + "namespace": "food_delivery", + "name": "public.orders", + "field": "menu_item_id" } ] }, @@ -491,6 +501,11 @@ "namespace": "food_delivery", "name": "public.categories", "field": "id" + }, + { + "namespace": "food_delivery", + "name": "public.menu_items", + "field": "category_id" } ] } @@ -1114,6 +1129,11 @@ "namespace": "food_delivery", "name": "public.orders_7_days", "field": "order_id" + }, + { + "namespace": "food_delivery", + "name": "public.order_status", + "field": "order_id" } ] }, @@ -1186,6 +1206,11 @@ "namespace": "food_delivery", "name": "public.orders_7_days", "field": 
"restaurant_id" + }, + { + "namespace": "food_delivery", + "name": "public.restaurants", + "field": "id" } ] }, @@ -1222,6 +1247,11 @@ "namespace": "food_delivery", "name": "public.drivers", "field": "id" + }, + { + "namespace": "food_delivery", + "name": "public.order_status", + "field": "driver_id" } ] } @@ -1266,12 +1296,31 @@ "tags": [], "description": "The email address of the customer." }, + { + "name": "customer_address", + "type": "VARCHAR", + "tags": [], + "description": "The physical address of the customer." + }, + { "name": "menu_id", "type": "INTEGER", "tags": [], "description": "The ID of the menu related to the order." }, + { + "name": "restaurant_id", + "type": "INTEGER", + "tags": [], + "description": "The ID of the restaurant." + }, + { + "name": "restaurant_address", + "type": "VARCHAR", + "tags": [], + "description": "The physical address of the restaurant." + }, { "name": "menu_item_id", "type": "INTEGER", @@ -1348,7 +1397,7 @@ "sql": { "_producer": "https://github.com/MarquezProject/marquez/blob/main/docker/metadata.json", "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/SQLJobFacet.json", - "query": "INSERT INTO top_delivery_times (order_id, order_placed_on, order_dispatched_on, order_delivered_on, order_delivery_time,\n customer_email, restaurant_id, driver_id)\n SELECT order_id, order_placed_on, order_delivered_on, DATEDIFF(minute, order_placed_on, order_delivered_on) AS order_delivery_time,\n customer_email, restaurant_id, driver_id\n FROM delivery_7_days\n GROUP BY restaurant_id\n ORDER BY order_delivery_time DESC\n LIMIT 1;\nINSERT INTO discounts (amount_off, customer_email, starts_on, ends_on)\n SELECT customer_email, DATEDIFF(minute, order_placed_on, order_delivered_on) AS order_delivery_time,\n CASE WHEN order_delivery_time >= 60 THEN 15\n ELSE 5\n END AS amount_off,\n NOW() AS starts_on,\n NOW() + interval '7 days' AS ends_on\n FROM top_delivery_times\n WHERE order_delivery_time >= 45;" + "query": "INSERT INTO 
top_delivery_times (order_id, order_placed_on, order_delivered_on, order_delivery_time,\n customer_email, restaurant_id, driver_id)\n SELECT order_id, order_placed_on, order_delivered_on, DATEDIFF(minute, order_placed_on, order_delivered_on) AS order_delivery_time,\n customer_email, restaurant_id, driver_id\n FROM delivery_7_days\n GROUP BY restaurant_id\n ORDER BY order_delivery_time DESC\n LIMIT 1;\nINSERT INTO discounts (amount_off, customer_email, starts_on, ends_on)\n SELECT customer_email, DATEDIFF(minute, order_placed_on, order_delivered_on) AS order_delivery_time,\n CASE WHEN order_delivery_time >= 60 THEN 15\n ELSE 5\n END AS amount_off,\n NOW() AS starts_on,\n NOW() + interval '7 days' AS ends_on\n FROM top_delivery_times\n WHERE order_delivery_time >= 45;" }, "documentation": { "_producer": "https://github.com/MarquezProject/marquez/blob/main/docker/metadata.json", @@ -1368,6 +1417,80 @@ "namespace": "food_delivery", "name": "public.top_delivery_times", "facets": { + "columnLineage": { + "_producer": "https://github.com/MarquezProject/marquez/blob/main/docker/metadata.json", + "_schemaURL": "https://openlineage.io/spec/facets/1-0-1/ColumnLineageDatasetFacet.json", + "fields": { + "order_id": { + "inputFields": [ + { + "namespace": "food_delivery", + "name": "public.delivery_7_days", + "field": "order_id" + } + ] + }, + "order_placed_on": { + "inputFields": [ + { + "namespace": "food_delivery", + "name": "public.delivery_7_days", + "field": "order_placed_on" + } + ] + }, + "order_delivered_on": { + "inputFields": [ + { + "namespace": "food_delivery", + "name": "public.delivery_7_days", + "field": "order_delivered_on" + } + ] + }, + "order_delivery_time": { + "inputFields": [ + { + "namespace": "food_delivery", + "name": "public.delivery_7_days", + "field": "order_placed_on" + }, + { + "namespace": "food_delivery", + "name": "public.delivery_7_days", + "field": "order_delivered_on" + } + ] + }, + "customer_email": { + "inputFields": [ + { + "namespace": 
"food_delivery", + "name": "public.delivery_7_days", + "field": "customer_email" + } + ] + }, + "restaurant_id": { + "inputFields": [ + { + "namespace": "food_delivery", + "name": "public.delivery_7_days", + "field": "restaurant_id" + } + ] + }, + "driver_id": { + "inputFields": [ + { + "namespace": "food_delivery", + "name": "public.delivery_7_days", + "field": "driver_id" + } + ] + } + } + }, "documentation": { "_producer": "https://github.com/MarquezProject/marquez/blob/main/docker/metadata.json", "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/DocumentationDatasetFacet.json", @@ -1389,12 +1512,6 @@ "tags": [], "description": "An ISO-8601 timestamp representing the date/time the order was placed." }, - { - "name": "order_dispatched_on", - "type": "TIMESTAMP", - "tags": [], - "description": "An ISO-8601 timestamp representing the date/time the order was dispatched." - }, { "name": "order_delivered_on", "type": "TIMESTAMP", @@ -1402,7 +1519,7 @@ "description": "An ISO-8601 timestamp representing the date/time the order was delivered." }, { - "name": "order_delivered_time", + "name": "order_delivery_time", "type": "TIMESTAMP", "tags": [], "description": "An ISO-8601 timestamp representing the total time of delivery." 
@@ -1439,6 +1556,35 @@ "namespace": "food_delivery", "name": "public.discounts", "facets": { + "columnLineage": { + "_producer": "https://github.com/MarquezProject/marquez/blob/main/docker/metadata.json", + "_schemaURL": "https://openlineage.io/spec/facets/1-0-1/ColumnLineageDatasetFacet.json", + "fields": { + "amount_off": { + "inputFields": [ + { + "namespace": "food_delivery", + "name": "public.delivery_7_days", + "field": "order_placed_on" + }, + { + "namespace": "food_delivery", + "name": "public.delivery_7_days", + "field": "order_delivered_on" + } + ] + }, + "customer_email": { + "inputFields": [ + { + "namespace": "food_delivery", + "name": "public.delivery_7_days", + "field": "customer_email" + } + ] + } + } + }, "documentation": { "_producer": "https://github.com/MarquezProject/marquez/blob/main/docker/metadata.json", "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/DocumentationDatasetFacet.json", @@ -1548,6 +1694,30 @@ "namespace": "food_delivery", "name": "public.popular_orders_day_of_week", "facets": { + "columnLineage": { + "_producer": "https://github.com/MarquezProject/marquez/blob/main/docker/metadata.json", + "_schemaURL": "https://openlineage.io/spec/facets/1-0-1/ColumnLineageDatasetFacet.json", + "fields": { + "order_day_of_week": { + "inputFields": [ + { + "namespace": "food_delivery", + "name": "public.top_delivery_times", + "field": "order_placed_on" + } + ] + }, + "order_placed_on": { + "inputFields": [ + { + "namespace": "food_delivery", + "name": "public.top_delivery_times", + "field": "order_placed_on" + } + ] + } + } + }, "documentation": { "_producer": "https://github.com/MarquezProject/marquez/blob/main/docker/metadata.json", "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/DocumentationDatasetFacet.json", From 87db304895c421fc3c93f2cb9a69fc7b101e204a Mon Sep 17 00:00:00 2001 From: Ross Turk Date: Wed, 25 Jan 2023 22:58:30 -0500 Subject: [PATCH 5/5] Update changelog for column-level seed data Signed-off-by: Ross Turk --- 
CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 66871df840..7c64ccffda 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ## [Unreleased](https://github.com/MarquezProject/marquez/compare/0.29.0...HEAD) +* Additions to seed data for column-lineage [`#2381`](https://github.com/MarquezProject/marquez/pull/2381) [@rossturk](https://github.com/rossturk) + ## [0.29.0](https://github.com/MarquezProject/marquez/compare/0.28.0...0.29.0) - 2022-12-19 ### Added @@ -897,4 +899,4 @@ ---- SPDX-License-Identifier: Apache-2.0 -Copyright 2018-2023 contributors to the Marquez project. \ No newline at end of file +Copyright 2018-2023 contributors to the Marquez project.