From 64631875080ab6434b9a2f8b38741a3c217746ee Mon Sep 17 00:00:00 2001 From: Timothy Lee Date: Thu, 15 Jun 2023 18:27:22 +0800 Subject: [PATCH] Apply the stupid dup to a intermediate model and use recursive cte Signed-off-by: Timothy Lee --- models/int_customer_order_history_joined.sql | 24 ++++++++++++++++---- models/staging/stg_orders.sql | 22 +----------------- 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/models/int_customer_order_history_joined.sql b/models/int_customer_order_history_joined.sql index 9aedd70a2..8c5a54c93 100644 --- a/models/int_customer_order_history_joined.sql +++ b/models/int_customer_order_history_joined.sql @@ -1,4 +1,4 @@ -with customers as ( +with recursive customers as ( select * from {{ ref('stg_customers') }} @@ -45,6 +45,22 @@ customer_payments as ( ), +dup as ( + select customer_payments.*, 1 as lvl + from customer_payments + + union all + + select customer_payments.*, dup.lvl + 1 + from dup + join customer_payments on customer_payments.customer_id = dup.customer_id + where dup.lvl < 8000 +), + +dedup as ( + select distinct customer_id, total_amount from dup +), + final as ( select @@ -54,15 +70,15 @@ final as ( customer_orders.first_order, customer_orders.most_recent_order, customer_orders.number_of_orders, - customer_payments.total_amount as customer_lifetime_value + dedup.total_amount as customer_lifetime_value from customers left join customer_orders on customers.customer_id = customer_orders.customer_id - left join customer_payments - on customers.customer_id = customer_payments.customer_id + left join dedup + on customers.customer_id = dedup.customer_id ) diff --git a/models/staging/stg_orders.sql b/models/staging/stg_orders.sql index c83a77433..46cbb2d98 100644 --- a/models/staging/stg_orders.sql +++ b/models/staging/stg_orders.sql @@ -20,26 +20,6 @@ renamed as ( ), -dup as ( - SELECT * FROM renamed - {% for i in range(0, 50) %} - UNION - SELECT * FROM renamed - {% endfor %} -), - -dupdup as ( - SELECT * FROM dup - {% for i in range(0, 100) %} - UNION - SELECT * FROM dup - {% endfor %} -), - -dedup as ( - select distinct order_id, customer_id, order_date, status from dupdup -), - -- Shift the order_date by the number of days since 2018-04-09 (the max order_date in the raw data) shift_date as ( @@ -49,7 +29,7 @@ shift_date as ( (order_date + datediff('day', date '2018-04-09', CURRENT_DATE)::int) as order_date, status - from dedup + from renamed ) select * from shift_date