-
Notifications
You must be signed in to change notification settings - Fork 0
/
BannerMovement.py
88 lines (79 loc) · 6.01 KB
/
BannerMovement.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#---------------------------------------------------------------------------------------------------------------------------------#
# Name: Banner Movement Analysis
# Date: 04/20/2018
#---------------------------------------------------------------------------------------------------------------------------------#
from pyspark.sql import functions as F
# Purchases of known Customers with more than one purchase at any male banner
bannerOrders = sqlContext.sql('''SELECT distinct pft.final_individ, purchase_id, fiscal_year, com_company
FROM customer360.purchase_fact_table pft
INNER JOIN (
SELECT * FROM (
SELECT distinct (pft.final_individ), count(distinct purchase_id) as numPurchases
FROM customer360.purchase_fact_table pft
WHERE order_type = 'OP' and pft.final_individ is not null
AND com_company in (1,21,20,39)
GROUP BY final_individ
) AS T
WHERE T.numPurchases > 1) b ON pft.final_individ = b.final_individ
WHERE order_type = 'OP' and pft.final_individ is not null
AND com_company in (1,21,20,34)
ORDER BY final_individ, fiscal_year
''')
# Customers that made their first purchase on Eastbay since 2013
firstPurchaseEB = sqlContext.sql('''SELECT final_individ, date_entered, yearAcquired
FROM (
SELECT final_individ, date_entered, com_company, fiscal_year as yearAcquired,
ROW_NUMBER() OVER(PARTITION by final_individ ORDER BY date_entered ASC) AS Rank
FROM customer360.purchase_fact_table
WHERE final_individ is not null
AND date_entered is not null
AND order_type = 'OP'
) AS A
WHERE Rank = 1 and com_company = 1 and yearAcquired > 2012
ORDER BY A.final_individ, A.date_entered, A.com_company, A.yearAcquired
''')
df = bannerOrders.join(firstPurchaseEB,"final_individ")
# Eastbay Customers that shopped at other banners- by year
cust_df = df.groupBy(["com_company","fiscal_year"]).agg(F.count("purchase_id").alias("orderCounts"),F.countDistinct("final_individ").alias("Customers"))
# Pivot unique customers by year and banner
cust_df = cust_df.groupBy("fiscal_year").pivot("com_company").sum("Customers").toPandas()
cust_df = cust_df.to_csv("EastbayAcq_Customers.csv", index = False)
#---------------------------------------------------------------------------------------------------------------------------------#
# Launch Purchase Analysis For Customers with more than one purchase at many male banners that were acquired at Eastbay
bannerOrders = sqlContext.sql('''SELECT distinct pft.final_individ, purchase_id, fiscal_year, com_company
FROM customer360.purchase_fact_table pft
INNER JOIN (
SELECT * FROM (
SELECT distinct (pft.final_individ), count(distinct purchase_id) as numPurchases
FROM customer360.purchase_fact_table pft
WHERE order_type = 'OP' and pft.final_individ is not null
AND com_company in (1,21,20,39)
GROUP BY final_individ
) AS T
WHERE T.numPurchases > 1) b ON pft.final_individ = b.final_individ
WHERE order_type = 'OP' and pft.final_individ is not null
AND com_company in (1,21,20,34) and launch_ind = true
ORDER BY final_individ, fiscal_year
''')
# Customers that made their first purchase on Eastbay since 2013
firstPurchaseEB = sqlContext.sql('''SELECT final_individ, date_entered, yearAcquired
FROM (
SELECT final_individ, date_entered, com_company, fiscal_year as yearAcquired,
ROW_NUMBER() OVER(PARTITION by final_individ ORDER BY date_entered ASC) AS Rank
FROM customer360.purchase_fact_table
WHERE final_individ is not null
AND date_entered is not null
AND order_type = 'OP'
) AS A
WHERE Rank = 1 and com_company = 1 and yearAcquired > 2012
ORDER BY A.final_individ, A.date_entered, A.com_company, A.yearAcquired
''')
df = bannerOrders.join(firstPurchaseEB,"final_individ")
# Eastbay Customers that shopped at other banners- by year
cust_df = df.groupBy(["com_company","fiscal_year"]).agg(F.countDistinct("final_individ").alias("Customers"))
# Pivot unique customers by year and banner
cust_df = cust_df.groupBy("fiscal_year").pivot("com_company").sum("Customers").toPandas()
cust_df = cust_df.to_csv("EastbayAcq_Customers_Launches.csv", index = False)
#---------------------------------------------------------------------------------------------------------------------------------#
# Total Count since 2013
cust_df = df.groupBy(["com_company"]).agg(F.countDistinct("final_individ").alias("Customers"))