forked from dataquestio/solutions
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMission374Solutions.Rmd
255 lines (219 loc) · 6.72 KB
/
Mission374Solutions.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
---
title: "Answering Business Questions using SQL (Intermediate SQL in R): Guided Project Solutions"
output: html_document
---
# Creating Helper Functions
```{r}
library(RSQLite)
library(DBI)
db <- 'chinook.db'
run_query <- function(q) {
conn <- dbConnect(SQLite(), db)
result <- dbGetQuery(conn, q)
dbDisconnect(conn)
return(result)
}
show_tables <- function() {
q = "SELECT name, type FROM sqlite_master WHERE type IN ('table', 'view')"
return(run_query(q))
}
show_tables()
```
# Selecting New Albums to Purchase
```{r}
albums_to_purchase = '
WITH usa_tracks_sold AS
(
SELECT il.* FROM invoice_line il
INNER JOIN invoice i on il.invoice_id = i.invoice_id
INNER JOIN customer c on i.customer_id = c.customer_id
WHERE c.country = "USA"
)
SELECT
g.name genre,
count(uts.invoice_line_id) tracks_sold,
cast(count(uts.invoice_line_id) AS FLOAT) / (
SELECT COUNT(*) from usa_tracks_sold
) percentage_sold
FROM usa_tracks_sold uts
INNER JOIN track t on t.track_id = uts.track_id
INNER JOIN genre g on g.genre_id = t.genre_id
GROUP BY 1
ORDER BY 2 DESC
LIMIT 10;
'
run_query(albums_to_purchase)
```
```{r}
library(ggplot2)
genre_sales = run_query(albums_to_purchase)
ggplot(data = genre_sales, aes(x = reorder(genre, -percentage_sold),
y = percentage_sold)) +
geom_bar(stat = "identity")
```
Among the genres represented in our list of 4 albums, punk, blues and pop are the highest rated. Therefore, we should recommend:
- Red Tone (Punk)
- Slim Jim Bites (Blues)
- Meteor and the Girls (Pop)
By far though, rock makes up the majority of the sales. To better capture sales in the USA, we might want to ask the record label if they have any up-and-coming rock bands.
# Analyzing Employee Sales Performance
```{r}
employee_sales_performance = '
WITH customer_support_rep_sales AS
(
SELECT
i.customer_id,
c.support_rep_id,
SUM(i.total) total
FROM invoice i
INNER JOIN customer c ON i.customer_id = c.customer_id
GROUP BY 1,2
)
SELECT
e.first_name || " " || e.last_name employee,
e.hire_date,
SUM(csrs.total) total_sales
FROM customer_support_rep_sales csrs
INNER JOIN employee e ON e.employee_id = csrs.support_rep_id
GROUP BY 1;
'
run_query(employee_sales_performance)
```
```{r}
employee_sales = run_query(employee_sales_performance)
ggplot(data = employee_sales, aes(x = reorder(employee, -total_sales),
y = total_sales)) +
geom_bar(stat = "identity")
```
Jane Peacock has the highest amount of sales, but she also has been at the company the longest. If we really want to hone in on employee efficiency, we might want to standardize sales by the number of days or hours worked.
# Visualizing Sales by Country
```{r}
sales_by_country = '
WITH country_or_other AS
(
SELECT
CASE
WHEN (
SELECT count(*)
FROM customer
where country = c.country
) = 1 THEN "Other"
ELSE c.country
END AS country,
c.customer_id,
il.*
FROM invoice_line il
INNER JOIN invoice i ON i.invoice_id = il.invoice_id
INNER JOIN customer c ON c.customer_id = i.customer_id
)
SELECT
country,
customers,
total_sales,
average_order,
customer_lifetime_value
FROM
(
SELECT
country,
count(distinct customer_id) customers,
SUM(unit_price) total_sales,
SUM(unit_price) / count(distinct customer_id) customer_lifetime_value,
SUM(unit_price) / count(distinct invoice_id) average_order,
CASE
WHEN country = "Other" THEN 1
ELSE 0
END AS sort
FROM country_or_other
GROUP BY country
ORDER BY sort ASC, total_sales DESC
);
'
run_query(sales_by_country)
```
# Visualizing Sales by Country
```{r}
country_metrics = run_query(sales_by_country)
ggplot(data = country_metrics, aes(x = reorder(country, -total_sales),
y = total_sales,
fill = country)) +
geom_bar(stat = "identity") +
labs(
title = "Total sales by country",
x = "Country",
y = "Total Sales"
) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggplot(data = country_metrics, aes(x = reorder(country, -customers),
y = customers,
fill = country)) +
geom_bar(stat = "identity") +
coord_polar("y") +
labs(
title = "Number of customers by country",
x = "Country",
y = "Customers"
)
ggplot(data = country_metrics, aes(x = reorder(country, -customer_lifetime_value),
y = customer_lifetime_value,
color = country)) +
geom_point(stat = "identity") +
labs(
title = "Customer lifetime value by country",
x = "Country",
y = "Customer Lifetime Value"
) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
# Albums vs Individual Tracks
```{r}
albums_vs_tracks = '
WITH invoice_first_track AS
(
SELECT
il.invoice_id invoice_id,
MIN(il.track_id) first_track_id
FROM invoice_line il
GROUP BY 1
)
SELECT
album_purchase,
COUNT(invoice_id) number_of_invoices,
CAST(count(invoice_id) AS FLOAT) / (
SELECT COUNT(*) FROM invoice
) percent
FROM
(
SELECT
ifs.*,
CASE
WHEN
(
SELECT t.track_id FROM track t
WHERE t.album_id = (
SELECT t2.album_id FROM track t2
WHERE t2.track_id = ifs.first_track_id
)
EXCEPT
SELECT il2.track_id FROM invoice_line il2
WHERE il2.invoice_id = ifs.invoice_id
) IS NULL
AND
(
SELECT il2.track_id FROM invoice_line il2
WHERE il2.invoice_id = ifs.invoice_id
EXCEPT
SELECT t.track_id FROM track t
WHERE t.album_id = (
SELECT t2.album_id FROM track t2
WHERE t2.track_id = ifs.first_track_id
)
) IS NULL
THEN "yes"
ELSE "no"
END AS "album_purchase"
FROM invoice_first_track ifs
)
GROUP BY album_purchase;
'
run_query(albums_vs_tracks)
```
Album purchases account for almost a quarter of the total sales, so it is inadvisable to change strategy to just purchase the most popular tracks.