-
Notifications
You must be signed in to change notification settings - Fork 0
/
rideau_canal.Rmd
201 lines (161 loc) · 8.21 KB
/
rideau_canal.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
---
title: "Rideau Canal"
output: html_document
---
```{r}
library(lubridate)
library(tidyverse)
```
Import the dates on which the Rideau Canal Skateway was opened. This input file was originally copied from [Wikipedia](https://en.wikipedia.org/w/index.php?title=Rideau_Canal&oldid=995637706#Skateway "Wikipedia article: Rideau Canal, section Skateway, revision 995637706").
```{r Import dates}
# Import data
dates <- read.csv("rideau_canal_dates.csv", stringsAsFactors = F, header = T)#, row.names = 1)
dates$opening_date_fmt <- as.Date(dates$opening_date, format = "%B %d, %Y")
dates$month <- as.numeric(format(dates$opening_date_fmt, "%m"))
dates$day <- as.numeric(format(dates$opening_date_fmt, "%d"))
```
Find the earliest and latest opening dates.
```{r Find the earliest and latest opening dates}
# Get minimum and maximum dates
# It's sorta tricky since a season spans two years (e.g. Dec 2011 J- an 2012)
# For the minimum day look at the earliest year-day before June; for max after June
min_day <- min(yday(dates[month(dates$opening_date_fmt) > 6,]$opening_date_fmt))
min_day <- subset(dates, yday(opening_date_fmt)==min_day)[1,] # In a tie just take the first one
max_day <- max(yday(dates[month(dates$opening_date_fmt) < 6,]$opening_date_fmt))
max_day <- subset(dates, yday(opening_date_fmt)==max_day)[1,] # In a tie just take the first one
cat("Earliest and latest opening days:\n")
cat(min_day$opening_date)
cat("\n")
cat(max_day$opening_date)
```
Add some other useful variables. Add a season variable and a variable counting the days from December 1. Season is defined as the first year (i.e. the 1970-71 season is "1971").
```{r Add season}
# start the axis from Dec 1?
# january + 30 gives days from dec 1
# february + 31 + 30 gives days from dec 1
# dec just use days - 1
dates$from_dec1 <- with(dates,
ifelse(month==1, day + 30,
ifelse(month==2, day + 31 + 30,
day - 1)))
# add season (1970-1971 is the 1970 season)
# if the season opens in December, use that year
# Otherwise use the opening date's year minus one
dates$season <- with(dates, ifelse(
month(opening_date_fmt)==12, year(opening_date_fmt), year(opening_date_fmt) -1
))
#dates$season <- rev(1970:(nrow(dates) - 1 + 1970))
```
Run a linear regression of days after December 1 versus opening day across the entire data set. Obviously this is only an approximation (since eventually it will be possible to not have an opening day).
```{r Regression model and diagnostics}
# regression model
reg_model <- lm(season ~ from_dec1, data = dates)
summary(reg_model)
plot(reg_model)
```
```{r}
# on average, each year the skating season has opened 0.72 days later
# (give or take 0.34 either way)
# how many times did it open in Jan?
# how often has that happened recently?
nrow(dates[dates$month==1,])
nrow(dates[dates$month==12 & dates$season>=2000,]) # seasons since 2000-2001 starting in Jan
nrow(dates[dates$season>=2000,])
nrow(dates[dates$month==12 & 1980 <= dates$season & dates$season < 2000,]) # seasons before 2000-2001 starting in Jan
```
```{r Plot the dates}
ggplot(dates) +
geom_point(aes(x=season, y=from_dec1)) +
geom_hline(yintercept=31, linetype = "dotted") +
geom_smooth(aes(x=season, y=from_dec1),
method = "lm", formula = y ~ x, se = F, colour="red", linetype="dashed") +
annotate("text", 1968, 32.4, label="Jan 1", size=3) +
ylab("Days after Dec 1") + xlab("Season") +
ggtitle("How late in the season does the Rideau Canal Skateway open?")
ggplot(dates) +
geom_point(aes(x=season, y=from_dec1)) +
geom_hline(yintercept=31, linetype = "dotted") +
annotate("text", 1968, 32.4, label="Jan 1", size=3) +
ylab("Days after Dec 1") + xlab("Season") +
ggtitle("How late in the season does the Rideau Canal Skateway open?")
```
```{r Try to improve the plots}
ggplot(dates) +
geom_point(aes(x=season, y=from_dec1)) +
geom_hline(yintercept=31, linetype = "dotted") +
geom_hline(yintercept=62, linetype = "dotted") +
geom_smooth(aes(x=season, y=from_dec1),
method = "lm", formula = y ~ x, se = F, colour="red", linetype="dashed") +
annotate("text", 1968, 32.4, label="Jan 1", size=3) +
annotate("text", 1968, 63.4, label="Feb 1", size=3) +
ylab("Days after Dec 1") + xlab("Season") +
ggtitle("How late in the season does the Rideau Canal Skateway open?")
ggplot(dates) +
geom_point(aes(x=season, y=from_dec1)) +
geom_hline(yintercept=31, linetype = "dotted") +
geom_hline(yintercept=62, linetype = "dotted") +
#geom_smooth(aes(x=season, y=from_dec1),
# method = "lm", formula = y ~ x, se = F, colour="red", linetype="dashed") +
annotate("text", 1968, 32.4, label="Jan 1", size=3) +
annotate("text", 1968, 63.4, label="Feb 1", size=3) +
ylab("Days after Dec 1") + xlab("Season") +
ggtitle("How late in the season does the Rideau Canal Skateway open?") #+
#geom_point(aes(x=2022,y=64),colour="red") # assume Feb 3 opening
```
```{r Trying to make floating bar plots}
# Floating bar plots:
# A bunch of vertical bars where the bottom starts at the opening date and the top goes to the closing date
dates_floating <- read.csv("rideau_canal_dates.csv", stringsAsFactors = F, header = T)#, row.names = 1)
# Format the data
dates_floating$opening_date_formatted <- as.Date(dates_floating$opening_date, format = "%B %d, %Y")
dates_floating$closing_date_formatted <- as.Date(dates_floating$closing_date_fmt, format = "%Y-%m-%d")
# Drop some variables so we define them solely by date
# Also rename them, this dataset became a bit of a hodge podge over the years....
dates_floating <- dates_floating[,c("opening_date_formatted","closing_date_formatted")]
# Add the individual day and month
dates_floating$opening_date_month <- month(dates_floating$opening_date_formatted)
dates_floating$opening_date_day <- day(dates_floating$opening_date_formatted)
dates_floating$closing_date_month <- month(dates_floating$closing_date_formatted)
dates_floating$closing_date_day <- day(dates_floating$closing_date_formatted)
# Set the dates relative to December 1st of each season
dates_floating$opening_from_dec1 <- with(dates_floating,
ifelse(opening_date_month==1, opening_date_day + 30,
ifelse(opening_date_month==2, opening_date_day + 31 + 30,
opening_date_day - 1)))
# Get the date between the opening and closing
dates_floating$closing_from_dec1 <- with(dates_floating,
as.integer(opening_from_dec1 + (closing_date_formatted - opening_date_formatted)))
# Middle of season marker
dates_floating$mid <- with(dates_floating, as.integer(opening_from_dec1 + (closing_from_dec1 - opening_from_dec1)/2))
# add season (1970-1971 is the 1970 season)
dates_floating$season <- with(dates_floating, ifelse(
month(opening_date_formatted)==12, year(opening_date_formatted), year(opening_date_formatted) -1
))
# Add the 2022-23 season (empty)
dates_floating_missing <- dates_floating[0,c("opening_date_formatted","closing_date_formatted")]
dates_floating_missing[1,] <- list(as.Date("2022-12-01", format = "%Y-%m-%d"),
as.Date("2023-03-31", format = "%Y-%m-%d"))
dates_floating_missing$opening_from_dec1 <- 1
dates_floating_missing$closing_from_dec1 <- 120
dates_floating_missing$mid <- 60
dates_floating_missing$season <- 2022
# bar width
w <- 0.9
p <- ggplot() +
geom_crossbar(data = dates_floating,
aes(x = season, y = mid, ymin = opening_from_dec1, ymax = closing_from_dec1),
width = w, colour = "blue", fill = "blue", fatten = NULL) +
geom_crossbar(data = dates_floating_missing,
aes(x = season, y = mid, ymin = opening_from_dec1, ymax = closing_from_dec1),
#width = 1, colour = alpha("red", 0.3), fill = "red", alpha=0.3, fatten = NULL) +
width = w, colour = NA, fill = "red", alpha=0.3, fatten = NULL) +
xlab("Season") +
ylab("Season length (starting December 1)") +
coord_cartesian(ylim = c(0,120))
#+
# coord_flip() +
# xlab("Item") +
# ylab("Value")
#dates_floating$month <- as.numeric(format(dates_floating$date_fmt, "%m"))
#dates_floating$day <- as.numeric(format(dates_floating$date_fmt, "%d"))
```