- ggcalendar
- Step 00. Convenience functions, dates vectors to data frames.
- Step 1 & 2. Compute: from date to x/y, & define StatCalendar
- Step 3. Let’s write a user-facing function
stat_calendar()
- aliasing and convenience
defaults_calendar
&ggcalendar()
Thinking about set of scales/coords etc, that gives you a nice calendar (to wrap up into defaults)- NYC flights Example
- Births example
- data defaults to calendar year and aes(date = date)
- Minimal Viable Packaging
- Traditional README
Note: This README walks through package rational and contains the code that defines proposed package functions and in addition to first-cut testing. TLDR - Jump to traditional readme content
Here’s a proposal for creating calendars with ggplot2 via Stat extension.
When using calendars, ‘when?’ and ‘where?’ are the same question! So, ggcalendar introduces a new positional aesthetic: ‘date’. Let’s put things on the calendar!
In this proposed package, we’ll specify the position of a calendar event
calendar using dates as the required aesthetic:
aes(date = my_variable_of_dates)
! Then you can use layers function
stat_calendar()
and derivative geom functions geom_text_calendar
,
geom_tile_calendar
and geom_point_calendar
to place specific
grobs/mark in the plot space.
Under the hood, the compute_group functions finds the x and y position for the date in the month (x is day in week and y is week in month). Faceting by month is used to prevent over-plotting. Note: automatic faceting by month via ggcalendar() function presupposes that your variable is also named ‘date’.
Other possible directions would be to calculate x and y based on date in month and on month - instead of relying on faceting by month. Furthermore, a dedicated Coord could be created (Teun’s thought). Then maybe dates would just feed generically in as the ‘x’ aes - this sounds cool!
# library(ggcalendar)
library(ggplot2)
library(lubridate)
library(tidyverse)
# install.packages("devtools")
devtools::install_github("EvaMaeRey/ggcalendar")
Because ggplot2’s diet is consists solely of dataframes, we create a number of convenience functions that will help us produce dataframes with column ‘date’ we can feed into ggplot2.
knitrExtra:::chunk_to_r("df_functions")
#' Title
#'
#' @return
#' @export
#'
#' @examples
df_today <- function(){
data.frame(date = Sys.Date())
}
#' Title
#'
#' @param date
#'
#' @return
#' @export
#'
#' @examples
df_day <- function(date = NULL){
if(is.null(date)){date <- Sys.Date()}
data.frame(date = date)
}
#' Title
#'
#' @param start_date
#' @param end_date
#'
#' @return
#' @export
#'
#' @examples
df_dates_interval <- function(start_date, end_date){
data.frame(date = as.Date(start_date):as.Date(end_date) |>
as.Date())
}
#' Title
#'
#' @param month
#' @param year
#'
#' @return
#' @export
#'
#' @examples
df_month <- function(month = NULL, year = NULL){
if(is.null(month)){
date <- Sys.Date()
month <- lubridate::month(date)
}
if(is.numeric(month)){
month <- stringr::str_pad(month, width = 2, pad = "0")
}
if(is.null(year)){
date <- Sys.Date()
year <- lubridate::year(date)
}
paste0(year,"-", month, "-01") |>
lubridate::as_date() ->
start_date
start_date |> lubridate::ceiling_date(unit = "month") ->
end_date
data.frame(date =
df_dates_interval(start_date,
end_date - lubridate::days(1)))
}
#' Title
#'
#' @param date
#'
#' @return
#' @export
#'
#' @examples
df_week <- function(date = NULL){
if(is.null(date)){date <- Sys.Date()}
start_date <- lubridate::floor_date(date, unit = "week")
end_date <- lubridate::ceiling_date(date, unit = "week")
data.frame(date = df_dates_interval(start_date,
end_date - lubridate::days(1)) )
}
#' Title
#'
#' @param date
#'
#' @return
#' @export
#'
#' @examples
return_df_hours_week <- function(date = NULL){
if(is.null(date)){date <- Sys.Date()}
start_date <- lubridate::floor_date(date, unit = "week")
data.frame(date = (start_date + lubridate::hours(1:(24*7-1))))
}
#' Title
#'
#' @param year
#'
#' @return
#' @export
#'
#' @examples
df_year <- function(year = NULL){
if(is.null(year)){year <- lubridate::year(Sys.Date())}
paste0(year, "-01-01") |>
lubridate::as_date() ->
start_date
start_date |> lubridate::ceiling_date(unit = "year") ->
end_date
data.frame(date =
df_dates_interval(start_date,
end_date - lubridate::days(1)))
}
Let’s have a look at some of these.
df_today()
#> date
#> 1 2024-09-03
df_day()
#> date
#> 1 2024-09-03
df_dates_interval(start_date = "2024-10-02", end_date = "2024-10-04")
#> date
#> 1 2024-10-02
#> 2 2024-10-03
#> 3 2024-10-04
df_week()
#> date
#> 1 2024-09-01
#> 2 2024-09-02
#> 3 2024-09-03
#> 4 2024-09-04
#> 5 2024-09-05
#> 6 2024-09-06
#> 7 2024-09-07
df_year() |> head()
#> date
#> 1 2024-01-01
#> 2 2024-01-02
#> 3 2024-01-03
#> 4 2024-01-04
#> 5 2024-01-05
#> 6 2024-01-06
df_month() |> head()
#> date
#> 1 2024-09-01
#> 2 2024-09-02
#> 3 2024-09-03
#> 4 2024-09-04
#> 5 2024-09-05
#> 6 2024-09-06
return_df_hours_week() |> head()
#> date
#> 1 2024-09-01 01:00:00
#> 2 2024-09-01 02:00:00
#> 3 2024-09-01 03:00:00
#> 4 2024-09-01 04:00:00
#> 5 2024-09-01 05:00:00
#> 6 2024-09-01 06:00:00
The computation that we want to be done under the hood relates to translating the here-to-fore unknown positional aesthetic ‘date’ to the first-class ‘x’ and ‘y’ positional aesthetic mappings, as well as variables that can be used in faceting (month).
knitrExtra:::chunk_to_r("get_week_of_month")
As a pre-step to computing many useful variables from our date variable, we focus on this (currently messy) conversion of vectors of dates to week of the month.
get_week_of_month <- function(x){
(- lubridate::wday(x) + lubridate::day(x)) %/%
7 + 1 +
ifelse(lubridate::wday(lubridate::floor_date(lubridate::as_date(x), "month")) == 1, 0, 1)
}
Next, we’ll define a compute group function. A number of variables are created by parsing our date variable.
Then, we’ll pass all this computation to define a new ggproto object StatCalendar. For maximum flexibility, our compute function doesn’t create ggplot2 core aesthetic channels ‘x’, ‘y’, and ‘label’ variables, but instead uses the default_aes field to state what should be first interpreted as x, y and label (thoughts? Maybe only ‘label’ should be managed like this).
knitrExtra:::chunk_to_r("compute_group_calendar")
compute_group_calendar <- function(data, scales){
data |>
dplyr::mutate(wday = lubridate::wday(.data$date)) |>
dplyr::mutate(wday_abbr = lubridate::wday(.data$date, label = TRUE, abbr = TRUE)) |>
dplyr::mutate(week_of_month = get_week_of_month(.data$date)) |>
dplyr::mutate(day = lubridate::day(.data$date)) |>
dplyr::mutate(year = lubridate::year(.data$date) - 2018) |>
dplyr::mutate(month_abbr = lubridate::month(.data$date, abbr = TRUE, label = TRUE)) |>
dplyr::mutate(hour = lubridate::hour(.data$date)) |>
dplyr::mutate(year_academic = lubridate::year(.data$date) +
ifelse(lubridate::month(date) >
6, 1, 0)) |>
dplyr::mutate(month_academic_abbr = .data$month_abbr |>
factor(levels = c("Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
"Jan", "Feb", "Mar", "Apr", "May", "Jun")))
}
StatCalendar <- ggplot2::ggproto(`_class` = "StatCalendar",
`_inherit` = ggplot2::Stat,
required_aes = c("date"),
compute_group = compute_group_calendar,
default_aes = ggplot2::aes(x = ggplot2::after_stat(wday),
y = ggplot2::after_stat(week_of_month),
label = ggplot2::after_stat(day)))
StatWeekly <- ggplot2::ggproto(`_class` = "StatCalendar",
`_inherit` = ggplot2::Stat,
required_aes = c("date"),
compute_group = compute_group_calendar,
default_aes = ggplot2::aes(x = ggplot2::after_stat(wday),
y = ggplot2::after_stat(hour),
label = ggplot2::after_stat(hour)))
Okay, let’s see how our compute and Stat work in action!
df_week() |>
compute_group_calendar()
#> date wday wday_abbr week_of_month day year month_abbr hour
#> 1 2024-09-01 1 Sun 1 1 6 Sep 0
#> 2 2024-09-02 2 Mon 1 2 6 Sep 0
#> 3 2024-09-03 3 Tue 1 3 6 Sep 0
#> 4 2024-09-04 4 Wed 1 4 6 Sep 0
#> 5 2024-09-05 5 Thu 1 5 6 Sep 0
#> 6 2024-09-06 6 Fri 1 6 6 Sep 0
#> 7 2024-09-07 7 Sat 1 7 6 Sep 0
#> year_academic month_academic_abbr
#> 1 2025 Sep
#> 2 2025 Sep
#> 3 2025 Sep
#> 4 2025 Sep
#> 5 2025 Sep
#> 6 2025 Sep
#> 7 2025 Sep
df_month() |>
ggplot() +
aes(date = date) +
geom_text(stat = StatCalendar)
knitrExtra:::chunk_to_r("a_stat_calendar")
#' Title
#'
#' @param mapping
#' @param data
#' @param geom
#' @param position
#' @param na.rm
#' @param show.legend
#' @param inherit.aes
#' @param ...
#'
#' @return
#' @export
#'
#' @examples
stat_calendar <- function(mapping = NULL,
data = NULL,
geom = "text",
position = "identity",
na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE, ...) {
ggplot2::layer(
stat = StatCalendar, # proto object from Step 2
geom = geom, # inherit other behavior
data = data,
mapping = mapping,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(na.rm = na.rm, ...)
)
}
df_year() |>
ggplot() +
aes(date = date) +
stat_calendar(color = "grey") +
facet_wrap(~month(date, label = T, abbr = T)) +
scale_y_reverse()
To give the user a better sense of what they’ll see when using stat_calendar we create the alias, ‘geom_text_calendar()’.
knitrExtra:::chunk_to_r("geom_text_calendar")
#' Title
#'
#' @param ...
#'
#' @return
#' @export
#'
#' @examples
geom_text_calendar <- function(...){stat_calendar(geom = "text", ...)}
#' Title
#'
#' @param ...
#'
#' @return
#' @export
#'
#' @examples
geom_point_calendar <- function(...){stat_calendar(geom = "point", ...)}
#' Title
#'
#' @param ...
#'
#' @return
#' @export
#'
#' @examples
geom_tile_calendar <- function(...){stat_calendar(geom = "tile", ...)}
defaults_calendar
& ggcalendar()
Thinking about set of scales/coords etc, that gives you a nice calendar (to wrap up into defaults)
In our test of stat_calendar, we see cumbersomeness. Below, we consider even more ggplot2 decision that would make our plot easier to consume and more beautiful.
day_labels = c("S", "M", "T", "W", "T", "F", "S")
df_year() |>
ggplot() +
aes(date = date) +
stat_calendar(color = "grey") +
ggplot2::aes(date = date) +
ggplot2::scale_y_reverse(breaks = 5:0,
expand = c(0,0),
limits = c(6.5, 0.5)) +
ggplot2::scale_x_continuous(breaks = 1:7,
labels = day_labels,
limits = c(.5, 7.5),
expand = c(0,0)
) +
ggplot2::facet_wrap(~lubridate::month(date, abbr = T, label = T), scales = "free") +
ggplot2::labs(x = NULL, y = NULL) +
ggplot2::theme(axis.text.y = ggplot2::element_blank(),
axis.ticks.y = ggplot2::element_blank(),
axis.ticks.x = ggplot2::element_blank()) +
ggplot2::theme(panel.grid.major = ggplot2::element_blank()) +
ggplot2::geom_blank()
Then, we bundle these up into defaults_calendar, which can be quickly added for converting to a more polished and readable calendar.
knitrExtra::chunk_to_dir("theme_grey_calendar")
#' Title
#'
#' @param ...
#'
#' @return
#' @export
#'
#' @examples
theme_grey_calendar <- function(...){
theme_grey(...) %+replace%
ggplot2::theme(
axis.text.y = ggplot2::element_blank(),
axis.ticks.y = ggplot2::element_blank(),
axis.ticks.x = ggplot2::element_blank(),
axis.title = element_blank()) +
ggplot2::theme(
panel.grid.major = ggplot2::element_blank())
}
scale_y_calendar <- function(...){ggplot2::scale_y_reverse(breaks = 6:0,
expand = c(0,0),
limits = c(6.5, 0.5), ...)}
scale_x_calendar <- function(day_labels = c("M", "T", "W", "T", "F", "S", "S"), ...){
ggplot2::scale_x_continuous(breaks = 1:7,
labels = day_labels,
limits = c(.5, 7.5),
expand = c(0,0), ...)}
facet_calendar <- function(...){
ggplot2::facet_wrap(~lubridate::month(date, abbr = T, label = T),
scales = "free",...)
}
geom_calendar_blank <- function(...){
stat_calendar(geom = "blank", ...)
}
knitrExtra:::chunk_to_r("defaults_calendar")
#' Title
#'
#' @param day_labels
#'
#' @return
#' @export
#'
#' @examples
defaults_calendar <- function(day_labels = c("M", "T", "W", "T", "F", "S", "S")){
week_start <- getOption("lubridate.week.start", 7)
if(week_start != 1){day_labels <- day_labels[c(week_start:7, 1:(week_start-1))]}
list(scale_y_calendar(),
scale_x_calendar(day_labels = day_labels),
facet_calendar(),
theme_grey_calendar(),
stat_calendar(geom = "blank")
)
}
Let’s check it out…
df_week() |>
ggplot() +
aes(date = date) +
stat_calendar() +
defaults_calendar()
df_year() |>
ggplot() +
aes(date = date) +
stat_calendar() +
defaults_calendar()
Furthermore, we provide ggcalendar as an alternative point of entry into the ggplot framework. The default data frame is even included (the current calendar year), so a full calendar will print with no additional specification.
knitrExtra:::chunk_to_r("ggcalendar")
#' Title
#'
#' @param dates_df
#' @param day_labels
#' @param geom
#' @param color
#' @param size
#' @param alpha
#'
#' @return
#' @export
#'
#' @examples
ggcalendar <- function(dates_df = df_year(),
day_labels = c("M", "T", "W", "T", "F", "S", "S"),
geom = "text",
color = "grey35",
size = 3,
alpha = 1){
my_layer <- stat_calendar(geom = geom,
color = color,
ggplot2::aes(date = date),
size = size,
alpha = alpha,
show.legend = F)
ggplot2::ggplot(data = dates_df) +
defaults_calendar(day_labels = day_labels) +
ggplot2::aes(date = date) +
my_layer
}
Let’s check it out!
ggcalendar()
ggcalendar() +
stat_calendar(geom = "point",
data = df_week(),
color = "darkred",
size = 5,
alpha = .5)
options(lubridate.week.start = 1)
ggcalendar() +
stat_calendar(geom = "point",
data = df_week(),
color = "darkred",
size = 5,
alpha = .5)
library(magrittr)
ggcalendar() +
# remember default data in ggcalendar() is current year of dates
aes(date = date) +
geom_tile_calendar(data = . %>%
filter(wday(date) == 3),
fill = "blue",
alpha = .2) +
labs(title = "When to do #TidyTuesday in 2024") +
stat_calendar(label = "X",
color = "darkred",
size = 5,
data = df_dates_interval(
"2024/01/01", Sys.Date() - days(1)),
alpha = .35)
df_month(year = 2023, month = 2) |>
ggcalendar()
df_month(year = 2023, month = 2) |>
ggcalendar()
df_month(year = 2023, month = 2) |>
ggcalendar(geom = "blank") +
aes(date = date) +
geom_text_calendar(label = "Another\nday...", # override default
size = 4)
df_month(year = 2023, month = 2) |>
ggcalendar() +
aes(date = date) +
geom_text_calendar() +
geom_point_calendar(data = . %>% filter(wday(date) %in% 2:6),
alpha = .2,
size = 5,
color = "cadetblue") +
theme(panel.background = element_rect(fill = "beige"))
library(ggplot2)
df_dates_interval("2023-09-01", "2023-12-31") |>
ggcalendar()
## basic example code
c("2022-03-19", "2022-04-09", "2022-05-07",
"2022-06-11", "2022-07-16") %>%
tibble(date = .) |>
mutate(date = date %>% as_date) |>
mutate(future = Sys.Date() < date) ->
events
df_year(2022) |>
ggcalendar() +
aes(date = date) +
geom_text_calendar() +
geom_point_calendar(data = events,
aes(color = future),
size = 8,
alpha = .5,
show.legend = F) +
labs(title = "nu2ggplot2X^2sion, 2022")
Airline on-time data for all flights departing NYC in 2013. Also includes useful ‘metadata’ on airlines, airports, weather, and planes.
Data inspiration: https://twitter.com/rappa753/status/1545729747774308354 @rappa753
# example
nycflights13::flights |>
ungroup() |>
mutate(date = as.Date(time_hour)) |>
filter(year(date) == 2013) |>
count(date) |>
ggcalendar() +
aes(date = date) +
geom_point_calendar(data = . %>% tibble(), aes(size = n,
color = n),
alpha = .7, show.legend = F) +
scale_color_viridis_c(option = "inferno", direction = 1) +
scale_size(range = c(3,8)) +
geom_text_calendar(aes(label = n), size = 2) +
NULL
births <- "https://raw.githubusercontent.com/EvaMaeRey/tableau/9e91c2b5ee803bfef10d35646cf4ce6675b92b55/tidytuesday_data/2018-10-02-us_births_2000-2014.csv"
readr::read_csv(births) |>
mutate(month = stringr::str_pad(month, 2, pad = "0"),
date_of_month = str_pad(date_of_month, 2, pad = "0")) |>
mutate(date = paste(year, month, date_of_month, sep = "-") |> as_date()) |>
filter(year == 2012) |>
ggcalendar() +
aes(date = date) +
geom_point_calendar(alpha = .4) +
aes(size = births) +
aes(color = births) +
scale_color_viridis_c() +
guides(
colour = guide_legend("Births"),
size = guide_legend("Births")
) +
geom_point_calendar(data = data.frame(date =
as_date("2012-12-25")),
size = 5, color = "red", shape = 21)
The following feels a little weird to me, but is allowed.
A grammar of graphics fundamental is that a statistical graphic are composed of geometries/marks that take on aesthetics (color, position, size), to represent a variable.
Below we aren’t aren’t fully stating these specifications; which feels a bit funny; I would not recommend this as a starting point.
ggcalendar() +
geom_text_calendar()
usethis::use_package("lubridate")
usethis::use_package("ggplot2")
usethis::use_package("dplyr")
usethis::use_package("stringr")
devtools::check()
devtools::install(pkg = ".", upgrade = "never")
rm(list = ls())
library(ggcalendar)
library(tidyverse)
options(lubridate.week.start = 1) # start on Monday
ggcalendar() +
labs(title = "Calendar: 2024")
ggcalendar() +
geom_tile_calendar(
data = df_week(),
fill = "red",
alpha = .25) +
geom_point_calendar(
data = df_today(),
color = "goldenrod3", shape = 21,
size = 8, stroke = 1.5
)
# example
nycflights13::flights |>
ungroup() |>
mutate(date = as.Date(time_hour)) |>
filter(year(date) == 2013) |>
count(date) |>
ggcalendar(geom = "blank") +
aes(date = date) +
geom_tile_calendar(
aes(fill = n),
alpha = .7, show.legend = F) +
scale_fill_viridis_c(option = "inferno",
direction = 1) +
scale_size(range = c(3,8)) +
geom_text_calendar(aes(label = n),
size = 2) +
NULL
contrast <- function(colour) {
out <- rep("grey35", length(colour))
light <- farver::get_channel(colour, "l", space = "hcl")
out[light < 50] <- "grey80"
out
}
aes_autocontrast_color_on_fill <- aes(colour = after_scale(contrast(fill)))
library(ggcalendar)
nhl_player_births <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2024/2024-01-09/nhl_player_births.csv')
nhl_player_births |>
mutate(date = birth_date %>%
str_replace("....", "2024") %>%
as_date()) %>%
count(date) %>%
ggcalendar(geom = "blank") +
aes(date = date, # date is positional aes
fill = n) +
labs(title = "Number of NHL Player Birthdays by day 1879-2005\nas celebrated in 2024") +
geom_tile_calendar(alpha = .85, linewidth = 0) +
scale_fill_viridis_c()
last_plot() +
aes(label = n) +
geom_text_calendar(size = 3) +
aes_autocontrast_color_on_fill +
guides(fill = "none")
## basic example code
df_month(month = "2022-07") |>
head()
return_dates_interval(start_date = "2022-07-01",
end_date = "2022-08-31") |>
ggcalendar() +
aes(date = date) +
geom_text_calendar(size = 8) +
geom_point_calendar(data = . %>% filter(date == "2022-07-04"),
size = 8,
alpha = .5) +
geom_point_calendar(data = . %>% filter(date < Sys.Date()),
size = 10, shape = "x")
knitr::knit_exit()