-
Notifications
You must be signed in to change notification settings - Fork 0
/
figure-timings-first.R
119 lines (115 loc) · 3.25 KB
/
figure-timings-first.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
library(data.table)
library(ggplot2)
# Accumulator for the per-task timing tables, indexed by task name.
timing.dt.list <- list()

# Benchmark tasks. Each name is the stem of a "<name>.rds" timing file;
# each value is the Subject description attached to that task's rows.
Subjects <- c(
  log = "Character vector",
  sacct = "Two data.frame columns"
)

# Colors looked up by package name in the manual color/fill scales.
# utils and base share the same orange; the yellow entry has no name.
pkg.colors <- c(
  namedCapture = "#E41A1C", # red
  tidyr = "#377EB8", # blue
  rex = "#4DAF4A", # green
  rematch2 = "#984EA3", # purple
  utils = "#FF7F00", # orange
  base = "#FF7F00", # orange
  "#FFFF33", # yellow
  stringi = "#A65628", # brown
  re2r = "#F781BF", # pink
  stringr = "#999999" # grey
)
# Load the saved benchmark timings for every task named in Subjects.
# Each task is read from "<task>.rds" in the working directory; a task
# whose file does not exist is skipped.
for (task in names(Subjects)) {
  timing.rds <- paste0(task, ".rds")
  if (file.exists(timing.rds)) {
    dt <- readRDS(timing.rds)
    # Drop any stored seconds column: it is recomputed from time below.
    if ("seconds" %in% names(dt)) {
      dt[, seconds := NULL]
    }
    timing.dt.list[[task]] <- data.table(
      task, Subject = Subjects[[task]], dt)
  }
}

# Without any timing file there is nothing to combine, and the column
# assignments below would fail with an unrelated error, so stop here
# with a message that names the files that were looked for.
if (length(timing.dt.list) == 0L) {
  stop(
    "no timing files found; expected at least one of: ",
    paste0(names(Subjects), ".rds", collapse = ", "),
    call. = FALSE)
}

# Stack the per-task tables, matching columns by name.
timing.dt <- rbindlist(timing.dt.list, use.names = TRUE)
# NOTE(review): dividing by 1e9 suggests time is stored in nanoseconds;
# confirm against the script that wrote the .rds files.
timing.dt[, seconds := time / 1e9]
# Character copy of the benchmarked expression, used for regex parsing.
timing.dt[, expr.chr := paste(expr)]
# Parse expr.chr into capture groups: pkg (text before "::"), fun (text
# up to the first opening parenthesis) and an optional param (text
# inside the parentheses). The groups are read below as expr.chr.pkg
# and expr.chr.param.
t2 <- namedCapture::df_match_variable(
  timing.dt,
  expr.chr = list(
    pkg = ".*?",
    "::",
    fun = "[^(]*",
    list("\\(", param = "[^(]+", "\\)"),
    "?"
  )
)

# Median and quartiles of the timings for each expression and size.
stats.dt <- t2[, .(
  median = median(seconds),
  q25 = quantile(seconds, 0.25),
  q75 = quantile(seconds, 0.75)
), by = .(
  task,
  Subject,
  pkg = expr.chr.pkg,
  param = expr.chr.param,
  subject.size,
  expr
)]

# Curve label:
#  - expressions mentioning utils keep their full text;
#  - expressions starting with base lose the "base::" prefix and have
#    TRUE/FALSE shortened to T/F;
#  - everything else is reduced to the text before "::".
stats.dt[, label := {
  is.utils <- grepl("utils", expr)
  is.base <- grepl("^base", expr)
  base.label <- sub("FALSE", "F", sub("TRUE", "T", sub("^base::", "", expr)))
  pkg.label <- sub("::.*", "", expr)
  ifelse(is.utils, paste(expr), ifelse(is.base, base.label, pkg.label))
}]

# Put the parameter text, when there is one, on a second label line.
stats.dt[, label.param := ifelse(
  param == "",
  label,
  paste0(label, "\n", param)
)]
# Color figure: one panel per Subject, median timing as a line with an
# interquartile ribbon, one curve per label.param, colored by package
# and labelled directly on the plot instead of with a legend.
dl <- ggplot() +
  theme_bw() +
  theme(panel.spacing = grid::unit(0, "lines")) +
  facet_grid(. ~ Subject, labeller = label_both) +
  # Layers, in drawing order: ribbon, then line, then direct labels.
  geom_ribbon(
    aes(
      x = subject.size,
      ymin = q25,
      ymax = q75,
      fill = pkg,
      group = label.param
    ),
    data = stats.dt,
    alpha = 0.5
  ) +
  geom_line(
    aes(
      x = subject.size,
      y = median,
      color = pkg,
      group = label.param
    ),
    data = stats.dt
  ) +
  directlabels::geom_dl(
    aes(
      x = subject.size,
      y = median,
      color = pkg,
      label = label.param
    ),
    method = list(cex = 0.65, "last.polygons"),
    data = stats.dt
  ) +
  # Axes: both on a log10 scale. The y axis title deliberately contains
  # a line break, so the string literal spans two source lines.
  scale_x_log10(
    "Number of subjects",
    breaks = 10^(2:5)
  ) +
  scale_y_log10("Time to compute first match
in each subject (seconds)") +
  # The x range extends past the largest break, leaving room on the
  # right of the curves.
  coord_cartesian(
    xlim = c(10^1.8, 10^6.5),
    ylim = c(1e-3, 1e1),
    expand = FALSE
  ) +
  scale_color_manual(values = pkg.colors) +
  scale_fill_manual(values = pkg.colors) +
  guides(color = "none", fill = "none")

# Write the figure as a 6 x 2.5 inch PDF.
pdf(file = "figure-timings-first.pdf", width = 6, height = 2.5)
print(dl)
dev.off()
# Linetype figure: same panels, axes and limits as the color figure,
# but packages are told apart by line type and the ribbon has no fill
# mapping. Each direct label gets a trailing space and a grey box.
dl.linetype <- ggplot() +
  theme_bw() +
  theme(panel.spacing = grid::unit(0, "lines")) +
  facet_grid(. ~ Subject, labeller = label_both) +
  # Layers, in drawing order: ribbon, then line, then direct labels.
  geom_ribbon(
    aes(
      x = subject.size,
      ymin = q25,
      ymax = q75,
      group = label.param
    ),
    data = stats.dt,
    alpha = 0.5
  ) +
  geom_line(
    aes(
      x = subject.size,
      y = median,
      linetype = pkg,
      group = label.param
    ),
    data = stats.dt
  ) +
  directlabels::geom_dl(
    aes(
      x = subject.size,
      y = median,
      label = paste0(label.param, " ")
    ),
    method = list(cex = 0.65, box.color = "grey", "last.polygons"),
    data = stats.dt
  ) +
  # Axes: both on a log10 scale. The y axis title deliberately contains
  # a line break, so the string literal spans two source lines.
  scale_x_log10(
    "Number of subjects",
    breaks = 10^(2:5)
  ) +
  scale_y_log10("Time to compute first match
in each subject (seconds)") +
  coord_cartesian(
    xlim = c(10^1.8, 10^6.5),
    ylim = c(1e-3, 1e1),
    expand = FALSE
  ) +
  guides(color = "none", fill = "none", linetype = "none")

# Write the figure twice at 6 x 2.5 inches: as a PDF, then as a PNG
# at 200 dpi.
pdf(file = "figure-timings-first-linetype.pdf", width = 6, height = 2.5)
print(dl.linetype)
dev.off()

png(
  filename = "figure-timings-first-linetype.png",
  width = 6,
  height = 2.5,
  units = "in",
  res = 200
)
print(dl.linetype)
dev.off()