---
title: "Statistical Inference"
author: "Jake Hofman"
date: "April 20, 2017"
output:
  html_document:
    toc: true
    toc_depth: 2
---

```{r}
library(tidyverse)
#library(ggplot2)
#library(reshape)
#library(dplyr)

theme_set(theme_bw())

set.seed(42)
```

# Estimating a proportion
## Point estimate and sampling distribution
Repeatedly flip a biased coin 100 times and estimate its bias.
Adapted from Yakir 11.2.3.
```{r}
estimate_coin_bias <- function(n, p) {
  mean(rbinom(n, 1, p))
}

n <- 100
p <- 0.3
p_hat <- replicate(1e5, estimate_coin_bias(n, p))

# plot the sampling distribution
qplot(x=p_hat, geom="histogram", binwidth=0.01) +
  geom_vline(xintercept=p) +
  geom_vline(xintercept=mean(p_hat), linetype=2, color="red")

# repeat this for different sample sizes
plot_data <- data.frame()
for (n in c(100, 200, 400, 800)) {
  tmp <- data.frame(n=n, p_hat=replicate(1e5, estimate_coin_bias(n, p)))
  plot_data <- rbind(plot_data, tmp)
}

qplot(data=plot_data, x=p_hat, geom="histogram", binwidth=0.01, facets = . ~ n)

# compute the empirical standard error of p_hat at each sample size
se <- plot_data %>%
  group_by(n) %>%
  summarize(se=sd(p_hat))
qplot(data=se, x=n, y=se) +
  stat_function(fun=function(n) {sqrt(p * (1 - p) / n)}, linetype=2)
```
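
The dashed curve in the last plot is the theoretical standard error `sqrt(p*(1-p)/n)`, which the empirical standard errors track closely: the uncertainty in p_hat shrinks in proportion to 1/sqrt(n).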

## Confidence intervals
```{r}
set.seed(42)
n <- 100
p <- 0.3
p_hat <- replicate(1e5, estimate_coin_bias(n, p))

# compute upper and lower confidence limits
LCL <- p_hat - 1.96*sqrt(p_hat*(1-p_hat)/n)
UCL <- p_hat + 1.96*sqrt(p_hat*(1-p_hat)/n)

# check how often the true proportion is contained in the estimated confidence interval
mean(p >= LCL & p <= UCL)

# plot 100 confidence intervals and the true value
plot_data <- data.frame(p_hat, LCL, UCL)[1:100,]
plot_data <- transform(plot_data, contains_p=(p >= LCL & p <= UCL))
ggplot(data=plot_data, aes(x=1:nrow(plot_data), y=p_hat, color=contains_p)) +
  geom_pointrange(aes(ymin=LCL, ymax=UCL)) +
  geom_hline(yintercept=p, linetype=2) +
  xlab('') +
  scale_color_manual(values=c("red","darkgreen")) +
  theme(legend.position="none")
```
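
Roughly 95% of the intervals `p_hat +/- 1.96*sqrt(p_hat*(1-p_hat)/n)` should contain the true value p=0.3; the coverage computed above estimates how often that actually happens at this n.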


## Hypothesis testing
```{r}
# construct a null distribution: what would happen if the coin were fair?
set.seed(42)
n <- 100
p0_hat <- replicate(1e5, estimate_coin_bias(n, p=0.5))

# now conduct one experiment with 100 flips of a biased coin
p_hat <- estimate_coin_bias(n, p=0.3)

# plot the null distribution and see where the observed estimate lies in it
qplot(x=p0_hat, geom="histogram", binwidth=0.01) +
  geom_vline(xintercept=p_hat, linetype=2, color="red")

# compare this to our experiment:
# how likely is it that we would see an estimate this extreme if the coin really were fair?
num_as_extreme <- sum(p0_hat <= p_hat)
p_value <- num_as_extreme / length(p0_hat)
p_value
```
Only `r num_as_extreme` out of `r length(p0_hat)` estimates from a fair coin with p=0.5 would result in an estimate of p_hat=`r p_hat` or smaller, corresponding to a p-value of `r p_value`.
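
As a quick cross-check (not in the original analysis), the same one-sided p-value can be computed exactly from the binomial distribution, treating the observed `n * p_hat` heads as the test statistic:
```{r}
# added check: exact one-sided binomial p-value, P(X <= n * p_hat) for X ~ Binomial(n, 0.5);
# this should be close to the simulation-based p-value above
binom.test(x = round(n * p_hat), n = n, p = 0.5, alternative = "less")$p.value
```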


# Comparing two proportions
## Point estimates and sampling distributions
Repeatedly flip two coins, 500 times each, and estimate their biases.
```{r}
estimate_coin_bias <- function(n, p) {
  mean(rbinom(n, 1, p))
}

pa <- 0.12
pb <- 0.08
n <- 500
pa_hat <- replicate(1e5, estimate_coin_bias(n, pa))
pb_hat <- replicate(1e5, estimate_coin_bias(n, pb))

# wrangle the results into one data frame
plot_data <- rbind(data.frame(split='A', trial=1:length(pa_hat), p_hat=pa_hat),
                   data.frame(split='B', trial=1:length(pb_hat), p_hat=pb_hat))

# plot the sampling distributions for each split
ggplot(data=plot_data, aes(x=p_hat, fill=split)) +
  geom_histogram(position="identity", binwidth=0.002, alpha=0.5) +
  scale_alpha(guide=F)

# plot the sampling distribution of the difference
qplot(x=pa_hat-pb_hat, geom="histogram", binwidth=0.002) +
  geom_vline(xintercept=pa-pb) +
  geom_vline(xintercept=mean(pa_hat-pb_hat), linetype=2, color="red")

# note that variances add for independent random variables
variance_of_difference <- var(pa_hat - pb_hat)
sum_of_variances <- var(pa_hat) + var(pb_hat)
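
# added check: the two quantities should agree closely
c(variance_of_difference = variance_of_difference, sum_of_variances = sum_of_variances)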
```

## Confidence intervals
```{r}
# plot 100 confidence intervals by split
plot_data <- transform(plot_data,
                       LCL = p_hat - 1.96*sqrt(p_hat*(1-p_hat)/n),
                       UCL = p_hat + 1.96*sqrt(p_hat*(1-p_hat)/n))
plot_data <- subset(plot_data, trial <= 100)
ggplot(data=plot_data, aes(x=trial, y=p_hat, linetype=split)) +
  geom_pointrange(aes(ymin=LCL, ymax=UCL)) +
  xlab('') +
  theme(legend.title=element_blank())
```
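
Because the two splits are independent, their variances add, so a confidence interval for the difference uses the combined standard error. Here is a small added sketch (not in the original) for the first simulated experiment:
```{r}
# added sketch: 95% CI for the difference in proportions from one simulated experiment,
# with se_diff^2 = pa_hat*(1-pa_hat)/n + pb_hat*(1-pb_hat)/n
d_hat <- pa_hat[1] - pb_hat[1]
se_diff <- sqrt(pa_hat[1]*(1 - pa_hat[1])/n + pb_hat[1]*(1 - pb_hat[1])/n)
c(estimate = d_hat, LCL = d_hat - 1.96*se_diff, UCL = d_hat + 1.96*se_diff)
```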

## Hypothesis testing
```{r}
# construct a null distribution: what would happen if both coins had the same bias (i.e., A and B are the same)?
p0a <- 0.08
p0b <- 0.08
n <- 500
dp0_hat <- replicate(1e5, estimate_coin_bias(n, p0a)) -
  replicate(1e5, estimate_coin_bias(n, p0b))

# run one experiment where there is an underlying difference
pa <- 0.12
pb <- 0.08
dp_hat <- estimate_coin_bias(n, pa) - estimate_coin_bias(n, pb)

# plot the null distribution and see where the observed estimate lies in it
qplot(x=dp0_hat, geom="histogram", binwidth=0.01) +
  geom_vline(xintercept=dp_hat, linetype=2, color="red")

# compare this to our experiment:
# how likely is it that we would see an estimate this extreme if both coins were identical?
num_as_extreme <- sum(dp0_hat >= dp_hat)
p_value <- num_as_extreme / length(dp0_hat)
p_value
```
Only `r num_as_extreme` out of `r length(dp0_hat)` estimates from two identical coins with p=0.08 would result in an estimated difference of dp_hat=`r dp_hat` or larger, corresponding to a p-value of `r p_value`.
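
As an added cross-check (not in the original), the simulation-based p-value can be compared to a normal approximation in which the null difference has mean 0 and standard error `sqrt(2*p0*(1-p0)/n)` with p0=0.08:
```{r}
# added check: normal-approximation p-value for a difference at least as large as dp_hat
# when both coins have bias p0a = p0b = 0.08
se0 <- sqrt(p0a*(1 - p0a)/n + p0b*(1 - p0b)/n)
pnorm(dp_hat, mean = 0, sd = se0, lower.tail = FALSE)
```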

# Power calculations
## Computing sample size using built-in R functions
```{r}
# use power.prop.test to compute the sample size you need
power.prop.test(p1=0.08, p2=0.12, sig.level=0.05, power=0.80, alternative="one.sided")
```
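
The `n` reported here is the number of observations required in each group, not in total. To pull that number out directly (a small addition, not in the original):
```{r}
# added: extract the required per-group sample size and round up to a whole number
ceiling(power.prop.test(p1=0.08, p2=0.12, sig.level=0.05, power=0.80,
                        alternative="one.sided")$n)
```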

## Computing power by direct simulation
```{r}
# simulate one experiment: flip each coin n times and test whether
# split A has a higher proportion of heads than split B
run_experiment <- function(pa, pb, n, alpha) {
  na <- sum(rbinom(n, 1, pa))
  nb <- sum(rbinom(n, 1, pb))
  test <- prop.test(x = c(na, nb), n = c(n, n), alternative="greater", conf.level = 1 - alpha)
  test$p.value < alpha
}

# power is the fraction of simulated experiments that reject the null
compute_power <- function(pa, pb, n, alpha, r = 1000) {
  mean(replicate(r, run_experiment(pa, pb, n, alpha)))
}

pa <- 0.12
pb <- 0.08
alpha <- 0.05

N <- seq(100, 1000, by = 100)
pow <- c()
for (n in N) {
  pow <- c(pow, compute_power(pa, pb, n, alpha))
}

qplot(N, pow)
```
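
To sanity-check the simulation (an added comparison, not in the original), the analytic power from `power.prop.test` can be overlaid on the simulated estimates:
```{r}
# added comparison: analytic power at each sample size vs. the simulated power
# (the simulated values may sit slightly below, since prop.test applies a continuity correction)
analytic_pow <- sapply(N, function(n)
  power.prop.test(n=n, p1=0.08, p2=0.12, sig.level=alpha,
                  alternative="one.sided")$power)
qplot(N, pow) +
  geom_line(data=data.frame(N, analytic_pow), aes(x=N, y=analytic_pow), linetype=2)
```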