02-EP17.Rmd

```{r Setup, echo = FALSE}
# Attach the tidyverse without printing its startup banner in the rendered document
suppressPackageStartupMessages({
  library(tidyverse)
})

# Turn off dplyr's informational message about how summarise() regroups its output
options(dplyr.summarise.inform = FALSE)
```

# EP17 - Detection Capabilities
Evaluation of Detection Capability for Clinical Laboratory Measurement Procedures

This work is based on EP17-A2, June 2012.

Simple, elegant code with minimal duplication that can be adapted for other uses.

## Appendix A
Evaluation of Limits of Blank and Detection by the Classical Approach


### Table A1
Observed Blank Sample Results for Reagent Lot 1 (Units are pg/mL)
```{r Table A1}
# Table A1. Observed Blank Sample Results for Reagent Lot 1 (Units are pg/mL)
# Wide layout: one row per day/replicate pair (3 days x 4 replicates = 12 rows)
# and one column per blank sample. Values in each column are listed in row order:
# day 1 replicates 1-4, then day 2, then day 3.
table_a1 <- tibble(
  day = rep(1:3, each = 4),
  replicate = rep(1:4, times = 3),
  `Blank 1` = c( 2.6, -0.8, 5.5, 6.0, 4.5, 0.6, -2.3,   3.4, 5.9, 7.6,  4.1, -1.4),
  `Blank 2` = c( 1.0,  2.9, 4.9, 8.0, 6.9, 5.0,  3.4,   1.2, 6.5, 5.6, -2.2,  2.3),
  `Blank 3` = c(-4.4, -3.4, 7.0, 6.9, 4.3, 3.2, -1.4,  -4.2, 5.9, 7.6,  3.8,  5.8),
  `Blank 4` = c( 1.5, -1.9, 5.1, 5.7, 4.1, 4.5, -0.6,   0.5, 5.4, 7.6,  4.4,  6.6),
  `Blank 5` = c( 1.2, -0.7, 6.1, 5.1, 4.8, 3.3, -2.8,  -1.4, 8.7, 3.6,  5.1,  3.5)
)

```

### Table A2
Observed Low Level Sample Results for Reagent Lot 1 (Units are pg/mL)
```{r Table A2}
# Table A2. Observed Low Level Sample Results for Reagent Lot 1 (Units are pg/mL)
# Same wide layout as the blank tables: one row per day/replicate pair
# (3 days x 4 replicates) and one column per low level sample.
table_a2 <- tibble(
  day = rep(1:3, each = 4),
  replicate = rep(1:4, times = 3),
  `Low 1` = c(21.0, 22.8, 28.2, 25.9, 26.4, 28.3, 20.7, 21.9, 24.7, 22.5, 28.5, 29.2),
  `Low 2` = c(13.3, 12.6, 18.2, 14.7, 17.8, 14.0, 14.1, 12.5, 11.3, 12.2, 16.2, 13.9),
  `Low 3` = c(12.8, 12.9, 17.4, 16.0, 15.9, 14.1, 11.3,  9.4, 10.6, 13.6, 17.6, 14.9),
  `Low 4` = c(17.3, 19.2, 21.5, 22.2, 24.1, 25.8, 16.0, 16.4, 24.9, 23.8, 22.1, 26.1),
  `Low 5` = c(19.2, 22.7, 28.3, 26.2, 25.1, 30.3, 23.4, 19.2, 26.3, 23.1, 27.5, 30.1)
)
```

### Table A3
Observed Blank Sample Results for Reagent Lot 2 (Units are pg/mL)
```{r Table A3}
# Table A3. Observed Blank Sample Results for Reagent Lot 2 (Units are pg/mL)
# One row per day/replicate pair (3 days x 4 replicates) and one column per
# blank sample; the column names match those used for reagent lot 1.
table_a3 <- tibble(
  day = rep(1:3, each = 4),
  replicate = rep(1:4, times = 3),
  `Blank 1` = c(4.6,  4.1,  1.6, 3.7, 2.2, 0.7, 4.6, 2.6, 1.1, -4.4, 0.9,   0.7),
  `Blank 2` = c(9.2,  8.3,  4.8, 5.4, 4.8, 6.3, 5.4, 9.6, 7.7,  3.1, 6.1,  10.0),
  `Blank 3` = c(6.1,  3.2,  3.9, 1.4, 3.1, 4.1, 1.0, 3.4, 0.1,  0.4, 2.9,  -1.6),
  `Blank 4` = c(4.0, 11.5,  4.5, 3.6, 4.4, 6.8, 7.1, 4.2, 3.7,  3.7, 5.3,   4.5),
  `Blank 5` = c(4.0,  6.2, -0.2, 2.3, 1.6, 2.6, 6.4, 5.7, 4.2,  3.7, 1.4,   1.5) 
)
```

### Table A4
Observed Low Level Sample Results for Reagent Lot 2 (Units are pg/mL)
```{r Table A4}
# Table A4. Observed Low Level Sample Results for Reagent Lot 2 (Units are pg/mL)
# One row per day/replicate pair (3 days x 4 replicates) and one column per
# low level sample; the column names match those used for reagent lot 1.
table_a4 <- tibble(
  day = rep(1:3, each = 4),
  replicate = rep(1:4, times = 3),
  `Low 1` = c(22.0, 22.5, 21.8, 22.1, 20.3, 21.0, 25.3, 26.0, 27.2, 25.1, 25.3, 25.3),
  `Low 2` = c(15.6, 21.2, 14.8, 14.9, 16.0, 15.8, 21.6, 22.8, 15.3, 18.7, 18.3, 19.5),
  `Low 3` = c(13.0, 15.9,  9.0,  7.0, 13.4,  8.5, 16.3, 18.1, 12.4, 11.1, 11.3, 10.1),
  `Low 4` = c(18.8, 17.6, 14.1, 14.9, 19.2, 15.8, 19.8, 21.4, 18.0, 18.0, 19.6, 23.1),
  `Low 5` = c(32.9, 30.4, 29.4, 27.6, 27.7, 30.6, 31.4, 30.4, 32.5, 28.9, 29.8, 35.1)
)
```

### Tidy Table Cleanup
Before we move on, we can make our job significantly easier going forward by reorganizing our tables into a tidy, rowwise format. 

```{r Cleanup Tables A1-A4 into tidy format}

# Combine Tables A1-A4 into one tidy tibble with one row per measurement.
#
# For each wide table: tag it with its reagent lot, then pivot the per-sample
# columns ("Blank n" / "Low n") into `sample` / `value` pairs. The combined
# result gets `sample` split into `sample_type` and `sample_number`, and every
# column except `value` is converted to a factor; later chunks use nlevels()
# on these factors to count days, samples and replicates.
tables_a14 <- map2_dfr(
  .x = list(table_a1, table_a2, table_a3, table_a4),  # The tables we need to clean up
  .y = list("Lot 1", "Lot 1", "Lot 2", "Lot 2"),      # The reagent lot that each table belongs to
  .f = ~ mutate(.x, reagent_lot = .y) %>%
    pivot_longer(cols = starts_with(c("Blank", "Low")),
                 names_to = "sample",
                 values_to = "value")
) %>%
  # Keep the original "Blank 1"-style label (remove = FALSE) alongside its two parts
  separate(sample, into = c("sample_type", "sample_number"), sep = " ", remove = FALSE) %>%
  # across() replaces the superseded mutate_at(vars(-value), as.factor)
  mutate(across(-value, as.factor))

glimpse(tables_a14)

```

### Table A5
Rank Positions and LoB from Blank Sample Test Results
```{r Table A5}

alpha <- 0.05        # Type I error risk
pct_B <- 1 - alpha   # Percentile of the blank distribution that defines the LoB

# Number of blank results per reagent lot
B <- nlevels(tables_a14$day) *        # Number of days
  nlevels(tables_a14$sample_number) * # Number of unique samples
  nlevels(tables_a14$replicate)       # Number of replicates

# Rank position of the LoB among the sorted blank results.
# With the Appendix A design (3 days x 5 samples x 4 replicates) this is
# 0.5 + 60 * 0.95 = 57.5, i.e. halfway between the 57th and 58th ranked results.
rank_position <- 0.5 + (B * pct_B)

# Reproduce table A5: the blank results at rank positions 56-60 for each lot.
# Ties are broken with "first" rather than "random": tied results carry the same
# value, so the table is unchanged, but the chunk no longer draws from the RNG.
tables_a14 %>%
  filter(sample_type == "Blank") %>%
  group_by(reagent_lot) %>%
  mutate(rank_position = rank(value, ties.method = "first")) %>%
  ungroup() %>%
  filter(rank_position %in% 56:60) %>%
  arrange(reagent_lot, rank_position) %>%
  select(rank_position, reagent_lot, value) %>%
  pivot_wider(names_from = reagent_lot, values_from = value)

# Non-parametric LoB per reagent lot.
# Type 2 quantiles provide averaging at discontinuities, which reproduces the
# "halfway between two ranks" interpolation described above.
limits_nonparametric <- tables_a14 %>%
  filter(sample_type == "Blank") %>%
  group_by(reagent_lot, sample_type) %>%
  summarize(LoB = quantile(value, 1 - alpha, type = 2), .groups = "drop")

limits_nonparametric
```


### Table A6
SDs and LoD Calculations From Low Level Sample Test Results
```{r Table A6}

z <- qnorm(1 - alpha)  # 95th percentile of normal distribution
# z <- round(z, digits = 3)

# Degrees of freedom, total measurements less the number of unique samples
df <- B - nlevels(tables_a14$sample_number)

# "Multiplier factor" as per eqn. 6
c_p <- z / (1 - (1 / (4 * df)))

# Per-sample SD of the low level results, by reagent lot
table_a6 <- tables_a14 %>%
  filter(sample_type == "Low") %>%
  group_by(sample, reagent_lot) %>%
  summarize(n = n(), SD = sd(value), .groups = "drop") %>%
  arrange(reagent_lot)

table_a6

# Pool the per-sample SDs within each lot (weighted by n - 1) and add the LoD.
# Only the key and LoB columns are taken from the existing `limits_nonparametric`,
# so re-running this chunk does not collide with the SD_L / LoD columns that a
# previous run already added.
limits_nonparametric <- table_a6 %>%
  group_by(reagent_lot) %>%
  summarize(SD_L = sqrt(sum((n - 1) * SD^2) / sum(n - 1)), .groups = "drop") %>%
  left_join(select(limits_nonparametric, reagent_lot, sample_type, LoB),
            by = "reagent_lot") %>%
  mutate(LoD = round(LoB + c_p * SD_L, 1))

## I believe there to be an error in this table - CD, 16-July-2020

limits_nonparametric

```

### Figure A1
Histogram of Combined Blank Sample Measurements
```{r Figure A1}

# Histogram of every blank result (both reagent lots combined)
ggplot(
  data = filter(tables_a14, sample_type == "Blank"),
  mapping = aes(x = value)
) +
  # Match the binning-style that CLSI uses: 0.5-wide, left-closed bins aligned on 0.5
  geom_histogram(
    binwidth = 0.5,
    boundary = 0.5,
    closed = "left",
    color = "black",
    fill = "lightgrey"
  ) +
  scale_x_continuous(name = "", limits = c(-6, 12), breaks = seq(-6, 12, by = 2)) +
  scale_y_continuous(name = "Count", breaks = seq(2, 10, by = 2))

# Shapiro-Wilk normality test on the same combined blank results
shapiro.test(
  tables_a14 %>% filter(sample_type == "Blank") %>% pull(value)
)
```

### Table A7
LoB Calculations Using Parametric Data Analysis Options
```{r Table A7}

# Parametric LoB per reagent lot: mean of the blanks plus c_p times their SD.
# The mean and SD are rounded to two decimals before the LoB is computed from them.
table_a7 <- tables_a14 %>%
  filter(sample_type == "Blank") %>%
  group_by(reagent_lot) %>%
  summarize(
    M_B = round(mean(value), 2),
    SD_B = round(sd(value), 2),
    c_p = c_p,  # carry the multiplier computed earlier along as a column
    .groups = "drop"
  ) %>%
  mutate(LoB = round(M_B + c_p * SD_B, 1))

# Row 1 of this table doesn't align with the document
table_a7

```


```{r Calculate LoD parametrically}
# Parametric LoD per reagent lot: parametric LoB (Table A7) plus c_p times the
# pooled SD of the low level samples (Table A6, pooled with n - 1 weights).
limits_parametric <- right_join(
  x = summarize(
    group_by(table_a6, reagent_lot),
    SD_L = sqrt(sum((n - 1) * SD^2) / sum(n - 1)),
    .groups = "drop"
  ),
  y = table_a7,
  by = "reagent_lot"
) %>%
  mutate(LoD = round(LoB + c_p * SD_L, 1))

limits_parametric


```


```{r Appendix A Error Checking, echo = FALSE, include = FALSE}
# Guard rails for Appendix A: stop if any limit no longer matches its expected value.
# Expected values are listed in reagent lot order (Lot 1, Lot 2).
stopifnot(
  near(limits_nonparametric$LoB, c(7.6, 9.4)),

  # Values according to my math; the document reports c(14.5, 13.8)
  near(limits_nonparametric$LoD, c(12.7, 13.8)),

  near(limits_parametric$LoB, c(8.8, 8.6)),

  # Values according to my math; the document reports c(13.9, 13.2)
  near(limits_parametric$LoD, c(13.9, 13.0))
)
```

## Appendix B
Evaluation of Limit of Detection by the Precision Profile Approach

### Table B1
Observed Within-Laboratory Precision and Mean Concentration for All Samples

```{r Table B1}

# Table B1 in wide form: one row per sample (A-F), with the mean concentration
# and within-laboratory SD (SD_WL) for each reagent lot in separate columns.
# Column names follow a "<measure>.<lot>" pattern that the tidy-up step splits on ".".
table_b1 <- tibble(
  sample_id = LETTERS[1:6],
  mean.lot1 = c(0.69, 1.42, 2.65, 4.08, 6.08, 10.36),
  SD_WL.lot1 = c(0.39, 0.39, 0.46, 0.55, 0.64, 1.12),
  mean.lot2 = c(0.78, 1.73, 2.89, 3.82, 6.33, 10.92), 
  SD_WL.lot2 = c(0.29, 0.54, 0.55, 0.63, 0.82, 1.38))

table_b1

```

### Tidy Table Cleanup
```{r Tidy Table Cleanup, Table B1, rows.print = 12}

# Reshape Table B1 to one row per sample and reagent lot.
# Steps: stack the "<measure>.<lot>" columns, split each name into the measure
# and the lot, spread the measures back out into `mean` and `SD_WL` columns,
# relabel "lot1" as "Lot 1", then turn the character columns into factors.
table_b1 <- table_b1 %>%
  pivot_longer(-sample_id) %>%
  separate(name, sep = "\\.", into = c("name", "reagent_lot")) %>%
  pivot_wider(names_from = name, values_from = value) %>%
  mutate(reagent_lot = str_replace(reagent_lot, "lot", "Lot ")) %>%
  # across(where()) replaces the superseded mutate_if()
  mutate(across(where(is.character), factor))

glimpse(table_b1)
```

### Figure B1
Precision Profiles of Within-Laboratory Precision (SD_WL) vs Measurand Concentration With Second-Order Polynomial Model Fit Overlays

```{r Figure B1}

# Fit SD_WL as a second-order (raw) polynomial of the mean, once per reagent lot.
# The models are kept in a list-column, the same pattern the probit and
# power-function fits in later chunks use (summarize + list) instead of do().
fits <- table_b1 %>%
  group_by(reagent_lot) %>%
  summarize(poly_fit = list(lm(SD_WL ~ poly(mean, 2, raw = TRUE))),
            .groups = "drop")

# These are the same coefficients that Excel returns
# Presumably this is what the guideline means when it says 'done using a spreadsheet'
# They differ from the coefficients reported in the guideline

map(fits$poly_fit, broom::tidy)

# Lot 1 Reported:    0.3741 / 0.0149 / 0.0055
# Lot 1 Calculated:  0.3756 / 0.0137 / 0.0056
# Lot 2 Reported:    0.2801 / 0.0817 / 0.0017
# Lot 2 Calculated:  0.3086 / 0.0747 / 0.0020

# Precision profile: observed points with a quadratic fit overlaid for each lot
table_b1 %>%
  ggplot(aes(x = mean, y = SD_WL)) +
  geom_point(aes(color = reagent_lot, shape = reagent_lot), size = 3) +
  stat_smooth(method = "lm", formula = y ~ poly(x, 2),
              aes(color = reagent_lot, linetype = reagent_lot), se = FALSE) +
  scale_shape_manual(values = c("diamond", "square"), name = "") +
  scale_color_discrete(name = "") +
  scale_x_continuous(name = "[Measurand] (ng/mL)",   limits = c(0, 12),  breaks = seq(0, 12,  by = 2)) +
  scale_y_continuous(name = "Within-Lab SD (ng/mL)", limits = c(0, 1.6), breaks = seq(0, 1.6, by = 0.2)) +
  scale_linetype_manual(values = c(1, 2), name = "")

```

### Equations B1-B3
```{r Equations B1-B3}

# Polynomial coefficients of the precision profile, SD_WL = C0 + C1 * x + C2 * x^2.
# Unable to calculate directly (either in R or via Excel), so input from text.
# Values calculated in this document, for comparison:
#   "Lot 1": 0.3756, 0.0137, 0.0056
#   "Lot 2": 0.3086, 0.0747, 0.0020
coefficients <- tibble(
  reagent_lot = c("Lot 1", "Lot 2"),
  C0 = c(0.3741, 0.2801),  # From text
  C1 = c(0.0149, 0.0817),  # From text
  C2 = c(0.0055, 0.0017)   # From text
)

N <- 80          # Measurements per sample
K <- 6           # Number of samples
N_tot <- N * K   # Total measurements

alpha <- 0.05
Z <- qnorm(1 - alpha)

# Multiplier with N_tot - K degrees of freedom.
# Note: this overwrites the `c_p` value computed for Appendix A.
c_p <- Z / (1 - (1 / (4 * (N_tot - K))))

LoB <- 0.51  # From section 5.4.3.2

# LoD estimate per lot: LoB plus c_p times the modelled SD evaluated at the LoB
LoD <- mutate(
  coefficients,
  LoD = LoB + c_p * (C0 + C1 * LoB + C2 * LoB^2)
)

```

### Table B2
Trial LoD Values vs MC
```{r Table B2, rows.print = 19}

# Trial measurand concentrations (mc) with the modelled SD_WL for each lot:
# a coarse 0.1-step grid followed by a fine 0.01-step grid over 1.1-1.2.
# The two grids overlap at 1.1 and 1.2.
#
# seq() with a fractional `by` accumulates floating-point error, so two values
# that print identically (e.g. 1.2 from each grid) are not guaranteed to be
# bit-identical. Rounding mc to the grid resolution makes the overlap exact, so
# distinct() below reliably removes the repeats.
table_b2 <- tibble(
  mc = round(c(seq(from = 0.5, to = 1.3, by = 0.1),
               seq(from = 1.1, to = 1.2, by = 0.01)),
             digits = 2),
  SD_WL.lot1 = c(0.383, 0.385, 0.387, 0.390, 0.392, 0.395, 0.397, 0.400, 0.403,
                 0.397, 0.397, 0.398, 0.398, 0.398, 0.399, 0.399, 0.399, 0.399, 0.400, 0.400),
  SD_WL.lot2 = c(0.348, 0.355, 0.363, 0.371, 0.379, 0.387, 0.394, 0.402, 0.410,
                 0.394, 0.395, 0.396, 0.397, 0.398, 0.398, 0.399, 0.400, 0.401, 0.401, 0.402))

# Tidy the table: one row per trial concentration and reagent lot
table_b2 <- table_b2 %>%
  distinct(mc, .keep_all = TRUE) %>%  # Remove the duplicate rows
  pivot_longer(-mc) %>%
  separate(name, into = c("name", "reagent_lot"), sep = "\\.") %>%
  pivot_wider(names_from = name, values_from = value) %>%
  mutate(reagent_lot = str_replace(reagent_lot, "lot", "Lot ")) %>%
  mutate(across(where(is.character), factor)) %>%
  arrange(reagent_lot)

# Calculate the trial LoD and the bias at each concentration
table_b2 <- table_b2 %>%
  mutate(trial_LoD = round(LoB + c_p * SD_WL, 2),
         bias = round(trial_LoD - mc, 2))

table_b2

# LoDs are where there is no bias (trial LoD equals the trial concentration)
table_b2 %>%
  filter(near(bias, 0)) %>%
  select(reagent_lot, LoD = trial_LoD)

```

## Appendix C
Evaluation of Limit of Detection by the Probit Approach

### Table C1
Observed Proportions of Positive Test Results With the Planned Dilutions
```{r Table C1}

# Convert "positives/total" strings such as "23/32" into numeric proportions.
# Splitting and dividing explicitly avoids running eval(parse()) on the strings.
fraction_to_rate <- function(fraction) {
  vapply(
    strsplit(fraction, "/", fixed = TRUE),
    function(parts) as.numeric(parts[[1]]) / as.numeric(parts[[2]]),
    numeric(1)
  )
}

# Table C1: positive/total counts per concentration for each reagent lot,
# plus the hit rate (proportion positive) derived from each count column.
table_c1 <- tibble(
  concentration = c(0.000, 0.025, 0.050, 0.150, 0.300, 0.500),
  positive_total.lot1 = c("0/22", "23/32", "29/32", "32/32", "32/32", "32/32"),
  positive_total.lot2 = c("0/22", "28/32", "32/32", "32/32", "32/32", "32/32"),
  positive_total.lot3 = c("0/22", "27/32", "32/32", "32/32", "32/32", "32/32"),
  hit_rate.lot1 = fraction_to_rate(positive_total.lot1),
  hit_rate.lot2 = fraction_to_rate(positive_total.lot2),
  hit_rate.lot3 = fraction_to_rate(positive_total.lot3))

table_c1
```

### Table C2
Observed Proportions of Positive Test Results With Additional Dilutions
```{r Table C2}

# Convert "positives/total" strings such as "11/30" into numeric proportions.
# Splitting and dividing explicitly avoids running eval(parse()) on the strings.
fraction_to_rate <- function(fraction) {
  vapply(
    strsplit(fraction, "/", fixed = TRUE),
    function(parts) as.numeric(parts[[1]]) / as.numeric(parts[[2]]),
    numeric(1)
  )
}

# Table C2: positive/total counts for the two additional dilutions,
# plus the hit rate (proportion positive) derived from each count column.
table_c2 <- tibble(
  concentration = c(0.006, 0.014),
  positive_total.lot1 = c("11/30", "15/30"),
  positive_total.lot2 = c("12/30", "22/30"),
  positive_total.lot3 = c("22/34", "31/34"),
  hit_rate.lot1 = fraction_to_rate(positive_total.lot1),
  hit_rate.lot2 = fraction_to_rate(positive_total.lot2),
  hit_rate.lot3 = fraction_to_rate(positive_total.lot3))

table_c2
```

### Tidy Table Cleanup
```{r Tidy Table Cleanup for Appendix C}

# Stack Tables C1 and C2 and reshape to one row per reagent lot and concentration.
# Only the numeric hit rates are kept; the "positives/total" text columns are dropped.
tables_c12 <- bind_rows(table_c1, table_c2) %>%
  select(-starts_with("positive_total.lot")) %>%
  pivot_longer(cols = -concentration, names_to = "name", values_to = "value") %>%
  # "hit_rate.lot1" -> name = "hit_rate", reagent_lot = "lot1"
  separate(col = name, into = c("name", "reagent_lot"), sep = "\\.") %>%
  pivot_wider(names_from = name, values_from = value) %>%
  mutate(reagent_lot = str_replace(reagent_lot, "lot", "Lot ")) %>%
  arrange(reagent_lot, concentration)

glimpse(tables_c12)
```


### Table C3
Summaries of the Probit Analysis Results
```{r Table C3}

# Probit regression of hit rate on log(concentration), fitted once per reagent lot.
# The zero-concentration rows are excluded because log(0) is undefined.
# The LoD is the concentration at which the fitted curve reaches a hit rate of
# 1 - alpha, using whatever value the global `alpha` holds at this point.
table_c3 <- tables_c12 %>%
  filter(concentration > 0) %>%
  group_by(reagent_lot) %>%
  summarize(
    probit_model = list(
      glm(hit_rate ~ log(concentration), family = quasibinomial(link = "probit"))
    ),
    .groups = "drop"
  ) %>%
  mutate(
    intercept = map_dbl(probit_model, function(model) coef(model)[[1]]),
    slope = map_dbl(probit_model, function(model) coef(model)[[2]]),
    LoD = exp((qnorm(1 - alpha) - intercept) / slope)
  )

select(table_c3, reagent_lot, LoD)
```


```{r Set up for probit figures}

# Draw the probit figure (hit rate vs. concentration) for one reagent lot.
#
# lot: reagent lot label as stored in the `reagent_lot` column, e.g. "Lot 1".
# Returns a ggplot object.
# Reads the globals `tables_c12` (observed hit rates), `table_c3` (fitted probit
# coefficients) and `alpha`.
plot_probit <- function(lot) {

  # Concentration at which the fitted probit model for `lot` reaches hit rate `p`.
  # The models in `table_c3` are fitted against log(concentration), hence exp().
  inv_probit_fn <- function(p) {

    intercept <- table_c3$intercept[table_c3$reagent_lot == lot]
    slope <- table_c3$slope[table_c3$reagent_lot == lot]

    # Use the requested probability rather than a hard-coded 1 - alpha
    exp((qnorm(p) - intercept) / slope)
  }

  tables_c12 %>%
    filter(reagent_lot == lot, concentration > 0) %>%
    ggplot(aes(x = concentration, y = hit_rate)) +
    geom_point() +
    # We fit to y ~ x because scale_x_log10 takes care of the log transformation
    geom_smooth(formula = y ~ x, color = "firebrick", fullrange = TRUE,
                method = "glm", method.args = list(family = quasibinomial(link = "probit"))) +
    # Dashed guides: the target hit rate and the concentration where the fit reaches it
    geom_hline(yintercept = 1 - alpha, linetype = 2) +
    geom_vline(xintercept = inv_probit_fn(1 - alpha), linetype = 2) +
    scale_x_log10(breaks = 10^(0:-3), limits = c(0.001, 1), name = "Concentration") +
    scale_y_continuous(name = "Probability", breaks = seq(0, 1, by = 0.1))
}

```

### Figure C1
Probit Analysis for Reagent Lot 1
```{r Figure C1}

# Probit figure for reagent lot 1, drawn with the plot_probit() helper
plot_probit("Lot 1")

```

### Figure C2
Probit Analysis for Reagent Lot 2
```{r Figure C2}

# Probit figure for reagent lot 2, drawn with the plot_probit() helper
plot_probit("Lot 2")

```

### Figure C3
Probit Analysis for Reagent Lot 3
```{r Figure C3}

# Probit figure for reagent lot 3, drawn with the plot_probit() helper
plot_probit("Lot 3")

```

## Appendix D
Evaluation of Limit of Quantitation

### Table D1
Limit of Quantitation as Functional Sensitivity

```{r Table D1}

# Table D1 in wide form: mean, SD and %CV of nine sample pools for two reagent lots.
# Column names follow a "<measure>.<lot>" pattern that the tidy-up step splits on ".".
# Text lacks precision, so the CVs calculated here (cv_calc) don't agree exactly
# with the CVs reported in the text (cv).
table_d1 <- tibble(
  # paste() already inserts a space, so the label is "Pool" (not "Pool "),
  # giving "Pool 1" rather than "Pool  1"
  sample_pool_id = paste("Pool", 1:9),
  mean.lot1 = c(0.040, 0.053, 0.080, 0.111, 0.137, 0.164, 0.190, 0.214, 0.245),
  sd.lot1   = c(0.016, 0.016, 0.016, 0.017, 0.014, 0.012, 0.011, 0.016, 0.013),
  cv_calc.lot1 = sd.lot1 / mean.lot1 * 100,  # CV, calculated using the reported values of mean and SD
  cv.lot1   = c(40.2,  29.6,  19.5,  15.1,  10.0,  7.4,   6.0,   7.5,   5.4),  # CV, as reported in text
  mean.lot2 = c(0.041, 0.047, 0.077, 0.106, 0.136, 0.159, 0.182, 0.205, 0.234),
  sd.lot2   = c(0.018, 0.014, 0.012, 0.019, 0.016, 0.015, 0.015, 0.016, 0.014),
  cv_calc.lot2 = sd.lot2 / mean.lot2 * 100,  # CV, calculated using the reported values of mean and SD
  cv.lot2   = c(44.1,  28.8,  15.1,  17.8,  11.4,  9.2,   8.4,   7.8,   6.2)   # CV, as reported in text
)
```


### Tidy Table Cleanup
```{r Tidy Table Cleanup, table D1}

# Reshape Table D1 to one row per sample pool and reagent lot.
# Steps: stack the "<measure>.<lot>" columns, split each name into the measure
# and the lot, spread the measures back out into columns, relabel "lot1" as
# "Lot 1", turn the character columns into factors, then sort.
table_d1 <- table_d1 %>%
  pivot_longer(-sample_pool_id) %>%
  separate(name, into = c("name", "reagent_lot"), sep = "\\.") %>%
  pivot_wider(names_from = name, values_from = value) %>%
  mutate(reagent_lot = str_replace(reagent_lot, "lot", "Lot ")) %>%
  # across(where()) replaces the superseded mutate_if()
  mutate(across(where(is.character), factor)) %>%
  arrange(reagent_lot, sample_pool_id)

glimpse(table_d1)

```

### Figure D1
Precision Profiles of Within Laboratory Precision (as %CV) vs. Measurand Concentration With a Power Function Model Fit Overlaid

```{r Figure D1}

accuracy_goal <- 10  # Target within-lab precision, in %CV

# Power-function model cv = C_0 * mean^C_1, fitted per reagent lot as a straight
# line on the log-log scale (log(cv) = log(C_0) + C_1 * log(mean)).
# The `LoD` column is the concentration at which the fitted curve equals
# `accuracy_goal`. NOTE(review): this appendix is about the limit of
# quantitation, so this value looks like an LoQ estimate despite the column name.
power_fits <- table_d1 %>%
  group_by(reagent_lot) %>%
  summarize(fit = list(lm(log(cv) ~ log(mean))),
            .groups = "drop") %>%
  mutate(C_0 = map_dbl(fit, ~ exp(coef(.)[[1]])),
         C_1 = map_dbl(fit, ~ coef(.)[[2]]),
         LoD = (accuracy_goal / C_0)^(1 / C_1))

# One stat_function layer per fitted curve (solid for the first lot, dashed for
# the second). force() pins each lot's coefficients inside its own closure.
fit_lines <- pmap(
  list(lty = c("solid", "dashed"),
       c0 = power_fits$C_0,
       c1 = power_fits$C_1),
  function(lty, c0, c1) {
    force(c0)
    force(c1)
    stat_function(linetype = lty, fun = function(x) c0 * x^c1)
  }
)

table_d1 %>%
  ggplot(aes(x = mean, y = cv)) +
  geom_point(aes(shape = reagent_lot, color = reagent_lot), size = 3) +
  # guide = "none" hides the legends (guide = FALSE is deprecated in ggplot2)
  scale_shape_manual(name = "", values = c("diamond", "square"), guide = "none") +
  scale_color_discrete(name = "", guide = "none") +
  scale_x_continuous(name = "[Troponin I] (ng/mL)", breaks = seq(0.025, 0.225, by = 0.05)) +
  scale_y_continuous(name = "Within-Lab Precision (%CV)", breaks = seq(0, 50, by = 10)) +
  fit_lines

power_fits %>%
  select(-fit)

```

### Table D2
Observed Means and SDs for Low Level Samples from Appendix A data
```{r Table D2}

# Reference concentration assigned to each low level sample
reference_values <- tibble(
  sample = paste("Low", 1:5),
  reference_value = c(26.1, 16.9, 13.1, 20.4, 27.8)
)

# Observed mean and SD of each low level sample, per reagent lot, next to its
# reference value. Only low level samples have reference values, so the blanks
# are filtered out up front instead of being summarised and then dropped by the join.
table_d2 <- tables_a14 %>%
  filter(sample_type == "Low") %>%
  # Between examples, the author changed the name of the reagent lots
  mutate(reagent_lot = recode(reagent_lot,
                              "Lot 1" = "Reagent A",
                              "Lot 2" = "Reagent B")) %>%
  group_by(reagent_lot, sample) %>%
  # .groups = "drop" returns an ungrouped result, as every other summary in this file does
  summarize(observed_mean = mean(value),
            observed_sd = sd(value),
            .groups = "drop") %>%
  # `sample` is a factor here but character in `reference_values`; align the key types
  mutate(sample = as.character(sample)) %>%
  inner_join(reference_values, by = "sample") %>%
  select(reagent_lot, sample, reference_value, observed_mean, observed_sd)

table_d2

```

### Table D3
TE Calculations for Low Level Samples from Appendix A Data
```{r Table D3}

# Total error (TE) per sample and reagent lot, as a percentage of the reference
# value: |bias| plus two observed SDs, divided by the reference value.
table_d3 <- table_d2 %>%
  mutate(bias = observed_mean - reference_value) %>%
  mutate(TE = (abs(bias) + 2 * observed_sd) / reference_value * 100) %>%
  select(reagent_lot, reference_value, sample, bias, TE)

# Average the TE of each sample across the two reagent lots
summarize(group_by(table_d3, sample), average_TE = mean(TE))

```

### Figure D2
Plot of Calculated TE vs. Reference Value for Low Level Samples by Two Reagent Lots, With Linear Regression Model Fit and Extrapolation

```{r Figure D2}

# TE vs. reference value for both reagent lots, with a single straight-line fit
# through all points. fullrange = TRUE extends the line across the whole x axis
# (10-45), beyond the observed reference values.
table_d3 %>%
  ggplot(aes(x = reference_value, y = TE)) +
  geom_point(aes(color = reagent_lot, shape = reagent_lot)) +
  # se = FALSE suppresses the confidence band outright (as in Figure B1)
  # instead of drawing a zero-width band with level = 0
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "black", fullrange = TRUE) +
  # Horizontal reference line at a total error of 21.6 %
  geom_hline(yintercept = 21.6) +
  scale_x_continuous(name = "[Estradiol] (pg/mL)", limits = c(10, 45), breaks = seq(10, 45, 5)) +
  scale_y_continuous(name = "Total Error (%)", limits = c(0, 60), breaks = seq(0, 60, 10)) +
  scale_color_discrete(name = "") +
  scale_shape_manual(name = "", values = c("square", "circle"))


```

###  Table D4
Observed Results for Reagent Lot A

```{r Table D4}

# Table D4 (reagent lot A) in wide form: one row per day/replicate pair
# (3 days x 3 replicates = 9 rows) and one column per sample pool.
# Values in each column are listed in row order: day 1 replicates 1-3, then day 2, then day 3.
table_d4 <- tibble(
  day = rep(1:3, each = 3),
  replicate = rep(1:3, times = 3),
  `Pool 1` = c(36.7, 37.9, 38.3, 36.8, 33.5, 39.2, 41.3, 37.9, 34.9),
  `Pool 2` = c(49.9, 50.0, 48.1, 47.8, 43.9, 45.6, 45.4, 51.5, 45.8),
  `Pool 3` = c(46.1, 43.1, 39.4, 47.3, 45.8, 44.8, 44.6, 47.3, 38.9),
  `Pool 4` = c(33.3, 34.2, 34.5, 43.1, 34.0, 37.1, 35.3, 32.4, 36.0),
  `Pool 5` = c(42.9, 41.8, 43.8, 46.3, 43.3, 46.0, 42.6, 41.1, 42.8))

```

###  Table D5
Observed Results for Reagent Lot B

```{r Table D5}

# Table D5 (reagent lot B) in wide form: one row per day/replicate pair
# (3 days x 3 replicates = 9 rows) and one column per sample pool.
# Values in each column are listed in row order: day 1 replicates 1-3, then day 2, then day 3.
table_d5 <- tibble(
  day = rep(1:3, each = 3),
  replicate = rep(1:3, times = 3),
  `Pool 1` = c(38.5, 41.0, 43.2, 36.8, 42.1, 35.8, 36.8, 44.1, 39.5),
  `Pool 2` = c(45.8, 47.8, 46.6, 46.9, 51.3, 50.5, 44.3, 47.5, 52.4),
  `Pool 3` = c(46.7, 43.6, 42.4, 46.5, 47.9, 42.7, 42.1, 43.4, 44.7),
  `Pool 4` = c(35.5, 40.0, 34.0, 32.9, 33.1, 38.6, 36.2, 41.4, 33.0),
  `Pool 5` = c(42.0, 44.1, 43.2, 46.6, 45.5, 43.5, 41.4, 48.2, 45.7))

```

## Appendix E
Verification of Limits of Blank and Detection Claims

### Table E1
Observed Blank and Positive Results for LoB/LoD Verification (Units are ng/mL)

```{r Table E1}

# Table E1: 24 blank and 24 positive results, each listed in ascending order
# alongside its rank.
table_e1 <- tibble(
  rank = seq_len(24),
  Blanks = c(
    rep(0, 8),
    1.08, 1.92, 2.38, 2.98, 3.80, 4.78, 7.30, 8.81,
    10.31, 11.29, 13.48, 14.39, 16.97, 17.40, 18.01, 22.65
  ),
  Positives = c(
    18.80, 19.02, 26.63, 26.91, 31.08, 33.99, 35.11, 35.90,
    41.67, 43.90, 46.32, 47.77, 47.99, 48.83, 54.67, 57.30,
    59.10, 61.17, 61.96, 62.97, 66.44, 73.44, 73.80, 75.71
  )
)

manufacturer_LoB <- 20  # ng/mL

# Proportions (multiply by 100 for a percentage):
# blanks at or below the claimed LoB, and positives strictly above it
below_LoB <- with(table_e1, sum(Blanks <= manufacturer_LoB) / length(Blanks))
above_LoD <- with(table_e1, sum(Positives > manufacturer_LoB) / length(Positives))
```

### Section 7, Table 1
```{r Compare against table 1 from section 7}
# The table in 7.1 is modified from 10.1373/clinchem.2003.029983
# I could not derive its values computationally, despite a few days' worth of trying,
# so the table is transcribed here as-is:
# N = number of measurements, obs_proportion_boundary = minimum observed
# proportion required for the claim to be verified.

sec7_1_t1 <- tibble(
  N = c(20, 30, 40, 50, 60, 70, 80, 90, 100, 150, 200, 250, 300, 400,  500, 1000),
  obs_proportion_boundary = c(0.85, 0.87, 0.88, 0.88, 0.90, 0.90, 0.90, 0.91, 0.91, 0.92, 0.92, 0.92,  0.93, 0.93, 0.93, 0.94)
)

# Find the first row in Table 1 from Section 7 where N is equal to or greater than the number of measurements in the study
# e.g., the greater of the bracketing N values
limit <- sec7_1_t1 %>%
  filter(N >= nrow(table_e1)) %>%
  head(1) %>%
  pull(obs_proportion_boundary)

# The table stops at N = 1000; a larger study yields no row, so fail clearly
# instead of with "argument is of length zero" from if()
if (length(limit) != 1) {
  stop("No acceptance boundary is tabulated for N = ", nrow(table_e1), call. = FALSE)
}

# Confirm that the acceptance criteria are met.
# && is the scalar, short-circuiting operator intended for if() conditions.
if (below_LoB > limit &&    # More blanks are below the limit than the minimum allowed
    above_LoD > limit) {    # More positives are above the limit than the minimum allowed
  message("Limit of blank and limit of detection claims are verified")
}

```

## Appendix F
Verification of Limit of Quantification Claim

### Table F1
Observed Results for LoQ Verification (Units are pg/mL)

```{r Table F1}
# Table F1 in wide form: nine results for each of five samples.
# NOTE(review): `day` cycles 1, 2, 3, 1, 2, 3, ... here, whereas the other tables
# in this file use rep(..., each = n) blocks - confirm against the guideline.
# `day` is not used in the calculations below.
table_f1 <- tibble(
  day = rep(c(1, 2, 3), 3),
  `Sample 1` = c(47.8, 44.6, 47.1, 50.8, 48.2, 52.5, 49.4, 52.0, 46.3),
  `Sample 2` = c(47.3, 48.8, 47.6, 54.7, 50.7, 50.8, 52.5, 50.4, 49.6),
  `Sample 3` = c(49.7, 51.2, 57.3, 54.6, 49.3, 53.3, 58.0, 49.5, 52.2),
  `Sample 4` = c(50.4, 49.5, 44.0, 51.5, 51.4, 49.8, 46.1, 45.7, 50.9),
  `Sample 5` = c(53.7, 52.7, 55.9, 55.1, 55.5, 57.3, 51.8, 48.8, 51.7)
)

# Allowable total error, as a fraction of the target value (0.15 = 15 %)
te_limit <- 0.15

# Acceptance window for each sample: target value +/- te_limit, rounded to one decimal
table_f1_limits <- tibble(
  sample = paste("Sample", 1:5),
  target_value = c(46.4, 45.8, 49.1, 46.3, 49.7),
  lower_limit = round(target_value * (1 - te_limit), digits = 1),
  upper_limit = round(target_value * (1 + te_limit), digits = 1)
)
```

```{r Tidy Table Cleanup}
# Reshape to one row per result, then attach each sample's target value and limits
table_f1 <- left_join(
  pivot_longer(
    table_f1,
    cols = starts_with("Sample"),
    names_to = "sample",
    values_to = "value"
  ),
  table_f1_limits,
  by = "sample"
)
```

```{r Calculate Outliers}

# Flag every result that falls outside its sample's [lower_limit, upper_limit] window
table_f1 <- mutate(
  table_f1,
  outlier = !(value >= lower_limit & value <= upper_limit)
)

# Proportion of results that fall inside their window
meets_TE_goal <- with(table_f1, sum(!outlier) / length(outlier))

# Reference table 1 in section 7 again to find acceptable limit:
# the boundary of the first row whose N is at least the number of results
limit <- head(
  sec7_1_t1$obs_proportion_boundary[sec7_1_t1$N >= nrow(table_f1)],
  1
)

# Confirm that the acceptance criteria are met
if (meets_TE_goal > limit) {
  message("Limit of quantification claim is verified")
}

```