OriginalCode.Rmd

---
title: "Code for analyses (original)"
description: |
  Code used for analyses in paper.
author:
  - name: Rachael Burke
    url: https://www.lshtm.ac.uk/aboutus/people/burke.rachael
    affiliation: LSHTM
    affiliation_url: 
date: "`r Sys.Date()`"
output: distill::distill_article
---

RMD document for analysis for spine paper

## 0. Setup
Load the required libraries and data

```{r zeroa, echo=TRUE, message=FALSE, warning=FALSE}
## NB. one of these has "MASS" as a dependency, MASS also has 'select' as a function, so when you want normal 'select', need to specify dplyr::select
library(tidyverse)
library(lubridate)
library(knitr)
library(here)
library(janitor)
library(gt)
library(markdown)
library(ggtext)
library(naniar)
library(arsenal)
library(wesanderson)

library(rcompanion)
library(modelr)
library(vcd)
library(emmeans)
library(fitdistrplus)
library(splines)
library(rms)
library(stats)
library(Amelia)
library(mice)
library(boot)
library(broom)


# Load the individual-level admissions data; this creates `spine`.
load(here("data-raw", "spine.rda"))

# Keep only the analysis variables and recode HIV / ART status to readable
# labels.
# NOTE(review): the helper column `art2` stays in `spine` (as a 7th, character
# column) -- confirm whether it should be dropped before the imputation step.
spine <- spine %>%
  dplyr::select(outcome, sex, quarter, ageg10, hiv, art) %>%
  mutate(
    hiv = fct_recode(as.factor(hiv),
                     "Negative" = "NON-REACTIVE",
                     "Positive" = "REACTIVE"),
    # Confirmed defaulters count as not on ART; any other value stays NA.
    art2 = case_when(art == "CONFIRMED DEFAULTER" ~ "no_art",
                     art == "NO" ~ "no_art",
                     art == "YES" ~ "yes_art"),
    # ART status is set to missing whenever HIV status is missing
    # (done directly with a typed NA instead of a "999" placeholder).
    art2 = if_else(is.na(hiv), NA_character_, art2),
    art = as.factor(art2)
  )

# Load the census denominators; this creates `cens_edit`.
load(here::here("data-raw", "cens_edit.rda"))

# Pools predictions across imputations with Amelia::mi.meld().
# Adapted (with some tweaks) from
# https://gist.github.com/andrewheiss/a3134085e92c6607db39c5b14e1b879e
#
# x: data frame with columns `.fitted` and `.se.fit` (one row per imputation).
# Returns a one-row tibble with the melded `.fitted` and `.se.fit`.
meld_predictions <- function(x) {
  estimates <- matrix(x$.fitted)
  std_errors <- matrix(x$.se.fit)
  pooled <- mi.meld(q = estimates, se = std_errors)
  tibble(
    .fitted = as.numeric(pooled$q.mi),
    .se.fit = as.numeric(pooled$se.mi)
  )
}

# Date breaks for the x-axis of the graphs, parsed as day-month-year:
# `breaks` holds three dates, `breaks1` holds four.
breaks <- dmy(c("01-10-2012","01-01-2016","01-07-2019"))
breaks1 <- dmy(c("01-10-2012","01-01-2015","01-04-2017","01-07-2019"))

```

## 1. Table one

```{r one,echo=TRUE, results="asis"}
# Table 1 compares the census population in the middle quarter of the census
# series with all admissions.
midpoint <- floor_date(median(cens_edit$quarter), unit = "quarter")

# Census side: expand the `pop` counts to one row per person. HIV status and
# outcome are not available for the census, so they are set to NA.
a <- cens_edit %>%
  filter(quarter == midpoint) %>%
  uncount(pop) %>%
  mutate(which = "census", hiv = NA, outcome = NA) %>%
  dplyr::select(-quarter)

# Admissions side: one row per admission.
b <- spine %>%
  mutate(which = "admissions") %>%
  dplyr::select(sex, ageg10, hiv, art, outcome, which) %>%
  mutate(outcome = as.factor(outcome))

c <- rbind(a, b)

x <- tableby(
  which ~ ageg10 + sex + includeNA(hiv) + includeNA(outcome) + art,
  data = c,
  total = FALSE,
  test = FALSE
)
summary(x)
```

## 2. Stacked barchart -- figure 1

```{r two, echo=TRUE, warning=FALSE}
# Total admissions per age group and sex, shown as "n=" labels in each facet.
label_sum <- spine %>%
  group_by(ageg10, sex) %>%
  dplyr::summarise(sum = n(), .groups = "drop_last")

color <- c(wes_palettes$Zissou1[1],wes_palettes$Zissou1[4],wes_palettes$Zissou1[5])

# Figure 1: proportion of admissions by HIV status per quarter, faceted by sex
# and age group. Missing HIV status is shown as its own "Unknown" category.
# (The unused per-quarter index and count columns are no longer computed.)
spine %>%
  mutate(
    hiv = replace_na(as.character(hiv), "Unknown"),
    hiv = factor(hiv, levels = c("Unknown", "Negative", "Positive"))
  ) %>%
  ggplot() +
  geom_bar(aes(fill = hiv, x = quarter), position = "fill") +
  geom_text(
    data = label_sum,
    aes(x = ymd("2014-10-01"), y = 0.95, label = paste0("n=", sum))
  ) +
  facet_grid(sex ~ ageg10) +
  scale_fill_manual(values = wes_palette("BottleRocket2")) +
  labs(
    #title="HIV status of adults admitted to QECH",
    #subtitle = "By quarter, and sex. n = total numer admissions between Q3.2012 to Q3.2019",
    x = "Year and quarter",
    y = "Proportion of admissions by HIV status",
    fill = "HIV status"
  ) +
  scale_y_continuous() +
  scale_x_date(breaks = breaks1, date_labels = "%b %Y") +
  theme_bw() +
  theme(
    panel.spacing = unit(1, "lines"),
    axis.text.x = element_text(angle = 90)
  )

ggsave(here::here("figures/fig_1.pdf"), width = 9, height = 6, dpi = 300)
```

## 3. Run imputation

Notes:  This takes about ten minutes to run on my laptop.  The file with imputation already done is in data  folder, and loads at start of next chunk.  To check / run imputation take out # (set here to avoid having to run imputation to knit file each time), and be careful about what file is loaded at start of next chunk.

25 imputations run as 25% missingness for HIV status.
Assume HIV status MAR

* For everyone whose HIV status was NA but was imputed to be HIV positive, ART status is imputed as not on ART. This is because people who don't know their HIV status (presumably a reasonable proportion of the people with HIV status "missing" in the database) won't be on ART.  ART status in people who are "truly" HIV positive but NA in the dataset is MNAR, and it is difficult to know what assumptions to make.  This is a relatively small proportion of patients, and is only relevant for part of the secondary objective.

``` {r threeimpute, echo=TRUE, warning=FALSE}
# Default predictor matrix: every variable helps impute every other variable
# (apart from itself). Rows are the variables being imputed, columns are the
# predictors.
pred_matrix <- make.predictorMatrix(spine)
# ART status is not used to impute anything else. Indexed by name rather than
# by position so that a change in column order cannot silently switch variable.
pred_matrix[, "art"] <- 0
# HIV status is not used to impute ART status: ART is only imputed for people
# who are HIV positive, so HIV status has no predictive value there.
pred_matrix["art", "hiv"] <- 0
# NOTE(review): `spine` also carries the helper column `art2`, which is not
# zeroed here and is not converted to logical in `where` -- confirm whether it
# should be removed before running mice().

# Logical data frame of the values that should be imputed. For most variables
# that is every NA; for ART it is only NAs among people known to be HIV positive.
where <- spine %>%
  mutate(
    art = is.na(art) & !is.na(hiv) & hiv == "Positive",
    hiv = is.na(hiv),
    ageg10 = is.na(ageg10),
    quarter = is.na(quarter),
    sex = is.na(sex),
    outcome = is.na(outcome)
  )

# Then run the imputation --- this takes about 10 minutes to run -  have '#-ed' it by default to avoid running it every time I knit the website.

######## IMPUTATION HERE!! ############
######################################

#spine_mice <- mice(spine,m=25,maxit=5,seed=123,where=where,method="pmm",predictorMatrix=pred_matrix)

#save(spine_mice, file = "data-raw/spine_mice.rda")

####################################
###################################
``` 


Load the imputed dataset, extract the completed data from the mids object, group by HIV status to get proportions / counts, and merge with the census data for the population denominator.

```{r threeb, echo=TRUE, warning=FALSE}
load(here("data-raw", "spine_mice.rda"))  #NB. loads from data folder; can recreate using code above
#summary(spine_mice)

# Stack the completed (imputed) datasets in long format, without the original
# incomplete data.
spine_imp <- mice::complete(spine_mice, action = "long", include = FALSE)

# Check the number of HIV positive admissions in each imputation.
x <- spine_imp %>%
  group_by(.imp) %>%
  filter(hiv == "Positive") %>%
  dplyr::mutate(number = n()) %>%
  dplyr::select(.imp, number) %>%
  unique()
hist(x$number, main="Imputed number HIV+ admissions (25 imputations)")

# Counts per imputation, quarter, age group, sex and HIV status.
# NB. 'outcome' is not of interest here, so it is not a grouping variable.
# NOTE(review): strata with no admissions have no row at all, so `n` below is
# never 0 -- confirm this is acceptable for the count models.
spine_imp_gp <- spine_imp %>%
  group_by(.imp, quarter, ageg10, sex, hiv) %>%
  dplyr::summarise(n_imputed = n(), .groups = "drop_last")

# Attach the census denominators, then add incidence per 100,000 and a running
# quarter index `q` (1 = earliest quarter).
# `group_indices(., quarter)` is deprecated since dplyr 1.0.0; grouping by
# quarter and taking cur_group_id() yields the same index.
both_imp_gp <- left_join(spine_imp_gp, cens_edit) %>%
  mutate(n = coalesce(n_imputed, 0L)) %>%  # 'n' is now from n_imputed
  mutate(inc = (n / pop) * 100000) %>%
  ungroup() %>%
  group_by(quarter) %>%
  mutate(q = cur_group_id()) %>%
  ungroup()

## Check imputed data (one of the imputations) by graphing original data and imputed 'observations'.
spine_imp2 <- mice::complete(spine_mice, action = "long", include = TRUE)
# TRUE where HIV status was recorded in the original data. The vector has one
# value per row of `spine` and is recycled over the stacked copies in
# `spine_imp2` (original data plus each imputation).
spine_imp2$hiv_recorded <- cci(spine$hiv)
spine_imp2 <- spine_imp2 %>% filter(.imp == 12) # take one of the imputations

# Admissions whose HIV status was observed (not imputed), counted per stratum.
# `q` is the running quarter index; cur_group_id() replaces the deprecated
# `group_indices(., quarter)` and gives the same values.
original_obs <- spine_imp2 %>%
  filter(hiv_recorded == TRUE) %>%
  group_by(quarter, ageg10, sex, hiv) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(quarter) %>%
  mutate(q = cur_group_id()) %>%
  ungroup()

original_obs %>%
  mutate(
    HIV = replace_na(as.character(hiv), "Unknown / Missing"),
    HIV = factor(HIV, levels = c("Unknown / Missing", "Negative", "Positive"))
  ) %>%
  ggplot() +
  geom_bar(aes(fill = HIV, x = quarter, y = n), stat = "identity") +
  facet_grid(sex ~ ageg10) +
  scale_fill_manual(values = wes_palette("BottleRocket2", 3)) +
  labs(
    title = "Original observations only (excl. NAs)",
    x = "Year and quarter",
    y = "Number of adult admissions",
    fill = "HIV status"
  ) +
  scale_y_continuous() +
  scale_x_date(breaks = breaks1, date_labels = "%b%y ") +
  theme_bw() +
  theme(
    panel.spacing = unit(1, "lines"),
    axis.text.x = element_text(angle = 90)
  )

# Admissions whose HIV status was missing in the original data, i.e. the
# values shown here are imputed. Counted per stratum.
# `q` is the running quarter index; cur_group_id() replaces the deprecated
# `group_indices(., quarter)` and gives the same values.
imputed_obs <- spine_imp2 %>%
  filter(hiv_recorded == FALSE) %>%
  group_by(quarter, ageg10, sex, hiv) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(quarter) %>%
  mutate(q = cur_group_id()) %>%
  ungroup()

# A `ylim(0, 200)` used to precede `scale_y_continuous()` in this plot. ggplot2
# keeps only the last y scale that is added, so the limit never took effect and
# has been removed; the rendered plot is unchanged. Add
# `coord_cartesian(ylim = c(0, 200))` if a fixed range is wanted.
imputed_obs %>%
  mutate(
    HIV = replace_na(as.character(hiv), "Unknown / Missing"),
    HIV = factor(HIV, levels = c("Unknown / Missing", "Negative", "Positive"))
  ) %>%
  ggplot() +
  geom_bar(aes(fill = HIV, x = quarter, y = n), stat = "identity") +
  facet_grid(sex ~ ageg10) +
  scale_fill_manual(values = wes_palette("BottleRocket2", 3)) +
  labs(
    title = "Imputed observations only",
    x = "Year and quarter",
    y = "Number of adult admissions",
    fill = "HIV status"
  ) +
  scale_y_continuous() +
  scale_x_date(breaks = breaks1, date_labels = "%b%y ") +
  theme_bw() +
  theme(
    panel.spacing = unit(1, "lines"),
    axis.text.x = element_text(angle = 90)
  )

# These two graphs looks fairly similar

```

## 4. Create the model for incidence of HIV positive neg binomial 3 splines

Creates 25 different models (one for each imputed data frame)

```{r four, echo=TRUE, warning=FALSE}
# Nest the grouped data by imputation: one row per .imp, with that imputation's
# rows held in the list-column `data`.
by_imp <- both_imp_gp %>%
  ungroup() %>%
  nest_by(.imp)

# Fit one negative binomial model per imputed dataset, for HIV positive
# admissions: admission count against a natural spline of the quarter index,
# fully interacted with age group and sex.
# NOTE(review): `offset(log(pop))` is passed as an unnamed extra argument, not
# inside the formula. Going by glm.nb()'s argument order (formula, data,
# weights, ...) it would be matched to `weights` rather than used as an offset
# -- confirm what was intended. The prediction code in this file subtracts
# log(pop) from .fitted itself, so the two need to be checked together.
# NOTE(review): `ns(q, knots = 3)` places one interior knot at q = 3; if three
# spline terms were intended, `df = 3` may be what is wanted -- confirm.
model_imp_n3 <- by_imp %>% dplyr::mutate(model = list(glm.nb(n ~ ns(q, knots = 3)*ageg10*sex, data = data %>% filter(hiv=="Positive"),offset(log(pop)))))

# Same model for HIV negative admissions (the data are filtered before nesting
# instead of inside the model call; the review notes above apply here too).
model_neg <- both_imp_gp %>% ungroup() %>% filter(hiv=="Negative") %>% nest_by(.imp) %>% mutate(model = list(glm.nb(n ~ ns(q, knots = 3)*ageg10*sex, data = data, offset(log(pop)))))

```

## 5. Get the predictions from the model 

Using Amelia mi.meld tools and Andrew Heiss' approach https://gist.github.com/andrewheiss/a3134085e92c6607db39c5b14e1b879e -- slightly modified

Build models separately for the incidence of both HIV positive and HIV negative admissions.

```{r five, echo=TRUE, warning=FALSE}

# Melds the per-imputation model predictions into one estimate for every
# combination of quarter index, age group and sex, then joins the result back
# onto the data and adds counts and incidence per 100,000.
#
# df                Grouped admissions data with denominators; needs `hiv`, `q`,
#                   `ageg10`, `sex` and `pop`.
# m                 Data frame with a list-column `model` holding one fitted
#                   model per imputation.
# var1, var2, var3  Not used inside the function; kept so existing calls work.
# x                 HIV status to keep (e.g. "Positive").
#
# Returns, invisibly, the rows of `df` with HIV status `x` joined to the melded
# predictions: `.fitted` / `.se.fit` on the log (link) scale, `n_est`, `n_low`,
# `n_high` as counts, and `inc`, `inc_low`, `inc_high` per 100,000 population.
# NOTE(review): the prediction grid is built from the global `both_imp_gp`, not
# from `df`; the calls in this file pass `both_imp_gp` as `df`, so they agree.
model2pred_imp <- function(df, m, var1, var2, var3, x) {
  # Every combination of quarter index, age group and sex to predict for.
  scaffold_df <- both_imp_gp %>%
    filter(hiv == x) %>% # HIV status
    ungroup() %>%
    dplyr::select(-.imp) %>%
    expand(q, ageg10, sex)

  df2 <- df %>% filter(hiv == x)

  # Predictions from each imputation's model over the grid. Only the columns
  # needed for melding are kept (the rest slowed everything down).
  per_model <- tibble(models = m$model) %>%
    mutate(
      m = seq_len(n()), # seq_len() stays correct even with zero models
      fitted = map(models, ~ augment(., newdata = scaffold_df, se_fit = TRUE))
    ) %>%
    unnest(fitted) %>%
    dplyr::select(m, q, ageg10, sex, .fitted, .se.fit)

  # Pool across imputations within each grid cell; `m` has to be nested along
  # with the estimates, otherwise there is nothing to meld over.
  melded <- per_model %>%
    group_by(q, ageg10, sex) %>%
    nest(data = c(m, .fitted, .se.fit)) %>%
    mutate(fitted_melded = map(data, meld_predictions)) %>%
    unnest(fitted_melded) %>%
    mutate(
      n_est = exp(.fitted),
      n_low = exp(.fitted - 1.96 * .se.fit),
      n_high = exp(.fitted + 1.96 * .se.fit)
    ) %>%
    left_join(df2) %>%
    mutate(
      inc = exp((.fitted - log(pop)) + log(100000)), # incidence per 100,000
      inc_low = exp(((.fitted - 1.96 * .se.fit) - log(pop)) + log(100000)),
      inc_high = exp(((.fitted + 1.96 * .se.fit) - log(pop)) + log(100000))
    ) %>%
    ungroup()

  # The original ended on an assignment, i.e. returned invisibly; kept as is.
  invisible(melded)
}

# Model predicted outputs, melded across imputations.
# The bare `q`, `ageg10`, `sex` arguments fill the var1-var3 parameters, which
# model2pred_imp() never evaluates.
out_n3 <- model2pred_imp(both_imp_gp,model_imp_n3,q,ageg10,sex,"Positive")  # so this has 25 imputed dataframes, but inc, n_est and their CIs are exactly the same now across all imputed data frames

# Same for HIV negative admissions.
out_neg <- model2pred_imp(both_imp_gp,model_neg,q,ageg10,sex,"Negative")
```

## 6. Plot the predictions

Dots are from imputed dataset 12, model predictions are from melding across all dataframes.


```{r six, echo=TRUE, warning=FALSE}
# Figure 2: modelled incidence of admission (line with 95% ribbon) for HIV
# positive and HIV negative adults, with points from imputed dataset 12.
ggplot() +
  # HIV positive: melded model estimate and interval
  geom_line(data = out_n3, aes(x = quarter, y = inc, color = hiv)) +
  geom_ribbon(
    data = out_n3,
    aes(x = quarter, ymin = inc_low, ymax = inc_high, fill = hiv),
    alpha = 0.2
  ) +
  # HIV negative: melded model estimate and interval
  geom_line(data = out_neg, aes(x = quarter, y = inc, color = hiv)) +
  geom_ribbon(
    data = out_neg,
    aes(x = quarter, ymin = inc_low, ymax = inc_high, fill = hiv),
    alpha = 0.2
  ) +
  # Incidence per 100,000 from a single imputed dataset
  geom_point(
    data = both_imp_gp %>% filter(.imp == 12, hiv == "Positive"),
    aes(x = quarter, y = ((n_imputed / pop) * 100000), color = hiv),
    size = 0.1
  ) +
  geom_point(
    data = both_imp_gp %>% filter(.imp == 12, hiv == "Negative"),
    aes(x = quarter, y = ((n_imputed / pop) * 100000), color = hiv),
    size = 0.1
  ) +
  facet_grid(sex ~ ageg10) +
  scale_x_date(breaks = breaks1, date_labels = "%b%Y ") +
  scale_y_continuous(limits = c(0, 500)) +
  scale_color_manual(values = c("Positive" = "#0066FF", "Negative" = "grey")) +
  scale_fill_manual(values = c("Positive" = "#0066FF", "Negative" = "grey")) +
  labs(
    title = "Incidence of hospital admission to QECH",
    subtitle = "Quarterly population level incidence, by age, sex and HIV status",
    caption = "Negative binomial model with three splines \n Points from one (of 25) imputed dataset, estimates combined across all datasets using Rubin's rules",
    fill = "HIV status",
    color = "HIV status",
    x = "Year and quarter",
    y = "Incidence of admission per 100,000 population"
  ) +
  theme_bw() %+replace%
  theme(
    panel.spacing.x = unit(0.5, "cm"),
    axis.text.x = element_text(angle = 90)
  )

ggsave(here("figures/fig_2.pdf"), width = 9, height = 6, dpi = 300)

```

## 7. Determine the number of admissions "averted" and CI in imputed datasets

"Averted" = number fewer admissions than would have been if Q3.2012 incidence stayed same.  Note 'Averted' used as a shorthand verb, not assuming causation.

CI using parametric bootstrapping

Set up functions

```{r seven, echo=TRUE}
# Point estimates of admissions 'averted' for every row of `df`.
#
# The incidence in the quarter starting 1 Oct 2012 is carried forward within
# each age group / sex / HIV stratum and multiplied by each quarter's population
# to give the 'worst case' count (`worst_est`). `actual_est` is the modelled
# count exp(.fitted), and `avert_est` is the difference. `.fitted` is kept
# explicit here because the bootstrap version swaps it for a random draw.
#
# df: model output with `.fitted`, `pop`, `quarter`, `ageg10`, `sex`, `hiv`;
#     may contain `.imp`.
# Returns the augmented data frame invisibly.
avert_est <- function(df) {
  # Every .imp holds exactly the same melded estimates, so one copy is enough.
  # Data without .imp (non-imputed analyses) is used unchanged.
  if (".imp" %in% names(df)) {
    df <- df %>%
      filter(.imp == 1) %>%
      dplyr::select(-.imp)
  }

  baseline <- df %>%
    filter(quarter == dmy("01 Oct 2012")) %>%
    mutate(q32012_inc = exp(.fitted - log(pop)))

  # After the join the baseline rows come first in each stratum, so fill()
  # copies the baseline incidence down to the remaining quarters.
  result <- baseline %>%
    right_join(df) %>%
    group_by(ageg10, sex, hiv) %>%
    fill(q32012_inc) %>%
    ungroup() %>%
    mutate(
      worst_est = q32012_inc * pop,
      actual_est = exp(.fitted),
      avert_est = worst_est - actual_est
    )

  invisible(result)
}

# Totals the per-row estimates into a single summary row.
#
# df:  data frame with `worst_est` and `actual_est` columns.
# lab: label stored in `NA_handling` (how missing HIV status was handled).
# Returns a one-row tibble: NA_handling, worst, actual, averted and averted_p
# (averted as a proportion of worst).
avert_all_data <- function(df, lab) {
  total_worst <- sum(df[["worst_est"]])
  total_actual <- sum(df[["actual_est"]])
  total_averted <- total_worst - total_actual

  tibble(
    NA_handling = lab,
    worst = total_worst,
    actual = total_actual,
    averted = total_averted,
    averted_p = total_averted / total_worst
  )
}

# One parametric-bootstrap draw of the 'averted' calculation.
#
# Same calculation as avert_est(), except that every use of `.fitted` is
# replaced by a random draw from a normal distribution with mean `.fitted` and
# standard deviation `.se.fit`. The draw is made on the link (log) scale and
# then exponentiated to the response (count) scale.
#
# rnorm() is vectorised over its mean and sd, so a single call draws one value
# per row; this replaces the earlier row-by-row pmap() construction.
#
# df: model output as produced by the model2pred_* functions; may contain
#     `.imp`.
# Returns, invisibly, the per-row estimates for each age group and sex:
# worst_est, actual_est, avert_est and avert_p_est.
avert_boot <- function(df) {
  # Every .imp holds exactly the same melded estimates, so one copy is enough.
  if (".imp" %in% names(df)) {
    df <- df %>% filter(.imp == 1)
  }

  DF <- df %>%
    filter(quarter == dmy("01 Oct 2012")) %>%
    # Baseline incidence: drawn log count minus log population.
    mutate(q32012_inc = exp(rnorm(n(), .fitted, .se.fit) - log(pop))) %>%
    right_join(df, by = c("q", "sex", "ageg10", ".fitted", ".se.fit", "n_est", "n_low", "n_high", "quarter", "pop", "n", "hiv", "inc", "inc_low", "inc_high")) %>%
    group_by(ageg10, sex, hiv) %>%
    fill(q32012_inc) %>%
    ungroup() %>%
    mutate(
      worst_est = q32012_inc * pop,
      actual_est = exp(rnorm(n(), .fitted, .se.fit)),
      avert_est = worst_est - actual_est,
      avert_p_est = avert_est / worst_est
    )

  invisible(DF)
}

# Statistic for boot(): one bootstrap draw summed over all strata.
# Returns c(worst, actual, averted, proportion averted).
boot_fx <- function(df) {
  draw <- avert_boot(df)
  total_worst <- sum(draw[["worst_est"]])
  total_actual <- sum(draw[["actual_est"]])
  total_averted <- sum(draw[["avert_est"]])
  c(total_worst, total_actual, total_averted, total_averted / total_worst)
}

# Statistic for boot(): one bootstrap draw summed within each age group and sex.
# Returns a numeric matrix with one row per age/sex stratum (ordered by ageg10,
# then sex) and columns worst_est, actual_est, avert_est, avert_p.
#
# The deprecated funs() helper is replaced by across(), and an unused second
# copy of the summary has been dropped.
boot_fx_byageandsex <- function(df) {
  avert_boot(df) %>%
    dplyr::arrange(ageg10, sex) %>%
    group_by(ageg10, sex) %>%
    summarise(
      across(c(worst_est, actual_est, avert_est), sum),
      .groups = "drop"
    ) %>%
    mutate(avert_p = avert_est / worst_est) %>%
    dplyr::select(worst_est, actual_est, avert_est, avert_p) %>%
    as.matrix()
}

# Returns the age group / sex combinations in the same row order that
# boot_fx_byageandsex() uses, so they can be bound to its output.
# The keys are taken from a fresh avert_boot() draw, as before, so calling this
# also advances the random number stream.
# NB. this name masks dplyr::group_vars() in the global environment.
# The deprecated funs() helper is replaced by across().
group_vars <- function(df) {
  avert_boot(df) %>%
    dplyr::arrange(ageg10, sex) %>%
    group_by(ageg10, sex) %>%
    summarise(
      across(c(worst_est, actual_est, avert_est), sum),
      .groups = "drop_last"
    ) %>%
    dplyr::select(ageg10, sex)
}
                                                                                        
```
Run for imputed dataset

``` {r sevenb, echo=TRUE}
# Run each function once as a check - the point estimates and a single
# bootstrap replicate should be fairly similar.
avert_all_data(avert_est(out_n3), "imp") # point estimates
boot_fx(out_n3)                          # one bootstrap replicate

# Now 1000 parametric bootstrap replicates. Each row of `boot_n3$t` is one
# replicate; the columns are worst / actual / averted / proportion averted.
boot_n3 <- boot(out_n3, boot_fx, R = 1000, sim = "parametric")
# Standard deviation of each column of the replicate matrix.
sd_n3 <- apply(boot_n3$t, 2, sd)
sd_n3[[1]]

# Check the bootstrap distributions.
hist(boot_n3$t[,1])
hist(boot_n3$t[,2])
hist(boot_n3$t[,3])
hist(boot_n3$t[,4])

## Make into a table
# Builds one summary row: point estimates, normal-approximation 95% intervals
# (estimate +/- 1.96 * bootstrap SD) and formatted "estimate (low to high)"
# strings.
#
# df:  model output passed on to avert_est().
# sd:  bootstrap SDs in the order worst, actual, averted, proportion averted.
# lab: label for the NA_handling column.
avert_table <- function(df, sd, lab) {
  z <- 1.96
  fmt_ci <- function(est, lo, hi, digits) {
    paste0(
      round(est, digits = digits), " (",
      round(lo, digits = digits), " to ",
      round(hi, digits = digits), ")"
    )
  }

  avert_all_data(avert_est(df), lab) %>%
    mutate(
      worst.hi = worst + z * (sd[[1]]),
      worst.lo = worst - z * (sd[[1]]),
      actual.hi = actual + z * (sd[[2]]),
      actual.lo = actual - z * (sd[[2]]),
      avert.hi = averted + z * (sd[[3]]),
      avert.lo = averted - z * (sd[[3]]),
      avertp.hi = averted_p + z * (sd[[4]]),
      avertp.lo = averted_p - z * (sd[[4]]),
      worst_ = fmt_ci(worst, worst.lo, worst.hi, 0),
      actual_ = fmt_ci(actual, actual.lo, actual.hi, 0),
      averted_ = fmt_ci(averted, avert.lo, avert.hi, 0),
      avertedp_ = fmt_ci(averted_p, avertp.lo, avertp.hi, 3)
    )
}

# Summary row for the multiply imputed analysis.
imp <- avert_table(out_n3, sd_n3, "imp")
```

## 8. Sensitivity analysis for estimates of admissions 'averted'  (Table 2 at the end of this)

Create grouped data and the number of admissions under various assumptions about HIV status (negative, positive, positive_and_unknown, negative_and_unknown) from the spine (individual) dataset and the census.  

Then filter this dataset for desired sensitivity analyses

```{r eight, echo=TRUE}
# Construct grouped data from spine and census dataset

# Counts admissions per quarter, age group and sex, and labels the counts with
# the HIV-status scenario they represent.
.count_admissions <- function(df, label) {
  df %>%
    group_by(quarter, ageg10, sex) %>%
    summarise(n = n(), .groups = "drop") %>%
    mutate(hiv = label)
}

# All admissions
spine_gp_all_orig <- .count_admissions(spine, "all")

# HIV positive
spine_gp_hivpos_orig <- spine %>%
  filter(hiv == "Positive") %>%
  .count_admissions("pos")

# HIV negative
spine_gp_hivneg_orig <- spine %>%
  filter(hiv == "Negative") %>%
  .count_admissions("neg")

# HIV status unknown
spine_gp_hivunk_orig <- spine %>%
  filter(is.na(hiv)) %>%
  .count_admissions("unk")

# HIV positive and unknown grouped together
spine_gp_hivposandunk_orig <- spine %>%
  filter(hiv == "Positive" | is.na(hiv)) %>%
  .count_admissions("posandunk")

spine_gp_orig <- rbind(
  spine_gp_all_orig,
  spine_gp_hivpos_orig,
  spine_gp_hivneg_orig,
  spine_gp_hivunk_orig,
  spine_gp_hivposandunk_orig
)

# Add census denominators, incidence per 100,000 and the running quarter index.
# cur_group_id() replaces the deprecated `group_indices(., quarter)` and gives
# the same values.
both_orig <- left_join(cens_edit, spine_gp_orig) %>%
  mutate(inc = (n / pop) * 100000) %>%
  ungroup() %>%
  group_by(quarter) %>%
  mutate(q = cur_group_id()) %>%
  ungroup()


# Predictions from a single (non-imputed) model `m` for every combination of
# quarter index, sex and age group in `df`, joined back onto `df`.
# Adds counts (n_est, n_low, n_high) and incidence per 100,000 (inc, inc_low,
# inc_high); the intervals are +/- 1.96 standard errors on the link scale.
model2pred_orig <- function(df, m) {
  grid <- data_grid(df, q, sex, ageg10, .model = m)
  link_preds <- augment(m, newdata = grid, type.predict = "link", se_fit = TRUE)

  link_preds %>%
    mutate(
      n_est = exp(.fitted),
      n_low = exp(.fitted - 1.96 * .se.fit),
      n_high = exp(.fitted + 1.96 * .se.fit)
    ) %>%
    left_join(df) %>%
    mutate(
      inc = exp((.fitted - log(pop)) + log(100000)), # incidence per 100,000
      inc_low = exp(((.fitted - 1.96 * .se.fit) - log(pop)) + log(100000)),
      inc_high = exp(((.fitted + 1.96 * .se.fit) - log(pop)) + log(100000))
    ) %>%
    ungroup()
}


```

And now run for non-imputed datasets


``` {r eightb, echo=TRUE}
# Sensitivity analysis: recorded HIV positive admissions only (unknowns are
# therefore treated as HIV negative).
both_orig_hivpos <- both_orig %>% filter(hiv=="pos")
# NOTE(review): `offset(log(pop))` is an unnamed argument here; going by
# glm.nb()'s argument order (formula, data, weights, ...) it would be matched
# to `weights`, not used as an offset -- confirm what was intended.
pos_only <- glm.nb(n ~ ns(q, knots=3)*sex*ageg10, offset(log(pop)), data=both_orig_hivpos)
out_pos <- model2pred_orig(both_orig_hivpos,pos_only)

# run function once to check - these should be fairly similar
avert_all_data(avert_est(out_pos),"Unknowns_neg") #df of output of point estimates
boot_fx(out_pos) # matrix of one boot rep of estimates

# now run function 1000 times and plot
boot_pos <- boot(out_pos,boot_fx,R=1000,sim="parametric") #1000 iterations (takes about 90 secs to run)
sd_pos <- c(sd(boot_pos$t[,1]),sd(boot_pos$t[,2]),sd(boot_pos$t[,3]),sd(boot_pos$t[,4]))
sd_pos[[1]]

# Check histogram output for THIS bootstrap (the earlier version plotted
# boot_n3, the imputed-data bootstrap, by mistake).
hist(boot_pos$t[,1])
hist(boot_pos$t[,2])
hist(boot_pos$t[,3])
hist(boot_pos$t[,4])

## Make into a table
na_neg <- avert_table(out_pos,sd_pos,"Unknowns_neg")

```

```{r eightc, echo=TRUE}
# Sensitivity analysis: HIV positive and unknown admissions together (unknowns
# are therefore treated as HIV positive).
both_orig_hivposunk <-  both_orig %>% filter(hiv=="posandunk")
# NOTE(review): `offset(log(pop))` is an unnamed argument here; going by
# glm.nb()'s argument order (formula, data, weights, ...) it would be matched
# to `weights`, not used as an offset -- confirm what was intended.
posandunk <- glm.nb(n ~ ns(q, knots=3)*sex*ageg10, offset(log(pop)), data=both_orig_hivposunk)
out_unk <- model2pred_orig(both_orig_hivposunk,posandunk)

# run function once to check - point estimates and bootstrap est (1 rep) should be fairly similar
avert_all_data(avert_est(out_unk),"Unknowns_pos") #df of output of point estimates
boot_fx(out_unk) # matrix of one boot rep of estimates

# now run function 1000 times and plot
boot_unk <- boot(out_unk,boot_fx,R=1000,sim="parametric") #1000 iterations (takes about 90 secs to run)
sd_unk <- c(sd(boot_unk$t[,1]),sd(boot_unk$t[,2]),sd(boot_unk$t[,3]),sd(boot_unk$t[,4]))
# Print the SD for THIS analysis (the earlier version printed sd_pos by mistake).
sd_unk[[1]]

hist(boot_unk$t[,1]) # check histogram output
hist(boot_unk$t[,2])
hist(boot_unk$t[,3])
hist(boot_unk$t[,4])

## Make into a table
na_pos <- avert_table(out_unk,sd_unk,"Unknowns_pos")
```

Construct table 2

```{r eightd, echo=TRUE}
# Table 2: imputed analysis, then unknowns-as-positive, then unknowns-as-negative.
rbind(imp, na_pos, na_neg) %>%
  dplyr::select(NA_handling, worst_, actual_, averted_, avertedp_) %>%
  gt()
```
We can sense check these with the actual 'actual' numbers (not model estimates) for where we make assumptions about NAs.  Also confirms that model fits reasonably well if model predicted numbers v. close to actual numbers (neg binomial, 3 splines)

```{r eighte, echo=TRUE}
# Observed number of admissions recorded as HIV positive
sum(spine$hiv == "Positive", na.rm = TRUE)
# Observed number recorded as HIV positive or with HIV status missing
sum(is.na(spine$hiv) | spine$hiv == "Positive")
```

## 9. Admissions "averted" by age and sex


```{r nine, echo=TRUE}
# Reshapes a one-row table of bootstrap SDs into one row per age/sex stratum.
# Assumes the SDs are ordered as all strata for worst, then actual, averted and
# proportion averted (the column-wise layout of the matrix returned by
# boot_fx_byageandsex()). The number of strata is inferred from the input
# rather than hard-coded.
.agesex_sd <- function(sd_row, strata) {
  sd_names <- c("sd_worst_est", "sd_actual_est", "sd_averted_est", "sd_averted_perc")
  sd_tbl <- as.data.frame(
    matrix(unlist(sd_row, use.names = FALSE), ncol = length(sd_names), byrow = FALSE)
  )
  names(sd_tbl) <- sd_names
  cbind(sd_tbl, strata)
}

# Point estimates summed by age group and sex, with the bootstrap SDs attached.
# across() replaces the deprecated funs() helper.
.avert_by_agesex <- function(out, sd_tbl) {
  avert_est(out) %>%
    group_by(ageg10, sex) %>%
    summarise(
      across(c(worst_est, actual_est, avert_est), sum),
      .groups = "drop_last"
    ) %>%
    mutate(avert_p_est = avert_est / worst_est) %>%
    left_join(sd_tbl, by = c("ageg10", "sex")) %>%
    ungroup()
}

# Formats "estimate (low to high)" using estimate +/- 1.96 * SD.
.fmt_ci <- function(est, se, digits) {
  paste0(
    round(est, digits = digits), " (",
    round(est - 1.96 * (se), digits = digits), " to ",
    round((est + 1.96 * (se)), digits = digits), ")"
  )
}

# Imputed analysis: bootstrap by age and sex, then get standard deviations.
boot_n3_agesex <- boot(out_n3, boot_fx_byageandsex, R=1000, sim="parametric")
sd_out_n3_agesex <- boot_n3_agesex$t %>%
  as_tibble() %>%
  summarise_all(., sd)
sd_n3 <- .agesex_sd(sd_out_n3_agesex, group_vars(out_n3))

# Unknown HIV status treated as negative
boot_outpos_agesex <- boot(out_pos, boot_fx_byageandsex, R=1000, sim="parametric")
sd_outpos_agesex <- boot_outpos_agesex$t %>% as_tibble() %>% summarise_all(., sd)
sd_pos <- .agesex_sd(sd_outpos_agesex, group_vars(out_pos))

# Unknown HIV status treated as positive
boot_outunk_agesex <- boot(out_unk, boot_fx_byageandsex, R=1000, sim="parametric")
sd_outunk_agesex <- boot_outunk_agesex$t %>% as_tibble() %>% summarise_all(., sd)
sd_unk <- .agesex_sd(sd_outunk_agesex, group_vars(out_unk))

n3_ageandsex <- .avert_by_agesex(out_n3, sd_n3)
pos_ageandsex <- .avert_by_agesex(out_pos, sd_pos)
unk_ageandsex <- .avert_by_agesex(out_unk, sd_unk)

# Side-by-side: imputed columns keep their names, the sensitivity analyses get
# the suffixes .pos (unknowns as negative) and .unk (unknowns as positive).
a <- pos_ageandsex %>% left_join(unk_ageandsex, by=c("ageg10", "sex"), suffix=c(".pos", ".unk"))
b <- n3_ageandsex %>% left_join(a, by=c("ageg10","sex"), suffix=c(".imp",".cc"))

tab_3 <- b %>%
  dplyr::select(c(ageg10, sex, starts_with("avert"), starts_with("sd_avert"))) %>%
  mutate(
    imp = .fmt_ci(avert_est, sd_averted_est, 0),
    imp_p = .fmt_ci(avert_p_est, sd_averted_perc, 3),
    unk_neg = .fmt_ci(avert_est.pos, sd_averted_est.pos, 0),
    unk_neg_p = .fmt_ci(avert_p_est.pos, sd_averted_perc.pos, 3),
    unk_pos = .fmt_ci(avert_est.unk, sd_averted_est.unk, 0),
    unk_pos_p = .fmt_ci(avert_p_est.unk, sd_averted_perc.unk, 3)
  ) %>%
  dplyr::select(ageg10,sex,imp,unk_neg,unk_pos,imp_p,unk_neg_p,unk_pos_p)

# Table 3
tab_3 %>% gt()
```

## 10. Secondary objective -- outcome (dead or alive) Table of outcomes

```{r ten, echo=TRUE} 
# Cross-tabulate HIV / ART category against outcome in the original data.
a <- spine %>%
  mutate(
    hivart = case_when(
      hiv == "Positive" & art == "yes_art" ~ "HIV positive, on ART",
      hiv == "Positive" & art == "no_art" ~ "HIV positive, not on ART",
      hiv == "Positive" & is.na(art) ~ "HIV positive, ART status unknown",
      hiv == "Negative" ~ "HIV negative",
      is.na(hiv) ~ "HIV status unknown"
    )
  ) %>%
  tabyl(hivart, outcome)

# Percentages alongside the raw counts (suffixes "perc" and "n").
tab_3a <- left_join(
  adorn_pct_formatting(adorn_percentages(a), digits = 1),
  a,
  by = "hivart",
  suffix = c("perc", "n")
)
tab_3a %>% gt()
```

Mean of all 25 imputations

```{r}
# Outcome counts by HIV status, averaged over the imputations and rounded to
# whole numbers, then spread to one column per outcome.
b1 <- spine_imp %>%
  group_by(hiv, .imp, outcome) %>%
  summarise(n = n()) %>%
  group_by(hiv, outcome) %>%
  summarise(n = round(mean(n), digits = 0)) %>%
  pivot_wider(names_from = outcome, values_from = n) %>%
  mutate(total = `0` + `1`)

# Percentages alongside the counts (suffixes "perc" and "n").
tab_3b <- left_join(
  adorn_pct_formatting(adorn_percentages(b1), digits = 1),
  b1,
  by = "hiv",
  suffix = c("perc", "n")
)
tab_3b %>% gt()
```

```{r}
# Drop the original (un-imputed) data, which is stored as .imp == 0.
spine_imp4 <- spine_imp2 %>% filter(.imp != 0)

# Outcome counts by HIV / ART category, averaged over the imputations present.
# The last category is only reached by HIV positive rows without an ART value
# whose HIV status was not recorded in the original data.
b2 <- spine_imp4 %>%
  mutate(
    hivart = case_when(
      hiv == "Positive" & art == "yes_art" ~ "HIV positive, on ART",
      hiv == "Positive" & art == "no_art" ~ "HIV positive, not on ART",
      hiv == "Negative" ~ "HIV negative",
      hiv_recorded == FALSE & hiv == "Positive" ~ "HIV pos imputed, likely no ART"
    )
  ) %>%
  group_by(hivart, .imp, outcome) %>%
  summarise(n = n(), .groups = "keep") %>%
  group_by(hivart, outcome) %>%
  summarise(n = round(mean(n), digits = 0)) %>%
  pivot_wider(names_from = outcome, values_from = n) %>%
  mutate(total = `0` + `1`)

# Percentages alongside the counts (suffixes "perc" and "n").
tab_3c <- left_join(
  adorn_pct_formatting(adorn_percentages(b2), digits = 1),
  b2,
  by = "hivart",
  suffix = c("perc", "n")
)
tab_3c %>% gt()
```

In text (not table), ART use over time

```{r}
## % ART use over time

# Among HIV positive admissions with a known ART status: number on / not on
# ART and percentage on ART, one row per quarter.
# (A combined HIV/ART label was previously built here and never used; it has
# been removed.)
c <- spine %>%
  filter(hiv == "Positive") %>%
  filter(!is.na(art)) %>%
  group_by(quarter) %>%
  mutate(
    art_y = sum(art == "yes_art"),
    art_n = sum(art == "no_art"),
    n_all = n()
  ) %>%
  dplyr::select(art_y, art_n, n_all) %>%
  mutate(p_on_art = art_y / n_all * 100) %>%
  distinct()
c %>% arrange(p_on_art) %>% ungroup() %>% gt()
```


## 11. Secondary objective -- outcome (dead or alive) over time

Only looking within PLHIV group.  

```{r eleven, echo=TRUE}
# In all three datasets a missing ART status is recoded to "no_art", and `q` is
# the running quarter index. cur_group_id() replaces the deprecated
# `group_indices(., quarter)` and gives the same values.

# Individual-level imputed data, HIV positive, nested by imputation.
# The quarter index is computed before filtering on HIV status.
spine_outcome_pos <- spine_imp %>%
  mutate(art = as.factor(replace_na(as.character(art), "no_art"))) %>%
  ungroup() %>%
  group_by(quarter) %>%
  mutate(q = cur_group_id()) %>%
  ungroup() %>%
  filter(hiv == "Positive") %>% # filter for HIV
  nest_by(.imp)

# Same for HIV negative.
spine_outcome_neg <- spine_imp %>%
  mutate(art = as.factor(replace_na(as.character(art), "no_art"))) %>%
  ungroup() %>%
  group_by(quarter) %>%
  mutate(q = cur_group_id()) %>%
  ungroup() %>%
  filter(hiv == "Negative") %>% # filter for HIV
  nest_by(.imp)

# HIV positive admissions grouped by quarter, age group and sex, with deaths,
# survivors, odds and risk of death. Here the quarter index is computed after
# filtering on HIV status (it was previously computed twice; once is enough).
spine_outcome_gp <- spine_imp %>%
  filter(hiv == "Positive") %>% # filter for HIV
  mutate(art = as.factor(replace_na(as.character(art), "no_art"))) %>%
  ungroup() %>%
  mutate(ageg10 = as.factor(ageg10)) %>%
  group_by(quarter) %>%
  mutate(q = cur_group_id()) %>%
  ungroup() %>%
  group_by(.imp, q, quarter, ageg10, sex, hiv) %>% # NB. ART status (deliberately) not here
  summarise(
    n_died = sum(outcome == 1),
    n_alive = sum(outcome == 0),
    n_all = n(),
    .groups = "drop_last"
  ) %>%
  mutate(
    odds_death = n_died / n_alive, # adding in outcome data I need
    risk_death = n_died / n_all
  ) %>%
  ungroup() %>%
  nest_by(.imp)

# Pool predictions from per-imputation outcome models.
#
# df : patient-level data. If it has an `.imp` column, imputation 1 is used to
#      build the prediction grid; otherwise `df` is used as is. Must contain
#      hiv, quarter, ageg10 and sex.
# m  : table with a list-column `model` holding one fitted model per
#      imputation (e.g. the result of nest_by(.imp) + mutate(model = ...)).
#
# Returns one row per q x ageg10 x sex with the melded link-scale fit
# (.fitted, .se.fit), a normal-approximation 95% interval (rmin, rmax), their
# exponentials (chance_death, conf.low, conf.high) and the calendar `quarter`
# matching `q`.
#
# Changes from the earlier version:
#  * the q -> quarter lookup had one row per patient, so the final join
#    repeated every prediction once per admission in that quarter; it is now
#    one row per quarter.
#  * the lookup is derived from `df` (same rows as the grid) instead of the
#    global `spine_imp`.
#  * deprecated group_indices(., quarter) replaced by cur_group_id().
#  * the join key is explicit and 1:n() is replaced by seq_len(n()).
model2pred_outcome_imp <- function(df, m) {

  base_df <- if (".imp" %in% names(df)) {
    df %>% filter(.imp == 1) %>% dplyr::select(-.imp)
  } else {
    df
  }

  # NB. the grid is always built from HIV-positive rows
  base_df <- base_df %>%
    filter(hiv == "Positive") %>%
    ungroup() %>%
    group_by(quarter) %>%
    mutate(q = cur_group_id()) %>%
    ungroup()

  scaffold_df <- base_df %>% expand(q, ageg10, sex)
  quarter_lookup <- base_df %>% distinct(q, quarter)

  predict_melded <- tibble(models = m$model) %>%
    mutate(
      m = seq_len(n()),
      # augment() on a glm predicts on the link scale by default
      fitted = map(models, ~ augment(.x, newdata = scaffold_df, se_fit = TRUE))
    ) %>%
    dplyr::select(m, fitted) %>%  # drop the model objects before unnesting (slow otherwise)
    unnest(fitted) %>%
    dplyr::select(m, q, sex, ageg10, .fitted, .se.fit) %>%
    group_by(q, ageg10, sex) %>%  # one group per grid cell
    nest(data = c(m, .fitted, .se.fit)) %>%  # `m` must be nested too, otherwise nothing to meld over
    mutate(fitted_melded = map(data, meld_predictions)) %>%
    unnest(fitted_melded) %>%
    mutate(
      rmin = .fitted - (1.96 * .se.fit),
      rmax = .fitted + (1.96 * .se.fit)
    )

  # Back-transform from the log link and attach calendar quarters
  predict_melded %>%
    mutate(
      chance_death = exp(.fitted),
      conf.low = exp(rmin),
      conf.high = exp(rmax)
    ) %>%
    right_join(quarter_lookup, by = "q")
}

model2pred_outcome_orig <- function(df,m){
  # Predictions from a single (non-imputed) model `m`, evaluated over the
  # q x sex x ageg10 grid built from `df`.
  pred_grid <- data_grid(df, q, sex, ageg10, .model = m)

  # Link-scale fit with standard errors, plus a normal-approximation 95% interval
  link_pred <- augment(m, newdata = pred_grid, type.predict = "link", se_fit = TRUE)
  link_pred <- mutate(
    link_pred,
    rmin = .fitted - (1.96 * .se.fit),
    rmax = .fitted + (1.96 * .se.fit)
  )

  # Natural join back onto `df` (this is where `quarter` comes from).
  # NOTE(review): if `df` is patient-level, each prediction is repeated once
  # per matching patient row - confirm this is acceptable to callers.
  with_df <- left_join(link_pred, df)

  # exp() back-transforms the link-scale values
  mutate(
    with_df,
    chance_death = exp(.fitted),
    conf.low = exp(rmin),
    conf.high = exp(rmax)
  )
}

```

And the model (no ART included)

``` {r elevenb, echo=TRUE}
# One log-binomial model per imputed dataset (log link, so exponentiated
# predictions approximate risk rather than odds). `spine_outcome_pos` /
# `spine_outcome_neg` are row-per-imputation tables, so `data` below refers to
# that imputation's nested data frame.
death_risk_pos <- spine_outcome_pos %>%
  mutate(model = list(
    glm(outcome ~ q * ageg10 * sex, family = binomial(link = "log"), data = data)
  ))
death_out_risk_pos <- model2pred_outcome_imp(spine_imp, death_risk_pos)

death_risk_neg <- spine_outcome_neg %>%
  mutate(model = list(
    glm(outcome ~ q * ageg10 * sex, family = binomial(link = "log"), data = data)
  ))
death_out_risk_neg <- model2pred_outcome_imp(spine_imp, death_risk_neg)

# HIV-positive admissions per age group and sex in imputed dataset 2
label_sum <- spine_imp %>%
  filter(.imp == 2, hiv == "Positive") %>%
  group_by(ageg10, sex) %>%
  summarise(sum = n(), .groups = "keep")

# Figure 3: pooled model prediction (line + ribbon) with the empirical risk
# from one imputed dataset (.imp == 12) as points.
ggplot() +
  geom_line(
    aes(x = quarter, y = chance_death),
    data = death_out_risk_pos,
    color = "#0066CC"
  ) +
  geom_ribbon(
    aes(x = quarter, ymin = conf.low, ymax = conf.high),
    data = death_out_risk_pos,
    fill = "#1AB8FF",
    alpha = 0.2
  ) +
  facet_grid(sex ~ ageg10) +
  geom_point(
    aes(x = quarter, y = risk_death),
    data = spine_outcome_gp %>% unnest(data) %>% filter(.imp == 12),
    size = 0.3
  ) +
  #geom_text(data=label_sum, aes(x=ymd("2014-10-01"), y=0.7, label=paste0("n=",sum, sep="")), color="blue") +
  labs(
    title = "Risk of death if HIV positive and admitted to hospital",
    caption = "HIV status and outcome imputed where missing",
    y = "Risk of death (%)",
    x = "Year and quarter"
  ) +
  scale_x_date(breaks = breaks1, date_labels = "%b%y ") +
  coord_cartesian(ylim = c(0, 0.6)) +
  theme_bw() %+replace%
  theme(
    panel.spacing = unit(1, "lines"),
    axis.text.x = element_text(angle = 90)
  )

ggsave(here("figures/fig_3.pdf"), width = 9, height = 6, dpi = 300)

## What is trend line??
# Take the model and data from a single imputation (12 of 25) only
int_df2 <- filter(ungroup(death_risk_pos), .imp == 12) %>%
  dplyr::select(model, data)
outcome_model <- int_df2$model[[1]]

x <- tidy(outcome_model, conf.int = TRUE)

# Per-quarter trend (slope in q) for each age group within each sex, with
# pairwise comparisons between age groups
emtrends(
  outcome_model,
  pairwise ~ ageg10 | sex,
  var = "q",
  data = int_df2$data[[1]],
  transform = "response"
)
```

## 12. Sensitivity analysis (just those with known HIV)

```{r twelve, echo=TRUE}
# Complete-case sensitivity analysis: only admissions with a recorded positive
# HIV result, no imputation.
#
# Changes from the earlier version:
#  * predictions are stored in `death_out_risk_pos_cc` so the imputed
#    `death_out_risk_pos` from the main analysis is not overwritten.
#  * the ART recode was dead code (ART is not a grouping variable below) and
#    the quarter index was computed twice.
#  * deprecated group_indices(., quarter) replaced by cur_group_id().

# Empirical risk per quarter x age x sex (rows without an outcome excluded)
spine_gp_hivpos_orig <- spine %>%
  ungroup() %>%
  filter(hiv == "Positive", !is.na(outcome)) %>%
  mutate(ageg10 = as.factor(ageg10)) %>%
  group_by(quarter) %>%
  mutate(q = cur_group_id()) %>%
  group_by(q, quarter, ageg10, sex, hiv) %>%  # NB. ART status (deliberately) not here
  summarise(
    n_died = sum(outcome == 1),
    n_alive = sum(outcome == 0),
    n_all = n(),
    .groups = "drop"
  ) %>%
  mutate(
    odds_death = n_died / n_alive,
    risk_death = n_died / n_all
  )

# Patient-level data for the model; glm() drops rows with missing outcome
b <- spine %>%
  filter(hiv == "Positive") %>%
  group_by(quarter) %>%
  mutate(q = cur_group_id()) %>%
  ungroup()
death_risk_orig_pos <- glm(outcome ~ q * ageg10 * sex, family = binomial(link = "log"), data = b)
death_out_risk_pos_cc <- model2pred_outcome_orig(b, death_risk_orig_pos)

ggplot() +
  geom_line(
    data = death_out_risk_pos_cc,
    aes(x = quarter, y = chance_death),
    color = "#7F00FF"
  ) +
  geom_ribbon(
    data = death_out_risk_pos_cc,
    aes(x = quarter, ymin = conf.low, ymax = conf.high),
    alpha = 0.2, fill = "#7F00FF"
  ) +
  facet_grid(sex ~ ageg10) +
  geom_point(data = spine_gp_hivpos_orig, aes(x = quarter, y = risk_death), size = 0.3) +
  labs(
    title = "Risk of death if HIV positive and admitted to hospital (complete case)",
    caption = "Excluding those with missing HIV status, and those without outcome recorded",
    y = "Risk of death (%)",
    x = "Year and quarter"
  ) +
  scale_x_date(breaks = breaks1, date_labels = "%b%y ") +
  theme_bw() %+replace%
  theme(
    panel.spacing.x = unit(0.5, "cm"),
    axis.text.x = element_text(angle = 90)
  )

```


## 13. Does ART make any difference to these predictions?

```{r thirteen, echo=TRUE}
# Per-imputation log-binomial models for HIV-positive admissions.
# With ART as an extra interacting term (for illustration - it doesn't improve fit by AIC):
death_risk_pos_art <- spine_outcome_pos %>%
  mutate(model = list(
    glm(outcome ~ q * ageg10 * sex * art, family = binomial(link = "log"), data = data)
  ))

# Without ART (log link to approximate risk rather than odds):
death_risk_pos <- spine_outcome_pos %>%
  mutate(model = list(
    glm(outcome ~ q * ageg10 * sex, family = binomial(link = "log"), data = data)
  ))

# Pool predictions from per-imputation outcome models that include ART.
#
# df : patient-level data. If it has an `.imp` column, imputation 1 is used to
#      build the prediction grid; otherwise `df` is used as is. Must contain
#      hiv, quarter, ageg10, sex and art.
# m  : table with a list-column `model` holding one fitted model per
#      imputation.
#
# Returns the melded link-scale fit (.fitted, .se.fit), a normal-approximation
# 95% interval (rmin, rmax), their exponentials (chance_death, conf.low,
# conf.high) and the calendar `quarter` for each grid cell.
#
# Changes from the earlier version:
#  * the q -> quarter lookup had one row per patient, so the final join
#    repeated every prediction once per admission in that quarter; it is now
#    one row per quarter.
#  * the lookup is derived from `df` (same rows as the grid) instead of the
#    global `spine_imp`.
#  * deprecated group_indices(., quarter) replaced by cur_group_id().
#  * the join key is explicit and 1:n() is replaced by seq_len(n()).
model2pred_outcome_imp2 <- function(df, m) {  ## This adds ART

  base_df <- if (".imp" %in% names(df)) {
    df %>% filter(.imp == 1) %>% dplyr::select(-.imp)
  } else {
    df
  }

  # NB. the grid is always built from HIV-positive rows
  base_df <- base_df %>%
    filter(hiv == "Positive") %>%
    ungroup() %>%
    group_by(quarter) %>%
    mutate(q = cur_group_id()) %>%
    ungroup()

  # NOTE(review): if `art` is NA for some rows of `df`, the grid gets an NA
  # ART level too - confirm callers filter on art as intended.
  scaffold_df <- base_df %>% expand(q, ageg10, sex, art)  ## add in ART here
  quarter_lookup <- base_df %>% distinct(q, quarter)

  predict_melded <- tibble(models = m$model) %>%
    mutate(
      m = seq_len(n()),
      # augment() on a glm predicts on the link scale by default
      fitted = map(models, ~ augment(.x, newdata = scaffold_df, se_fit = TRUE))
    ) %>%
    dplyr::select(m, fitted) %>%  # drop the model objects before unnesting (slow otherwise)
    unnest(fitted) %>%
    dplyr::select(m, q, sex, ageg10, art, .fitted, .se.fit) %>%
    # `art` is not a grouping variable; it is left outside the nested columns,
    # so presumably nest() still keeps one row per ART level - TODO confirm
    group_by(q, ageg10, sex) %>%
    nest(data = c(m, .fitted, .se.fit)) %>%  # `m` must be nested too, otherwise nothing to meld over
    mutate(fitted_melded = map(data, meld_predictions)) %>%
    unnest(fitted_melded) %>%
    mutate(
      rmin = .fitted - (1.96 * .se.fit),
      rmax = .fitted + (1.96 * .se.fit)
    )

  # Back-transform from the log link and attach calendar quarters
  predict_melded %>%
    mutate(
      chance_death = exp(.fitted),
      conf.low = exp(rmin),
      conf.high = exp(rmax)
    ) %>%
    right_join(quarter_lookup, by = "q")
}

# Pooled predictions from the models that include ART
death_out_risk_pos_art <- model2pred_outcome_imp2(spine_imp, death_risk_pos_art)

# Model-level summaries (incl. AIC) for each imputation, with and without ART
models_art <- death_risk_pos_art %>%
  dplyr::select(model) %>%
  mutate(glanced = list(glance(model))) %>%
  unnest(glanced)
models_no_art <- death_risk_pos %>%
  dplyr::select(model) %>%
  mutate(glanced = list(glance(model))) %>%
  unnest(glanced)

# AIC(no ART) - AIC(with ART), one value per imputation: negative values mean
# the model without ART has the lower AIC.
models_no_art[["AIC"]] - models_art[["AIC"]]
# Author's observation: in every instance, models with no ART had lower AIC
# than models with ART included

# Predicted risk by ART status. One line layer and one ribbon layer coloured
# by `art` replace the four per-level layers used before; rows with any other
# (e.g. missing) ART value are excluded, as the per-level filters did.
# The title was missing its closing parenthesis.
ggplot(
  death_out_risk_pos_art %>% filter(art %in% c("no_art", "yes_art")),
  aes(x = q)
) +
  geom_line(aes(y = chance_death, color = art)) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = art), alpha = 0.3) +
  facet_grid(sex ~ ageg10) +
  labs(
    title = "Risk of death if HIV pos and admitted to hospital (by ART status)",
    y = "Risk of death (%)",
    x = "Year and quarter"
  ) +
  theme_bw()
```

## 14. Graphs and tables for supplementary appendix


## Supplementary Figure 1: Absolute numbers by age, sex, HIV status and quarter-year

S. Figure 1A: Absolute numbers of admissions, stacked bar chart of crude data.

```{r, echo=TRUE, warning=FALSE, message=FALSE,fig.width=9}
# Crude admission counts per quarter x age group x sex x HIV status, with a
# running quarter index `q` (1 = earliest quarter).
# cur_group_id() replaces the deprecated group_indices(., quarter).
spine_gp <- spine %>%
  count(quarter, ageg10, sex, hiv) %>%
  group_by(quarter) %>%
  mutate(q = cur_group_id()) %>%
  ungroup()

# Facet labels: total crude admissions per age group and sex.
# The earlier version used `label_sum`, which is computed from HIV-positive
# rows of one imputed dataset and so did not match the bars plotted here.
# NOTE(review): confirm total crude admissions is the intended "n=".
label_s1a <- spine_gp %>%
  count(ageg10, sex, wt = n, name = "sum")

spine_gp %>%
  mutate(
    HIV = factor(
      replace_na(as.character(hiv), "Unknown / Missing"),
      levels = c("Unknown / Missing", "Negative", "Positive")
    )
  ) %>%
  ggplot() +
  geom_bar(aes(fill = HIV, x = quarter, y = n), stat = "identity") +
  geom_text(
    data = label_s1a,
    aes(x = ymd("2014-10-01"), y = 200, label = paste0("n=", sum))
  ) +
  facet_grid(sex ~ ageg10) +
  #scale_fill_manual(values=wes_palettes$FantasticFox1[c(2,3,5)]) +
  scale_fill_manual(values = wes_palette("BottleRocket2", 3)) +
  labs(
    x = "Year and quarter",
    y = "Number of adult admissions",
    fill = "HIV status"
  ) +
  scale_y_continuous() +
  scale_x_date(breaks = breaks1, date_labels = "%b%y ") +
  theme_bw() +
  theme(
    panel.spacing = unit(1, "lines"),
    axis.text.x = element_text(angle = 90)
  )

ggsave(here("figures/S1A.pdf"), width = 10, height = 6, dpi = 300)
```


S. Figure 1B: Population level incidence of admissions, stacked bar chart of crude data.

```{r, echo=TRUE, warning=FALSE, message=FALSE,fig.width=9}
# Crude counts joined to census population, giving admissions per 100,000.
# `spine_gp` already carries the quarter index `q`, so it is not recomputed.
# (An earlier comment said `n` came from n_imputed; here it is the crude count.)
both <- left_join(spine_gp, cens_edit) %>%
  mutate(
    n = coalesce(n, 0L),  # treat a missing count as zero admissions
    inc = (n / pop) * 100000
  ) %>%
  ungroup()

# Facet labels: total crude admissions per age group and sex.
# The earlier version used `label_sum`, which is computed from HIV-positive
# rows of one imputed dataset and so did not match the data plotted here.
# NOTE(review): confirm total crude admissions is the intended "n=".
label_s1b <- spine_gp %>%
  count(ageg10, sex, wt = n, name = "sum")

both %>%
  mutate(
    HIV = factor(
      replace_na(as.character(hiv), "Unknown / Missing"),
      levels = c("Unknown / Missing", "Negative", "Positive")
    )
  ) %>%
  ggplot() +
  geom_bar(aes(fill = HIV, x = quarter, y = inc), stat = "identity") +
  geom_text(
    data = label_s1b,
    aes(x = ymd("2014-10-01"), y = 700, label = paste0("n=", sum))
  ) +
  facet_grid(sex ~ ageg10) +
  scale_fill_manual(values = wes_palette("BottleRocket2", 3)) +
  labs(
    x = "Year and quarter",
    y = "Incidence of adult admission (per 100,000 people)",
    fill = "HIV status"
  ) +
  scale_y_continuous() +
  scale_x_date(breaks = breaks1, date_labels = "%b%y ") +
  theme_bw() +
  theme(
    panel.spacing = unit(1, "lines"),
    axis.text.x = element_text(angle = 90)
  )

ggsave(here("figures/S1B.pdf"), width = 10, height = 6, dpi = 300)

```


### Supplementary table 1: Blantyre census for 2008 and 2018

  
S Table 1A: Measured population Blantyre (includes Blantyre City and Blantyre Rural administrative districts) at 2008 and 2018 census

```{r, echo=TRUE, warning=FALSE, message=FALSE}
# Blantyre census by quarter: collapse 5-year age bands into the 10-year bands
# used in the analysis, keep adults, and sum population over the
# administrative areas in the file (Blantyre City + Blantyre Rural).
load(here("data-raw", "blantyre_census_by_q.rda"))
age_levels10 <- c('15-24','25-34','35-44','45-54','55-64','65+')

cens <- blantyre_census_by_q %>%
  mutate(quarter = yq(year_q)) %>%
  rename(ageg5 = age) %>%
  mutate(
    ageg10 = case_when(
      ageg5 %in% c("15-19", "20-24") ~ "15-24",
      ageg5 %in% c("25-29", "30-34") ~ "25-34",
      ageg5 %in% c("35-39", "40-44") ~ "35-44",
      ageg5 %in% c("45-49", "50-54") ~ "45-54",
      ageg5 %in% c("55-59", "60-64") ~ "55-64",
      ageg5 %in% c("65-69", "70-74", "75+") ~ "65+"
    ),
    # make census age groups into an ordered factor
    ageg10 = ordered(ageg10, levels = age_levels10)
  )

# Adults are the rows that were given a 10-year band above.
# The earlier filter `ageg5 >= "15-19"` relied on `ageg5` being an ordered
# factor, but only `ageg10` is made ordered here. As a plain string
# comparison it would also keep "5-9" (because "5" sorts after "1").
# NOTE(review): any adult band not listed in the recode above is dropped here
# too - confirm the list of bands is complete for this census file.
cens_adults <- cens %>%
  filter(!is.na(ageg10))

# Get relevant census data (males and females only, not the "total" rows)
cens2 <- cens_adults %>%
  filter(sex != "total") %>%
  group_by(quarter, ageg10, sex) %>%
  summarise(pop = sum(population), .groups = "drop") %>%
  dplyr::select(pop, quarter, ageg10, sex)

# Population at the two census dates, one column per census
cens2 %>%
  filter(quarter == "2008-04-01" | quarter == "2018-04-01") %>%
  pivot_wider(names_from = quarter, values_from = pop) %>%
  gt()

```
  
    
S Table 1B: Population Blantyre at start (Q4.2012) and end (Q3.2019) study period [calculated using linear interpolation and extrapolation]

```{r, echo=TRUE, warning=FALSE, message=FALSE}
# Population at the start and end of the study period, with percentage change.
# Change is expressed relative to the START (2012-10-01) population. The
# earlier version divided by the end population, which understates growth
# (e.g. 100 -> 150 was reported as 33% rather than 50%).
cens_edit %>%
  filter(quarter == "2012-10-01" | quarter == "2019-07-01") %>%
  pivot_wider(names_from = quarter, values_from = pop) %>%
  mutate(
    change = paste0(
      round((`2019-07-01` - `2012-10-01`) / `2012-10-01` * 100, digits = 0),
      "%"
    )
  ) %>%
  ungroup() %>%
  gt()

```

### Supplementary figure 2: Sensitivity analysis for incidence (imputation)

  
S. Figure 2A: Everyone with HIV status unknown / missing is treated as if HIV negative

```{r, echo=TRUE, warning=FALSE, message=FALSE}
# Model-predicted incidence (line + 95% ribbon) from `out_pos`, with the
# empirical incidence from `both_orig_hivpos` overlaid as points.
ggplot(out_pos, aes(x = quarter)) +
  geom_line(aes(y = inc), colour = "#1AB8FF") +
  geom_ribbon(aes(ymin = inc_low, ymax = inc_high), fill = "#1AB8FF", alpha = .3) +
  geom_point(aes(y = inc), data = both_orig_hivpos, size = 0.5) +
  facet_grid(sex ~ ageg10) +
  labs(
    title = "Incidence of 'known HIV+' hospital admission",
    subtitle = "Emprical (dots) and model-predicted estimates",
    x = "Year and quarter",
    y = "Incidence per 100,000 people (95% CIs)",
    caption = "Negative binomial model. Spline term 3 knots. People with unknown HIV status treated as if negative"
  ) +
  scale_x_date(breaks = breaks1, date_labels = "%b%y ") +
  theme_bw() +
  theme(
    panel.spacing = unit(1, "lines"),
    axis.text.x = element_text(angle = 90)
  )

ggsave(here("figures/S2A.pdf"), width = 10, height = 6, dpi = 300)

```

  
S. Figure 2B: Everyone with HIV status unknown / missing is treated as if HIV positive

```{r, echo=FALSE, warning=FALSE, message=FALSE}
# Model-predicted incidence (line + 95% ribbon) from `out_unk`, with the
# empirical incidence from `both_orig_hivposunk` overlaid as points.
ggplot(out_unk, aes(x = quarter)) +
  geom_line(aes(y = inc), colour = "red") +
  geom_ribbon(aes(ymin = inc_low, ymax = inc_high), fill = "red", alpha = .3) +
  geom_point(aes(y = inc), data = both_orig_hivposunk, size = 0.5) +
  facet_grid(sex ~ ageg10) +
  labs(
    title = "Incidence of HIV positve hospital admission - HIV unknown assumed positive",
    subtitle = "Emprical (dots) and model-predicted estimates",
    x = "Year and quarter",
    y = "Incidence per 100,000 people (95% CIs)",
    caption = "Negative binomial model"
  ) +
  scale_x_date(breaks = breaks1, date_labels = "%b%y ") +
  theme_bw() +
  theme(
    panel.spacing = unit(1, "lines"),
    axis.text.x = element_text(angle = 90)
  )

ggsave(here("figures/S2B.pdf"), width = 10, height = 6, dpi = 300)

```


### Supplementary figure 3: Sensitivity analysis for incidence (choice of model)
  
    
S. Figure 3A: Negative binomial distribution without spline terms

```{r, echo=TRUE, warning=FALSE, message=FALSE}
# Negative binomial model without spline terms, one fit per imputed dataset.
#
# The population offset is now part of the formula. It used to be passed as an
# unnamed extra argument, `glm.nb(formula, data = ..., offset(log(pop)))`,
# which R matches to the next free formal - `weights` - so log(pop) was used
# as prior weights and no offset was fitted.
# NOTE(review): predictions now need `pop` in the data that model2pred_imp()
# predicts on; check that, and check other models in this file for the same
# positional pattern.
model_imp_n0 <- by_imp %>%
  mutate(model = list(glm.nb(
    n ~ q * ageg10 * sex + offset(log(pop)),
    data = data %>% filter(hiv == "Positive")
  )))

out_n0 <- model2pred_imp(both_imp_gp, model_imp_n0, q, ageg10, sex, "Positive")

# Pooled prediction (line + ribbon); points are from one imputed dataset
ggplot() +
  geom_line(data = out_n0, aes(x = quarter, y = inc, color = hiv)) +
  geom_ribbon(
    data = out_n0,
    aes(x = quarter, ymin = inc_low, ymax = inc_high, fill = hiv),
    alpha = 0.2
  ) +
  geom_point(
    data = both_imp_gp %>% filter(.imp == 12) %>% filter(hiv == "Positive"),
    aes(x = quarter, y = ((n_imputed / pop) * 100000), color = hiv),
    size = 0.1
  ) +
  facet_grid(sex ~ ageg10) +
  scale_x_date(breaks = breaks1, date_labels = "%b%Y ") +
  scale_y_continuous(limits = c(0, 500)) +
  scale_color_manual(values = c("Positive" = "#00A08A", "Negative" = "grey")) +
  scale_fill_manual(values = c("Positive" = "#00A08A", "Negative" = "#eeeeee")) +
  labs(
    subtitle = "Negative binomial, no spline term",
    caption = "Negative binomial model without splines \n Points from one (of 25) imputed dataset, estimates merged across all imputed datasets",
    y = "Incidence of admission per 100,000 population",
    x = "Year and quarter"
  ) +
  theme_bw() %+replace%
  theme(
    panel.spacing.x = unit(0.5, "cm"),
    axis.text.x = element_text(angle = 90)
  )

ggsave(here("figures/S3A.pdf"), width = 10, height = 6, dpi = 300)


```

  
S. Figure 3B: Negative binomial distribution with 5 knots in spline

```{r, echo=TRUE, warning=FALSE, message=FALSE}
# Negative binomial model with a natural spline on quarter, one fit per
# imputed dataset.
#
# The population offset is now part of the formula. It used to be passed as an
# unnamed extra argument, `glm.nb(formula, data = ..., offset(log(pop)))`,
# which R matches to the next free formal - `weights` - so log(pop) was used
# as prior weights and no offset was fitted.
# NOTE(review): predictions now need `pop` in the data that model2pred_imp()
# predicts on; check that, and check other models in this file for the same
# positional pattern.
# NOTE(review): in splines::ns() `knots =` gives knot LOCATIONS, so
# `knots = 5` is a single interior knot at q = 5, not five knots (`df =`
# controls how many). Left unchanged - confirm which was intended.
model_imp_n5 <- by_imp %>%
  mutate(model = list(glm.nb(
    n ~ ns(q, knots = 5) * ageg10 * sex + offset(log(pop)),
    data = data %>% filter(hiv == "Positive")
  )))

out_n5 <- model2pred_imp(both_imp_gp, model_imp_n5, q, ageg10, sex, "Positive")

# Pooled prediction (line + ribbon); points are from one imputed dataset
ggplot() +
  geom_line(data = out_n5, aes(x = quarter, y = inc, color = hiv)) +
  geom_ribbon(
    data = out_n5,
    aes(x = quarter, ymin = inc_low, ymax = inc_high, fill = hiv),
    alpha = 0.2
  ) +
  geom_point(
    data = both_imp_gp %>% filter(.imp == 12) %>% filter(hiv == "Positive"),
    aes(x = quarter, y = ((n_imputed / pop) * 100000), color = hiv),
    size = 0.1
  ) +
  facet_grid(sex ~ ageg10) +
  scale_x_date(breaks = breaks1, date_labels = "%b%Y ") +
  scale_y_continuous(limits = c(0, 500)) +
  scale_color_manual(values = c("Positive" = "#00A08A", "Negative" = "grey")) +
  scale_fill_manual(values = c("Positive" = "#00A08A", "Negative" = "#eeeeee")) +
  labs(
    subtitle = "Negative binomial, spline term with 5 knots ",
    caption = "Negative binomial model with splines w. 5 knots \n Points from one (of 25) imputed dataset, estimates merged across all imputed datasets",
    y = "Incidence of admission per 100,000 population",
    x = "Year and quarter"
  ) +
  theme_bw() %+replace%
  theme(
    panel.spacing.x = unit(0.5, "cm"),
    axis.text.x = element_text(angle = 90)
  )

ggsave(here("figures/S3B.pdf"), width = 10, height = 6, dpi = 300)


```

  
S. Figure 3C: Poisson distribution, 3 knot splines

```{r, echo=TRUE, warning=FALSE, message=FALSE}
# Poisson model with a natural spline on quarter, one fit per imputed dataset.
#
# The population offset is now part of the formula. It used to be passed as an
# unnamed extra argument, `glm(formula, data = ..., family = ...,
# offset(log(pop)))`, which R matches to the next free formal - `weights` -
# so log(pop) was used as prior weights and no offset was fitted.
# NOTE(review): predictions now need `pop` in the data that model2pred_imp()
# predicts on; check that, and check other models in this file for the same
# positional pattern.
# NOTE(review): in splines::ns() `knots =` gives knot LOCATIONS, so
# `knots = 3` is a single interior knot at q = 3, not three knots (`df =`
# controls how many). Left unchanged - confirm which was intended.
model_imp_p3 <- by_imp %>%
  mutate(model = list(glm(
    n ~ ns(q, knots = 3) * ageg10 * sex + offset(log(pop)),
    family = "poisson",
    data = data %>% filter(hiv == "Positive")
  )))

out_p3 <- model2pred_imp(both_imp_gp, model_imp_p3, q, ageg10, sex, "Positive")

# Pooled prediction (line + ribbon); points are from one imputed dataset
ggplot() +
  geom_line(data = out_p3, aes(x = quarter, y = inc, color = hiv)) +
  geom_ribbon(
    data = out_p3,
    aes(x = quarter, ymin = inc_low, ymax = inc_high, fill = hiv),
    alpha = 0.2
  ) +
  geom_point(
    data = both_imp_gp %>% filter(.imp == 12) %>% filter(hiv == "Positive"),
    aes(x = quarter, y = ((n_imputed / pop) * 100000), color = hiv),
    size = 0.1
  ) +
  facet_grid(sex ~ ageg10) +
  scale_x_date(breaks = breaks1, date_labels = "%b%Y ") +
  scale_y_continuous(limits = c(0, 500)) +
  scale_color_manual(values = c("Positive" = "#00A08A", "Negative" = "grey")) +
  scale_fill_manual(values = c("Positive" = "#00A08A", "Negative" = "#eeeeee")) +
  labs(
    subtitle = "Poisson distribution, spline term with 3 knots",
    caption = "Poisson model with splines w. 3 knots \n Points from one (of 25) imputed dataset, estimates merged across all imputed datasets",
    y = "Incidence of admission per 100,000 population",
    x = "Year and quarter"
  ) +
  theme_bw() %+replace%
  theme(
    panel.spacing.x = unit(0.5, "cm"),
    axis.text.x = element_text(angle = 90)
  )

ggsave(here("figures/S3C.pdf"), width = 10, height = 6, dpi = 300)


```

  
S. Figure 3D: Gamma distribution, no splines

```{r, echo=TRUE, warning=FALSE, message=FALSE}
# Gamma (log link) model without spline terms, one fit per imputed dataset.
#
# The population offset is now part of the formula. It used to be passed as an
# unnamed extra argument, `glm(formula, data = ..., family = ...,
# offset(log(pop)))`, which R matches to the next free formal - `weights` -
# so log(pop) was used as prior weights and no offset was fitted.
# NOTE(review): predictions now need `pop` in the data that model2pred_imp()
# predicts on; check that, and check other models in this file for the same
# positional pattern.
model_imp_g0 <- by_imp %>%
  mutate(model = list(glm(
    n ~ q * ageg10 * sex + offset(log(pop)),
    family = Gamma(link = "log"),
    data = data %>% filter(hiv == "Positive")
  )))

out_g0 <- model2pred_imp(both_imp_gp, model_imp_g0, q, ageg10, sex, "Positive")

# Pooled prediction (line + ribbon); points are from one imputed dataset
ggplot() +
  geom_line(data = out_g0, aes(x = quarter, y = inc, color = hiv)) +
  geom_ribbon(
    data = out_g0,
    aes(x = quarter, ymin = inc_low, ymax = inc_high, fill = hiv),
    alpha = 0.2
  ) +
  geom_point(
    data = both_imp_gp %>% filter(.imp == 12) %>% filter(hiv == "Positive"),
    aes(x = quarter, y = ((n_imputed / pop) * 100000), color = hiv),
    size = 0.1
  ) +
  facet_grid(sex ~ ageg10) +
  scale_x_date(breaks = breaks1, date_labels = "%b%Y ") +
  scale_y_continuous(limits = c(0, 500)) +
  scale_color_manual(values = c("Positive" = "#00A08A", "Negative" = "grey")) +
  scale_fill_manual(values = c("Positive" = "#00A08A", "Negative" = "#eeeeee")) +
  labs(
    subtitle = "Gamma distibution",
    caption = "Gamma model (no splines) \n Points from one (of 25) imputed dataset, estimates merged across all imputed datasets",
    y = "Incidence of admission per 100,000 population",
    x = "Year and quarter"
  ) +
  theme_bw() %+replace%
  theme(
    panel.spacing.x = unit(0.5, "cm"),
    axis.text.x = element_text(angle = 90)
  )


ggsave(here("figures/S3D.pdf"), width = 10, height = 6, dpi = 300)

```


### Supplementary table 2: Death risk by age group and sex
  
    
Table 2A: Crude Data

```{r, echo=TRUE, warning=FALSE, message=FALSE, results='asis'}
# Crude data: label each admission by combined HIV / ART status and give the
# outcome readable names, then tabulate outcome by age group and sex within
# each HIV/ART stratum (row percentages; missing outcome kept as a category).
a <- spine %>%
  mutate(
    hivart = case_when(
      hiv == "Positive" & art == "yes_art" ~ "HIV positive, on ART",
      hiv == "Positive" & art == "no_art" ~ "HIV positive, not on ART",
      hiv == "Positive" & is.na(art) ~ "HIV positive, ART status unknown",
      hiv == "Negative" ~ "HIV negative",
      is.na(hiv) ~ "HIV status unknown"
    ),
    ageg10 = as.character(ageg10),
    outcome = case_when(
      outcome == 1 ~ "Died",
      outcome == 0 ~ "Discharged alive"
    )
  )

x <- tableby(
  includeNA(outcome) ~ ageg10 + sex,
  data = a,
  strata = hivart,
  cat.stats = "countrowpct",
  total = FALSE,
  test = FALSE
)
summary(x, title = "Crude data")

```
  
    
Table 2B: After imputation

```{r, echo=TRUE, warning=FALSE, message=FALSE, results='asis'}
# Same table from a single imputed dataset (.imp == 2). HIV-positive rows with
# no ART value are labelled as positive by imputation.
b <- filter(spine_imp, .imp == 2) %>%
  mutate(
    hivart = case_when(
      hiv == "Positive" & art == "yes_art" ~ "HIV positive, on ART",
      hiv == "Positive" & art == "no_art" ~ "HIV positive, not on ART",
      hiv == "Positive" & is.na(art) ~ "HIV positive by imputation, assume no ART for model",
      hiv == "Negative" ~ "HIV negative",
      is.na(hiv) ~ "HIV status unknown"
    ),
    ageg10 = as.character(ageg10),
    outcome = case_when(
      outcome == 1 ~ "Died",
      outcome == 0 ~ "Discharged alive"
    )
  )

y <- tableby(
  includeNA(outcome) ~ ageg10 + sex,
  data = b,
  strata = hivart,
  cat.stats = "countrowpct",
  total = FALSE,
  test = FALSE
)
print(summary(y, title = "After imputation for missing data"))

```

### Supplementary figure 4: Sensitivity analysis risk of death
  
    
Complete case analysis: admissions with unknown HIV status or unknown outcome are removed

```{r, echo=TRUE, warning=FALSE, message=FALSE}
# Complete-case analysis for Supplementary figure 4: only admissions with a
# recorded positive HIV result, no imputation.
#
# Changes from the earlier version:
#  * predictions are stored in `death_out_risk_pos_cc` so the imputed
#    `death_out_risk_pos` from the main analysis is not overwritten.
#  * the ART recode was dead code (ART is not a grouping variable below) and
#    the quarter index was computed twice.
#  * deprecated group_indices(., quarter) replaced by cur_group_id().

# Empirical risk per quarter x age x sex (rows without an outcome excluded)
spine_gp_hivpos_orig <- spine %>%
  ungroup() %>%
  filter(hiv == "Positive", !is.na(outcome)) %>%
  mutate(ageg10 = as.factor(ageg10)) %>%
  group_by(quarter) %>%
  mutate(q = cur_group_id()) %>%
  group_by(q, quarter, ageg10, sex, hiv) %>%  # NB. ART status (deliberately) not here
  summarise(
    n_died = sum(outcome == 1),
    n_alive = sum(outcome == 0),
    n_all = n(),
    .groups = "drop"
  ) %>%
  mutate(
    odds_death = n_died / n_alive,
    risk_death = n_died / n_all
  )

# Patient-level data for the model; glm() drops rows with missing outcome
b <- spine %>%
  filter(hiv == "Positive") %>%
  group_by(quarter) %>%
  mutate(q = cur_group_id()) %>%
  ungroup()
death_risk_orig_pos <- glm(outcome ~ q * ageg10 * sex, family = binomial(link = "log"), data = b)
# model2pred_outcome_orig() is the prediction helper for non-imputed data
death_out_risk_pos_cc <- model2pred_outcome_orig(b, death_risk_orig_pos)

ggplot() +
  geom_line(
    data = death_out_risk_pos_cc,
    aes(x = quarter, y = chance_death),
    color = "#0066CC"
  ) +
  geom_ribbon(
    data = death_out_risk_pos_cc,
    aes(x = quarter, ymin = conf.low, ymax = conf.high),
    alpha = 0.2, fill = "#1AB8FF"
  ) +
  facet_grid(sex ~ ageg10) +
  geom_point(data = spine_gp_hivpos_orig, aes(x = quarter, y = risk_death), size = 0.3) +
  labs(
    title = "Risk of death if HIV positive and admitted to hospital",
    caption = "Excluding those with missing HIV status, and those without outcome recorded",
    y = "Risk of death (%)",
    x = "Year and quarter"
  ) +
  scale_x_date(breaks = breaks1, date_labels = "%b%y ") +
  theme_bw() +
  theme(
    panel.spacing.x = unit(0.5, "cm"),
    axis.text.x = element_text(angle = 90)
  )

ggsave(here("figures/S4.pdf"), width = 10, height = 6, dpi = 300)


```

### Supplementary figure 5: Risk of death including ART as a covariate
  
    
Plot model-predicted risk of death from model incorporating age group, sex, quarter-year and ART status

```{r, echo=TRUE, warning=FALSE, message=FALSE}
# Predicted risk of death by ART status: lines first, then ribbons, each drawn
# separately for people not on ART and on ART.
ggplot() +
  geom_line(
    aes(x = quarter, y = chance_death, color = art),
    data = filter(death_out_risk_pos_art, art == "no_art")
  ) +
  geom_line(
    aes(x = quarter, y = chance_death, color = art),
    data = filter(death_out_risk_pos_art, art == "yes_art")
  ) +
  geom_ribbon(
    aes(x = quarter, ymin = conf.low, ymax = conf.high, fill = art),
    data = filter(death_out_risk_pos_art, art == "no_art"),
    alpha = 0.3
  ) +
  geom_ribbon(
    aes(x = quarter, ymin = conf.low, ymax = conf.high, fill = art),
    data = filter(death_out_risk_pos_art, art == "yes_art"),
    alpha = 0.3
  ) +
  facet_grid(sex ~ ageg10) +
  labs(
    title = "Risk of death if HIV positive and admitted to hospital",
    x = "Year and quarter",
    y = "Risk of death (%)",
    fill = "ART status",
    color = "ART status"
  ) +
  scale_color_manual(values = c("#46ACC8", "#B40F20")) +
  scale_fill_manual(values = c("#46ACC8", "#B40F20")) +
  scale_x_date(breaks = breaks1, date_labels = "%b%y ") +
  theme_bw() +
  theme(
    panel.spacing.x = unit(0.5, "cm"),
    axis.text.x = element_text(angle = 90)
  )

ggsave(here("figures/S5.pdf"), width = 10, height = 6, dpi = 300)


```


### Supplementary figure 6: Risk of death over time including HIV negative

```{r, echo=TRUE, warning=FALSE, message=FALSE}
# Pooled (imputed) risk of death for HIV-positive and HIV-negative admissions.
# The HIV-positive predictions are rebuilt here from the per-imputation models
# rather than read from the global `death_out_risk_pos`, which other chunks
# may overwrite with complete-case predictions. That keeps both curves on the
# same (imputed) basis regardless of chunk order.
death_out_risk_pos_imp <- model2pred_outcome_imp(spine_imp, death_risk_pos)

ggplot() +
  geom_line(
    data = death_out_risk_pos_imp,
    aes(x = quarter, y = chance_death, color = "Positive")
  ) +
  geom_ribbon(
    data = death_out_risk_pos_imp,
    aes(x = quarter, ymin = conf.low, ymax = conf.high, fill = "Positive"),
    alpha = 0.2
  ) +
  geom_line(
    data = death_out_risk_neg,
    aes(x = quarter, y = chance_death, color = "Negative")
  ) +
  geom_ribbon(
    data = death_out_risk_neg,
    aes(x = quarter, ymin = conf.low, ymax = conf.high, fill = "Negative"),
    alpha = 0.2
  ) +
  facet_grid(sex ~ ageg10) +
  labs(
    title = "Risk of death if HIV positive and admitted to hospital",
    fill = "HIV status",
    color = "HIV status",
    y = "Risk of death (%)",
    x = "Year and quarter"
  ) +
  scale_color_manual(values = wes_palette("BottleRocket2", 2, type = "discrete")) +
  scale_fill_manual(values = wes_palette("BottleRocket2", 2, type = "discrete")) +
  scale_x_date(breaks = breaks1, date_labels = "%b%y ") +
  coord_cartesian(ylim = c(0, 1)) +
  theme_bw() +
  theme(
    panel.spacing.x = unit(0.5, "cm"),
    axis.text.x = element_text(angle = 90)
  )

ggsave(here("figures/S6.pdf"), width = 10, height = 6, dpi = 300)

```

### Supplementary table 3: Trend in outcome by age and sex

Using "emtrends" from "emmeans" package.  In every age and sex group, confidence intervals for trend by quarter cross null effect.

```{r, echo=TRUE}
## What is trend line??
# Model and data from a single imputation (12 of 25) only
int_df2 <- filter(ungroup(death_risk_pos), .imp == 12) %>%
  dplyr::select(model, data)

outcome_model <- int_df2$model[[1]]

# Per-quarter trend (slope in q) by age group within sex, on the response scale
a <- emtrends(
  outcome_model,
  pairwise ~ ageg10 | sex,
  var = "q",
  data = int_df2$data[[1]],
  transform = "response"
)
# print() shows the trends; its return value is kept for the table chunk below
b <- print(a$emtrends)
```

```{r, echo=TRUE}
# Render the stored trend summary as a gt table
gt(b)
```