Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

expand survival data geoms with geom_table_risk etc. #20

Open
Generalized opened this issue Dec 26, 2020 · 8 comments
Open

expand survival data geoms with geom_table_risk etc. #20

Generalized opened this issue Dec 26, 2020 · 8 comments

Comments

@Generalized
Copy link

Generalized commented Dec 26, 2020

Hello,

I'm trying to achieve faceted KM plots. Your package makes it awesome easy with geom_XX syntax. Much easier than in other packages. It works for the geom_km (with the issue that it doesn't start the Y axis from 0 initially) and geom_band, yet it fails at geom_ticks for censored data. Let me show an example.

The data:

d <- structure(list(sex = structure(c(2L, 2L, 1L, 1L, 1L, 1L, 2L, 
1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 
2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 
1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 
2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 
2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 
2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 
2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L), .Label = c("Male", 
"Female"), class = "factor"), RNR = structure(c(1L, 2L, 2L, 1L, 
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 
2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 
2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 
1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 
1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 
1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 
2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 
2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 
1L, 1L), .Label = c("Resectable", "Non-resectable"), class = "factor"), 
    TRT = structure(c(1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 
    2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 
    1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 
    2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 
    2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 
    2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 
    1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 
    2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 
    1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 
    1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 
    2L, 2L, 1L, 2L, 2L), .Label = c("A", "B"), class = "factor"), 
    status = c(0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 
    0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 
    0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 
    0), time = c(16L, 97L, 98L, 52L, 78L, 52L, 51L, 100L, 52L, 
    50L, 96L, 45L, 27L, 57L, 52L, 100L, 96L, 100L, 99L, 49L, 
    100L, 100L, 100L, 96L, 100L, 19L, 93L, 52L, 98L, 100L, 100L, 
    53L, 100L, 100L, 97L, 98L, 100L, 100L, 96L, 98L, 98L, 100L, 
    98L, 52L, 51L, 95L, 50L, 98L, 100L, 99L, 100L, 100L, 49L, 
    99L, 97L, 100L, 50L, 100L, 52L, 100L, 99L, 43L, 49L, 45L, 
    52L, 94L, 49L, 98L, 57L, 24L, 52L, 98L, 50L, 80L, 50L, 100L, 
    48L, 82L, 46L, 46L, 10L, 52L, 51L, 95L, 18L, 97L, 100L, 97L, 
    44L, 100L, 96L, 100L, 96L, 99L, 52L, 100L, 95L, 99L, 97L, 
    100L, 97L, 55L, 52L, 100L, 97L, 100L, 100L, 97L, 76L, 100L, 
    100L, 100L, 100L, 52L, 52L, 93L, 100L, 45L, 47L, 98L, 100L, 
    93L, 98L, 100L, 46L, 52L, 46L, 49L, 52L, 50L, 52L, 52L, 100L, 
    100L, 99L, 42L, 45L, 54L, 99L, 81L, 52L, 97L, 13L, 18L, 96L, 
    95L, 99L, 99L, 100L, 99L)), row.names = c(NA, -150L), class = "data.frame")

Head:

> head(d)
     sex            RNR TRT status time
1 Female     Resectable   A      0   16
2 Female Non-resectable   A      1   97
3   Male Non-resectable   B      0   98
4   Male     Resectable   A      0   52
5   Male     Resectable   A      0   78
6   Male Non-resectable   A      0   52

Plotting:

> ggplot(d, aes(time = time, status = status, color=sex)) +
+   geom_km()+
+   geom_kmticks()+
+   facet_grid(TRT ~ RNR)

Warnings:
obraz

obraz

and the output from the SurvMiner for a comparison:

obraz


EDIT:

OK, I fixed the issue on my own. The missing function comes from the package scales
When I loaded it, it worked.
Maybe you should pre-load it in your code? People may forget doing that.

> library(scales)
> ggplot(d, aes(time = time, status = status, color=sex)) +
+   geom_km()+
+   geom_kmticks()+
+   facet_grid(TRT ~ RNR)
> # no issue

obraz

@smouksassi
Copy link
Owner

Thanks I will keep it open to remind me to fix it in the next update.
Please not that the Shiny app has a way to add a risk table with the median survival and CI but I am yet to implement it into a function.

@Generalized
Copy link
Author

Generalized commented Dec 26, 2020

Thank you very much.

Well, I don't use Shiny for anything, I rather needed it for a statistical report, where, indeed, I use the median + its CI. I add it easily with just 2 lines to the graph.

What I love the most in your approach is that:

  1. it perfectly integrates with the ggplot2-like syntax: geom_XX. I don't like monoliths like the SurvMiner. I'd love to have all those functionalities split into regular geom_ verbs.

geom_do_this + geom_do_that + geom_do_even_more. That's the way it should go rather than a_monstrous_function(with_dozens_of_parameters_not_easily_extendable)

  1. it enables the facet_XX functionality to any level handled by the ggplot2. SurvMiner allows me to use only 2 splitting factors. When I need 3 (which is typical in the pharmaceutical industry), I have a problem.

With your approach I can do the following easily:

obraz

or even more:

obraz

I think you could easily make a separate (that's important!) competing to the SurvMiner package for the ggplot2-based survival plotting, where each "unit functionality" is split into a corresponding geom_ or stat_ function (to expose all the important settings and parameters for advanced statisticians).

Exactly they way you do. It's really weird, it's been missing for so long time! I'd love to see it.

People, who do serious statistics (not just eye-catching presentations, with different fonts and colors), care more of the syntax-consistency and functionality.

Let me propose a few "geoms_" you might want to consider adding in the future:

  1. geom_km - done
  2. geom_cox - for covariate adjusted curves. With appropriate stat_ exposing the necessary parameters, like the way of calculating the baseline hazard, strata, cluster, estimator of variance, etc.
  3. geom_cmprsk - for competing risks (ideally both Fine-Gray or cause-specific-hazard)
  4. geom_recur - for recurrent events (e.g. Andersen-Gill extension of Cox)
  5. geom_atf (regression models, like the accelerated failure model, with survreg(), one of the most powerful tools in survival analysis, when the PH assumption fails)
  6. geom_ticks - done
  7. geom_band - done, but both for pointwise and simultaneous (for comparing entire curves) + appropriate stat_
  8. geom_compare_pairwise - adding a set of p-values for pairwise comparison of curves (with appropriate correction, can use the p.adjust() from R)
  9. geom_compare_overall - for the overall comparison of curves, like the Mantel-Hanshel log=rank, Peto-Peto modif. of the Gehan-Wilcoxon, Fleming-Harrington (with all the parameters exposed!), Gray (for competing risks). Those methods are important, as each looks at different time of changes between the curves. logrank looks at the middle, Gehan-Wilcoxon when the proportionality of hazards is violated (but curves do not cross) in that the failure times are log-normally distributed (with equal variances, as in the accelerated failure time model), indicating early differences between the survivals, Fleming-Harrington with p=0 and q=1 is used when the proportionality of hazards is violated (but curves do not cross) and late differences occur (based on the Nelson-Aalen estimator of the cumulative hazard). I use them all at work
  10. geom_cif (1-KM for unadjusted curves + covariate adjusted ones)
  11. geom_table - adds a table with number at risk, number of events and number of censored ones (could be split into geom_table_risk, geom_table_events, geom_table_censored)
  12. geom_zph - for Cox diagnostis, plotting the diagnostic plots from the cox.zph
  13. geom_median_time (with CI)
  14. geom_quantile_time (for other quantiles)

It's very important to make it a separate package. Why?

  1. it's easier to find (indexed by Google, placed in the appropriate R CRAN TaskViews). I would never think I will find geom_km in... ggquickeda package. I generally avoid anything that is "quick" (because it often means "rudimentary") and I don't like monoliths. I found this by a pure accident (and I'm happy!)
  2. for you it's easier to maintain, extend and validate with unit tests. Also - it's easier to publish.
  3. You risk less of being removed from the CRAN, if some of the external dependencies will fail at CRAN checking. Because smaller package means fewer dependencies
  4. you can add a nice hex-sticker to it, make dedicated vignette and web page. People don't have to read a huge documentation of the ggquickeda, if they need the survival analysis
  5. the more it's focused on a specific task, the better you control everything
  6. the geom_ functions don't mix with other tasks, from other areas.

I hope I encouraged you to consider starting this journey! You already did well with the basic geom_km. You saved my life, now I don't have to wrap the survfit with ggplot2 calls, having yours - ready to work :)

PS: Please consider adding your examples into the official ggplot2 extensions library!
https://github.com/ggplot2-exts/gallery#adding-a-ggplot2-extension

@smouksassi
Copy link
Owner

Thanks for the valuable feedback. Initially this was written by @sachsmc and he gracefully accepted to be co-author and to include his code as part of ggquickeda. I do believe geom_km and friends is the most ggplot2 like amongst other solutions. I rewrote the ribbon code as at the time there was no stepped ribbon and also wrote it in a way that ggplotly would work. My sole purpose was to enable flexible km curves with facets grouping linetype etc. in my ggquickeda package .
You bring a lot of points motivates me to re work on it as a separate package that ggquickeda depends on as you see there is value versus survminer GGally and others.

@Generalized
Copy link
Author

Generalized commented Dec 27, 2020

This is a great news! I hope the 2021 will be the year of birth of your new awesome package!

Let me show you what I did (quite quickly!) using your functions and a couple of additional tidyr-related coding (but no rocket-science here!)

obraz

MST stands for the median survival time. I could easily extend it by p-values and other stats, if space permits, but I will provide it in a separate table anyway. Just wanted to illustrate it. Sure, I could make something similar with the SurvMiner, but it would be limited only to 2 dimensions. With the flexibility of the ggplot2 faceting mechanism (enabled for the KM via your functions) I don't have to care of how many levels are needed by my client (now or in the future). I just sit, code and report.

@smouksassi and @sachsmc, please accept my deepest gratitude for your work!

PS: this is fake data, nothing secret is revealed here.

You may find useful a small piece of code, that translates the output of survfit into median survival times with grouping variables:

 summary(fit)$table %>% 
+     as.data.frame %>% 
+     tibble::rownames_to_column() %>% 
+     tidyr::separate(rowname, sep=",", into=paste0("gr_", c("column1", "column2", "column....."))) %>% 
+     mutate(across(starts_with("gr_"), ~trimws(gsub(paste0(cur_column(), "="), "", .))))

with the following output (fit, and the processed fit):

obraz

PS: I will have to look later on the time conversion that SurvMiner can do for me in case I need other time units.
For example:

.format_xticklabels <- function(labels, xscale){
  # 1 year = 365.25 days
  # 1 month = 365.25/12 = 30.4375 days
  if(is.numeric(xscale)) xtrans <- 1/xscale
  else
    xtrans <- switch(xscale,
                     d_m = 12/365.25,
                     d_y = 1/365.25,
                     m_d = 365.25/12,
                     m_y = 1/12,
                     y_d = 365.25,
                     y_m = 12,
                     1
    )
  round(labels*xtrans,1)
}

@Generalized
Copy link
Author

Generalized commented Feb 23, 2021

@smouksassi
Please look, how nicely things can be produced with your code. I put it here to encourage you to not abandon the survival functions in the future :) 🥇

Overall curves
obraz

fits <- lapply(c("sex", "age", "RNR", "TRT", "N2O"), function(strata)
    survfit(as.formula( paste("Surv(time, status) ~", strata)), data=survival_data))

fit_summs <- lapply(fits, function(fit)
    summary(fit, times = seq(0, 200, 50), extend = TRUE))

do.call(rbind, 
         lapply(fit_summs, function(fitsumm)
             with(fitsumm, data.frame(value    = strata, 
                                      time     = time, 
                                      n.risk   = n.risk, 
                                      n.censor = n.censor, 
                                      n.event  = n.event)) %>% 
                 mutate(value = trimws(gsub(".*=", "", value)))
         )) -> surv_table

surv_table$value <- factor(surv_table$value)

do.call(rbind, 
        lapply(fit_summs, function(fitsumm)
            fitsumm$table %>% 
                as.data.frame %>% 
                tibble::rownames_to_column() %>% 
                mutate(value = trimws(gsub(".*=", "", rowname))) %>% 
                mutate(time=NA, status=NA)
        )) %>% 
    mutate(MST_line = row_number(),
           MST      = ifelse(is.na(median), "-", 
                             as.character(median))) -> median_st

median_st$value <- factor(median_st$value)

survival_data %>% 
    tidyr::pivot_longer(cols =  c(sex, age, N2O, RNR, TRT), 
                        names_to = "variable", 
                        values_to = "value") -> combined_data

(grA <- ggplot(combined_data, aes(time = time, status = status, color=value)) +
        geom_km()+
        geom_kmticks()+
        theme_bw() +
        theme(legend.position = "top") +
        ggtitle("Progression-free survival of males and females", 
                subtitle = "by the treatment, resectable/non-resectable and N2/non-N2 groups") +
        geom_text(data=median_st, 
                  aes(x=0, y=0.06*MST_line, label=paste0(value, ": ")), 
                  hjust = 0, 
                  size=3, 
                  show.legend = FALSE) +
        geom_text(data=median_st, 
                  aes(x=40, y=0.06*MST_line, label=MST),
                  hjust = 0, 
                  size=3, 
                  show.legend = FALSE) +
        geom_text(data=median_st, 
                  aes(x=0, y=(max(MST_line) + 1)*0.06, 
                      label="Median Survival Time:"), 
                  hjust = 0,
                  size=3, 
                  show.legend = FALSE, 
                  color="gray30") +
        xlab("Time [months]") +
        ylab("Survival probability") +
        labs(color = "Strata") +
        scale_y_continuous(labels = scales::percent_format(accuracy = 1)))

(grB <- ggplot(surv_table) +
        geom_text(aes(x = time, 
                      label = sprintf("%3d (%3d)", n.risk, n.event), 
                      y=value),
                  size=2.8, 
                  show.legend = FALSE) +
        theme_bw() +
        ylab(NULL) + 
        xlab(NULL) +
        theme(panel.grid.minor.x = element_blank(),
              panel.grid.major.x = element_blank()) +
        ggtitle(label = "Number at risk and events over time",
                subtitle = "#at risk (#events)") +
        theme(plot.title = element_text(size=10),
              plot.subtitle = element_text(size=8))
)

library(patchwork)
grA/grB

Curves split by appropriate factors (faceted):

obraz

current_survival_data <- survival_overall_data %>% 
    filter(sex != "Overall") %>% 
    droplevels()

fit <- survfit( Surv(time, status) ~ sex + TRT + RNR + N2O, 
                data = current_survival_data)

summ_fit <- summary(fit, times = c(0, 50, 100, 150, 200), extend = TRUE) 

with(summ_fit, 
    data.frame(strata   = strata,
               time     = time,
               n.risk   = n.risk,
               n.censor = n.censor,
               n.event  = n.event)) %>% 
    tidyr::separate(strata,
                    sep=",",
                    into=c("sex", "TRT", "RNR", "N2O")) %>% 
    mutate(across(1:4, ~trimws(gsub(paste0(cur_column(), "="), "", .)))) %>% 
    mutate(status=NA) -> surv_table

# used to draw the median ST lines and print the numbers
summ_fit$table %>% 
    as.data.frame %>% 
    tibble::rownames_to_column() %>% 
    tidyr::separate(rowname, sep=",", into=c("sex", "TRT", "RNR", "N2O")) %>% 
    mutate(across(1:4, ~trimws(gsub(paste0(cur_column(), "="), "", .)))) %>% 
    mutate(MST_line = as.numeric(as.factor(sex)),
           MST      = ifelse(is.na(median), "-", as.character(median)),
           time     = NA,
           status   = NA) -> median_st

# The KM plot
ggplot(current_survival_data,
       aes(time = time, status = status, color=sex)) +
    facet_grid(TRT ~ RNR + N2O) +
    geom_segment(data=median_st, 
                 aes(x=0, xend=median, y=0.5, yend=0.5), 
                 linetype="dashed", 
                 size=1.05, 
                 color="gray") +
    geom_segment(data=median_st, 
                 aes(x=median, xend=median, y=0, yend=0.5), 
                 linetype="dashed", 
                 size=1.05, 
                 color="gray") +
    geom_km()+
    geom_kmticks()+
    theme_bw() +
    theme(legend.position = "top") +
    ggtitle("Progression-free survival of males and females", 
            subtitle = "by the treatment, resectable/non-resectable and N2/non-N2 groups") +
    geom_text(data=median_st, 
              aes(x=0, y=0.09*MST_line, label=paste0(sex, ":")), 
              hjust = 0, size=3, 
              show.legend = FALSE) +
    geom_text(data=median_st, 
              aes(x=60, y=0.09*MST_line, label=MST), 
              hjust = 0, size=3, 
              show.legend = FALSE) +
    geom_text(data=median_st, 
              aes(x=0, y=(max(MST_line)+1)*0.09, label="Med. Surv. Time:"), 
              hjust = 0, size=3, 
              show.legend = FALSE, 
              color="gray30") +
    xlab("Time [months]") +
    ylab("Survival probability") +
    geom_rect(aes(xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = 0),
              fill="white", inherit.aes=FALSE) +  
    geom_hline(yintercept = 0, color = "grey", size=1) +
    geom_text(data = surv_table, aes(x=time,
                                     label =sprintf("%d", n.risk),
                                     y=-0.1*as.numeric(as.factor(sex))),
              size=2.8) +
    scale_y_continuous(breaks = c(-.2, -.1, 0, .25, .5, .75, 1),
                       labels = c("Males", "Females", "0", "25%", "50%", "75%", "100%"),
                       limits = c(-0.2, 1)) +
    # make some space from the left
    xlim(-5, NA) +
    theme(axis.text.y = element_text(colour = c(hue_pal()(2), rep("black", 5)),
                                     size = c(7.5, 7.5, rep(8, 5)))) 

The data:

survival_data <- structure(list(age = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 2L, 
2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 
2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 
1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 
1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 
2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 
1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 
1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 
2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L), .Label = c("<65", 
"<U+2265>65"), class = "factor"), sex = structure(c(1L, 2L, 1L, 
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 
2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 
2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 
1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 
2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 
1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 
2L, 2L, 1L), .Label = c("Male", "Female"), class = "factor"), 
N2O = structure(c(2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 
1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 
1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 
2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 
2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 
1L, 1L, 2L, 1L, 2L), .Label = c("Overall", "N2"), class = "factor"), 
RNR = structure(c(1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 
2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 
1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 
1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 
2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 
2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 
2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 
2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 
2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 
1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 2L, 2L), .Label = c("Resectable", "Non-resectable"
), class = "factor"), TRT = structure(c(2L, 2L, 1L, 1L, 2L, 
2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 
1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 
1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 
1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 
2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 
1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 
2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 
1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 
1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 
2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L), .Label = c("Surgery", 
"CRT"), class = "factor"), time = c(81L, 80L, 179L, 136L, 
1L, 180L, 2L, 91L, 185L, 37L, 2L, 186L, 80L, 199L, 78L, 3L, 
4L, 83L, 96L, 8L, 77L, 35L, 38L, 118L, 163L, 140L, 195L, 
172L, 127L, 3L, 2L, 83L, 31L, 6L, 125L, 38L, 125L, 3L, 197L, 
36L, 127L, 119L, 181L, 4L, 3L, 176L, 128L, 169L, 12L, 167L, 
195L, 3L, 125L, 89L, 1L, 195L, 38L, 134L, 1L, 177L, 1L, 40L, 
191L, 148L, 190L, 188L, 159L, 80L, 175L, 188L, 2L, 36L, 88L, 
78L, 1L, 86L, 3L, 185L, 35L, 2L, 3L, 193L, 37L, 84L, 193L, 
177L, 2L, 3L, 1L, 73L, 38L, 2L, 173L, 38L, 84L, 197L, 49L, 
196L, 186L, 3L, 1L, 36L, 89L, 46L, 2L, 4L, 152L, 2L, 166L, 
92L, 185L, 175L, 79L, 175L, 5L, 182L, 156L, 186L, 35L, 200L, 
36L, 38L, 190L, 131L, 60L, 131L, 1L, 79L, 4L, 143L, 75L, 
197L, 81L, 118L, 196L, 86L, 194L, 192L, 36L, 1L, 196L, 125L, 
138L, 89L, 198L, 86L, 37L, 77L, 1L, 146L), status = c(1, 
0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 
0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 
0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 
1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 
0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 
0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 
0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1)), row.names = c(NA, 
-150L), class = "data.frame")

levels(survival_data$age) <- c("<65", "\u226565")

## ---- survival_overall_data --------
srv_data_tmp <- survival_data
srv_data_tmp$sex <- "Overall"
data_ov_sex <- rbind(survival_data, srv_data_tmp)

#add overall age
srv_data_tmp <- data_ov_sex
srv_data_tmp$age <- "Overall"
data_ov_age <- rbind(data_ov_sex, srv_data_tmp)

#add overall RNR
srv_data_tmp <- data_ov_age
srv_data_tmp$RNR <- "Overall"
data_ov_RNR <- rbind(data_ov_age, srv_data_tmp)

#add overall TRT
srv_data_tmp <- data_ov_RNR
srv_data_tmp$TRT <- "Overall"
data_ov_TRT <- rbind(data_ov_RNR, srv_data_tmp)

survival_overall_data <- data_ov_TRT
rm(srv_data_tmp)

@smouksassi
Copy link
Owner

thanks this is awesome I hope I will have time to get around to this soon ! the above showcase why we need this code and package ! of course PR welcome

@Generalized
Copy link
Author

Generalized commented Jun 25, 2021

@smouksassi Hi! I don't know, if you finally decided to create a separate package for the survival analysis, but (to encourage you) I thought you might want to see, how flexible are the tools you made :) Let me show you an example - the final plot. Sure, there's nothing fancy about it - just ordinary KM. BUT, this can be faceted easily (to any level) without hacks, all components can be easily "commented out", no "million-parameters-big-function controlling everything". It's more in the tidyverse spirit.

Of course, I had to write some code to achieve that (e.g. the median survival time, the table with number of events, combined via patchwork package), but it was much easier with your "atomic" (I mean - simple, single-responsibility) functions km_XX, allowing me to get the basic plot and enhance it as I want, rather than hacking the survminer. That's why I loved your approach and why it would be great to have it separated - in the future :)

obraz

By the way, there's a smart way to exclude a KM band from a certain curve - like an "average" curve, which is just for informative purposes, but not for inference. I tried to achieve that by providing a separate filtered data, but it didn't work. I was wondering, if the km_bands could take an argument to decide to which curve the CI should be added. But, anyway, this can be achieved by assigning a "transparent color" to a certain "fill".
... + scale_fill_manual(values = c("Overall" = "transparent", "Resectable" = "green3", "Unresectable" = "red3"))

@smouksassi smouksassi changed the title stat_kmticks()`: object 'identity_trans' of mode 'function' was not found expand survival data geoms with geom_table_risk etc. Jun 21, 2022
@smouksassi
Copy link
Owner

smouksassi commented Dec 9, 2023

I have written a function that can produce the desired output with control on what to facet on and how to conduct logrank pvalue (grouping)

library(survival)
library(tidyverse)
library(ggquickeda)
library(ggrepel)
library(ggh4x)

ggkmandtable <- function(data = lung_long, # long format filter to Endpoint of choice
                         time = "time"   , # long format filter to Endpoint of choice
                         status = "DV",
                         endpoint ="Endpoint",
                         groupvar1 = "Endpoint", #separate fit by endpoint and by expname
                         groupvar2 ="expname", # and up to two additional grouping
                         exposure_metrics = c("age","ph.karno"),
                                            # exposures/covariates will be stacked into expname exptile
                                            # i
                         exposure_metric_split = c("median","tertile","quartile","none"),
                         exposure_metric_soc_value = -99,
                         exposure_metric_plac_value = 0,
                         color_fill = "exptile",
                         linetype = "exptile",
                         xlab = "Time of follow_up",
                         ylab ="Overall survival probability",
                         nrisk_table_plot = TRUE,
                         nrisk_table_variables = c("n.risk"),#n.risk pct.risk n.event cum.n.event n.censor
                         nrisk_table_breaktimeby = NULL,
                         nrisk_table_textsize = 4,
                         nrisk_position_scaler = 0.2,
                         nrisk_position_dodge = 0.2,
                         nrisk_offset = 0,
                         nrisk_filterout0 = FALSE,
                         km_logrank_pvalue = FALSE,
                         km_trans ="identity" ,#"identity","event","cumhaz","cloglog")
                         km_ticks = TRUE,
                         km_band  = TRUE,
                         km_conf_int = 0.95,
                         km_conf_type = "log", #c("none" , "plain", "log" ,"log-log","logit"),
                         km_conf_lower = "usual", #c("peto" , "modified", "usual"),
                         km_median = "none", #medianci,median none
                         km_yaxis_position = c("left"),#"right
                         facet_formula = NULL,
                         facet_ncol = NULL,
                         facet_strip_position = c("top","top","top","top")
) {
  timevar          <- time
  statusvar        <- status
  endpointinputvar <- endpoint
  groupvar1inputvar <- groupvar1
  groupvar2inputvar <- groupvar2
  colorinputvar    <-  if (color_fill !="none") color_fill else NULL
  fillinputvar     <-  if (color_fill !="none") color_fill else NULL
  linetypeinputvar <-  if (linetype   !="none") linetype   else NULL
  survformula      <-  paste( "Surv","(",timevar,",",statusvar,")",sep="")
  
  facet_formula <- if (is.null(facet_formula) ) stats::as.formula( paste("~",groupvar1inputvar,"+",groupvar2inputvar)) else
    stats::as.formula(facet_formula)
  
  exposure_metric_split <- match.arg(exposure_metric_split)
  
  
  data <- data %>% 
    mutate(none = "none") # needed when no metric are chosen

  data.long <- data %>% 
    gather(expname,expvalue,!!!exposure_metrics) %>% 
    group_by(expname,!!endpoint) 
  
  if(exposure_metric_split=="none") {
    data.long <- data.long %>% 
      mutate(exptile = case_when(
        #expvalue == exposure_metric_soc_value  ~ "SOC",
        #expvalue == exposure_metric_plac_value ~ "Placebo",
        #expvalue  > exposure_metric_plac_value ~ "all"))
    expvalue == exposure_metric_soc_value  ~ NA,
    expvalue == exposure_metric_plac_value ~ NA,
    expvalue  > exposure_metric_plac_value ~ expvalue))
  }
   if(exposure_metric_split=="quartile") {
     data.long <- data.long %>% 
       mutate(
        Q25 = quantile(expvalue[!expvalue %in% c(exposure_metric_soc_value,
                                                 exposure_metric_plac_value)], 0.25, na.rm=TRUE),
        Q50 = quantile(expvalue[!expvalue %in% c(exposure_metric_soc_value,
                                                 exposure_metric_plac_value)], 0.50, na.rm=TRUE), 
        Q75 = quantile(expvalue[!expvalue %in% c(exposure_metric_soc_value,
                                                 exposure_metric_plac_value)], 0.70, na.rm=TRUE)) %>% 
       mutate(exptile = case_when(
         expvalue == exposure_metric_soc_value  ~ "SOC",
         expvalue == exposure_metric_plac_value ~ "Placebo",
         expvalue  > exposure_metric_plac_value &
                           expvalue <= Q25      ~ "Q1",
         expvalue > Q25  & expvalue <= Q50      ~ "Q2",
         expvalue > Q50  & expvalue <= Q75      ~ "Q3",
         expvalue > Q75                         ~ "Q4"))
   }
  if(exposure_metric_split=="tertile") {
    data.long <- data.long %>% 
      mutate(
        Q33 = quantile(expvalue[!expvalue %in% c(exposure_metric_soc_value,
                                                 exposure_metric_plac_value)], 1/3, na.rm=TRUE),
        Q66 = quantile(expvalue[!expvalue %in% c(exposure_metric_soc_value,
                                                 exposure_metric_plac_value)], 2/3, na.rm=TRUE)) %>% 
      mutate(exptile = case_when(
        expvalue == exposure_metric_soc_value  ~ "SOC",
        expvalue == exposure_metric_plac_value ~" Placebo",
        expvalue  > exposure_metric_plac_value &
                          expvalue <= Q33      ~ "T1",
        expvalue > Q33  & expvalue <= Q66      ~ "T2",
        expvalue > Q66                         ~ "T3"))
  }
  if(exposure_metric_split=="median") {
    data.long <- data.long %>% 
      mutate(
        Q50 = quantile(expvalue[!expvalue %in% c(exposure_metric_soc_value,
                                                 exposure_metric_plac_value)], 0.5, na.rm=TRUE)) %>% 
      mutate(exptile = case_when(
        expvalue == exposure_metric_soc_value  ~"SOC",
        expvalue == exposure_metric_plac_value ~"Placebo",
        expvalue > 0   &  expvalue <= Q50      ~ "M1",
        expvalue > Q50                         ~ "M2"))
  }
  data.long$exptile2 <- data.long$exptile
  
  
  #we generate a curve by the combination of all these inputs removing duplicates and none
  listvars <- unique(c(endpointinputvar,colorinputvar,fillinputvar,linetypeinputvar,groupvar1,groupvar2))
  listvars <- listvars[!is.element(listvars,c("none",".")) ]
  listvars <- listvars[!duplicated(listvars) ]
  if ( length(listvars) ==0 ){
    f <- as.formula(paste(survformula, "1", sep = " ~ "))
  }
  if ( length(listvars) >0 ){
    f <- as.formula(paste(survformula, paste(listvars, collapse = " + "), sep = " ~ "))
  }
  
  surv_object <- eval(bquote( survfit( .(f)  , data.long) ))
  
  if (is.null(nrisk_table_breaktimeby) ||
      nrisk_table_breaktimeby == ''    ||
      is.na(nrisk_table_breaktimeby)){
    ggsurv <- survminer::ggsurvplot(surv_object,
                                    data.long,risk.table = TRUE,
                                    ggtheme = theme_bw())
  } else {
    ggsurv <- survminer::ggsurvplot(surv_object,
                                    data.long,risk.table = TRUE,
                                    break.time.by = nrisk_table_breaktimeby,
                                    ggtheme = theme_bw())
  }
  if(km_logrank_pvalue){ #log rank does not group by color_fill, linetype exptile
    loopvariables <- unique(c(endpointinputvar,"expname",groupvar1,groupvar2))
    loopvariables <- loopvariables[!loopvariables%in% "exptile"]
    listvars2 <- listvars[!listvars%in% loopvariables]
    
    if ( length(listvars2) ==0 ){
      f2 <- as.formula(paste(survformula, "1", sep = " ~ "))
    }
    if ( length(listvars2) >0 ){
      f2 <- as.formula(paste(survformula, paste(listvars2, collapse = " + "), sep = " ~ "))
    }
   
    survfit_by_endpoint <- list()
    logrank_test_by_endpoint <- list()
    data.long <- unite(data.long,"loopvariable", !!!loopvariables, remove = FALSE)
    
    for (i in unique(data.long[,"loopvariable"]) %>%
         pull() %>%
         as.character() ) {
      survregdata<- data.long %>%
        filter(.data[["loopvariable"]] ==i)
      survfit_by_endpoint_fit <- eval(bquote( survfit( .(f2)  , survregdata) ))
      survfit_by_endpoint[[i]] <- survfit_by_endpoint_fit
      logrank_test_by_endpoint_fit <- survminer::surv_pvalue(survfit_by_endpoint_fit, method = "1",
                                                             data=survregdata)
      logrank_test_by_endpoint_fit[,"loopvariable"] <- i
      logrank_test_by_endpoint[[i]] <- logrank_test_by_endpoint_fit
    }
    logrank_test_by_endpoint <- enframe(logrank_test_by_endpoint) %>% 
      unnest(cols = c(value))
    logrank_test_by_endpoint <- logrank_test_by_endpoint %>% 
      separate(loopvariable, into = loopvariables,
               sep="_",extra = "merge"
      )
    #print(listvars2)
    #print(f2)
    #print(logrank_test_by_endpoint)
  }
    #print(listvars)
    #print(f)
  risktabledata <- ggsurv$table$data
  #print(risktabledata)
  if(!is.null(surv_object$strata)){
    variables <- survminer:::.get_variables(risktabledata$strata, surv_object, data.long)
    for(variable in variables) {
      risktabledata[[variable]] <- survminer:::.get_variable_value(variable,risktabledata$strata, surv_object, data.long)
    }
  }
  if(nrisk_filterout0){
    risktabledata <- risktabledata %>% 
      filter(n.risk > 0)
  }
  if(!is.null(nrisk_table_variables) && (length(as.vector(nrisk_table_variables)) > 0) &&
     all(nrisk_table_variables != "")){
    risktabledatag<- gather(risktabledata,key,value, !!!nrisk_table_variables , factor_key = TRUE)
    risktabledatag$keynumeric<- - nrisk_position_scaler* as.numeric(as.factor(risktabledatag$key)) + nrisk_offset
  }
  if(is.null(nrisk_table_variables) || all(nrisk_table_variables == "") ) {
    risktabledatag<- gather(risktabledata,key,value, n.risk, factor_key = TRUE)
    risktabledatag$keynumeric<- - nrisk_position_scaler* as.numeric(as.factor(risktabledatag$key)) + nrisk_offset
  }
  
  if (km_median!="none"){
    if(!is.null(surv_object$strata) || is.matrix(surv_object$surv))  {
    .table <- as.data.frame(summary(surv_object)$table)
  } else {
    .table <- t(as.data.frame(summary(surv_object)$table))
    rownames(.table) <- "(all)"
  }
  surv_median <- as.vector(.table[,"median"])
  dfmedian <- data.frame(x1 = surv_median,
                         x2 = surv_median,
                         x1lower =  as.vector(.table[,"0.95LCL"]),
                         x1upper =  as.vector(.table[,"0.95UCL"]),
                         y1 = rep(0, length(surv_median)),
                         y2 = rep(0.5, length(surv_median)),
                         strata = survminer:::.clean_strata(rownames(.table)))
  
  if(!is.null(surv_object$strata)){
    variables <- survminer:::.get_variables(dfmedian$strata, surv_object, data.long)
    for(variable in variables) {
      dfmedian[[variable]] <- survminer:::.get_variable_value(variable, dfmedian$strata, surv_object, data.long)
    }
  }
  }
  
  plotkm0 <-   ggplot(data.long,aes_string(time = time,status = status,
                                            color = colorinputvar, fill = fillinputvar,
                                            linetype = linetypeinputvar))+
    geom_line(stat = "km",trans = km_trans)
  if(km_band){
    plotkm00 <-  plotkm0 +
      geom_ribbon(stat="kmband",alpha=0.2,color="transparent",
                  conf.int = km_conf_int,
                  conf.type = km_conf_type,
                  conf.lower = km_conf_lower,
                  error = "greenwood",
                  trans = km_trans) 
  }
  if(!km_band){
    plotkm00 <-  plotkm0 
  }
  
  if(km_ticks){
    plotkm000 <-  plotkm00 +
      geom_kmticks(trans = km_trans)
  }
  if(!km_ticks){
    plotkm000 <-  plotkm00 
  }
  if(nrisk_table_plot) {
      plotkm1 <-  plotkm000 +
        geom_text(data=risktabledatag,
                aes(x=time,label=value,y=keynumeric,time=NULL,status=NULL ),
                show.legend = FALSE,
                size= nrisk_table_textsize, 
                position =   ggstance::position_dodgev(height =nrisk_position_dodge)
      )+
        geom_hline(yintercept = -nrisk_position_scaler *unique(c(1,(as.numeric(as.factor(
          risktabledatag$key))+1 )) )  + (abs(nrisk_position_dodge)/2 ) + nrisk_offset
        )
    }

  if(!nrisk_table_plot) {
    plotkm1 <-  plotkm0 
  }
  
  
  if(km_median=="table"){
    plotkm1m  <-  plotkm1 +
      geom_text(data=dfmedian %>% mutate(none="none"), 
                aes(x=0, y=(max(as.numeric(as.factor(get(!!color_fill))))+1)*0.09,
                    label="Med. Surv. Time:"), 
                hjust = 0, size=3, 
                show.legend = FALSE, 
                color="gray30",inherit.aes = FALSE) +
      geom_text(data=dfmedian %>% mutate(none="none",
                                         "{timevar}" := NA,
                                         "{statusvar}" := NA), 
                aes(x=0, y=0.09*as.numeric(as.factor(get(!!color_fill))),
                    label=paste0(get(!!color_fill), ":")), 
                hjust = 0, size=3, 
                show.legend = FALSE,inherit.aes = TRUE) +
      geom_text(data=dfmedian %>% mutate(none="none",
                                         "{timevar}" := NA,
                                         "{statusvar}" := NA), 
                aes(x=60, y=0.09*as.numeric(as.factor(get(!!color_fill))),
                    label=ifelse(is.na(x1), "-",as.character(x1))), 
                hjust = 0, size=3, 
                show.legend = FALSE,inherit.aes = TRUE) 

  }     
  if(km_median=="medianci"){
    plotkm1m  <-  plotkm1 +
      geom_label_repel(data = dfmedian, aes(x= x1 , y= y2 ,label =sprintf("%#.3g (%#.3g, %#.3g)",x1,x1lower,x1upper),
                                            status=NULL,time=NULL),show.legend = FALSE,
                       label.size = NA, direction="both",fill="white",
                       segment.color="black",nudge_y = -0.1,segment.size = 0.5,
                       alpha = 0.5,label.padding=.1, force = 5,
                       na.rm=TRUE,
                       seed = 1234) +
      geom_label_repel(data = dfmedian, aes(x= x1 , y= y2 ,label =sprintf("%#.3g (%#.3g, %#.3g)",x1,x1lower,x1upper),
                                            status=NULL,time=NULL),show.legend = FALSE,
                       label.size = NA,direction="both",
                       nudge_y = -0.1,segment.size = 0.5,
                       arrow = arrow(length = unit(0.03, "npc"), type = "closed", ends = "first"),
                       alpha = 1,label.padding=.1, force = 5,
                       na.rm=TRUE,
                       fill = NA,
                       seed = 1234)
  }
  if(km_median=="median"){
    plotkm1m  <-  plotkm1 +
        geom_label_repel(data = dfmedian, aes(x= x1 , y= y2 ,label = sprintf("%#.3g",x1), status=NULL,time=NULL),show.legend = FALSE,
                         label.size = NA, direction="both",fill="white",
                         segment.color="black",nudge_y = -0.1,segment.size = 0.5,
                         alpha = 0.5,label.padding=.1, force = 5,
                         na.rm=TRUE,
                         seed = 1234) +
        geom_label_repel(data = dfmedian, aes(x= x1 , y= y2 ,label =sprintf("%#.3g",x1),
                                              status=NULL,time=NULL),show.legend = FALSE,
                         label.size = NA,direction="both",
                         nudge_y = -0.1,segment.size = 0.5,
                         arrow = arrow(length = unit(0.03, "npc"), type = "closed", ends = "first"),
                         alpha = 1,label.padding=.1, force = 5,
                         na.rm=TRUE,
                         fill = NA,
                         seed = 1234)
  }
  if(km_median=="none"){
    plotkm1m <- plotkm1
  }
  
  plotkm2 <- plotkm1m +
    facet_nested_wrap(facet_formula,ncol= facet_ncol ,
                      strip =  strip_split(position=facet_strip_position))+
    scale_y_continuous(position = km_yaxis_position,
                       breaks =c(unique(risktabledatag$keynumeric),
                                 c(0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1) ), 
                       labels= c(as.vector(unique(risktabledatag$key)),
                                 c("0","10","0.2","0.3","0.4","0.5","0.6","0.7","0.8","0.9","1") ),
                       expand = expansion(mult=c(0.01,0.01),
                                          add =c(0, 0)))+
    scale_x_continuous( breaks =c(unique(risktabledatag$time))) +
    theme_bw()+
    theme(legend.position = "top",strip.placement = "outside",
          axis.title.y = element_blank())+
    labs(color="",fill="",linetype="",
         x = xlab,y = ylab) 
  if(km_logrank_pvalue){
    plotkm3 <-  plotkm2 +
      geom_text(data=logrank_test_by_endpoint,
                aes(x=-Inf,y=Inf,label=pval.txt),vjust=1,hjust=0,
                inherit.aes = FALSE)
    plotkm <- plotkm3
  }
  if(!km_logrank_pvalue) {
    plotkm <- plotkm2
  }
  plotkm
}

Example

library(tidyverse)
library(patchwork)
library(ggquickeda)
library(patchwork)
library(ggh4x)
library(survival)
library(survminer)
source("ggkmandtable.R")
 # see comments above to dput survival_overall_data 
current_survival_data <- survival_overall_data %>% 
  filter(sex != "Overall") %>% 
  droplevels()

ggkmandtable (data = current_survival_data,
              time= "time",
              status ="DV",
              exposure_metrics =c("none"),
              exposure_metric_split = "none",
              color_fill = "sex",
              linetype = "TRT",
              groupvar1 = "RNR",
              groupvar2 = "N2O",
              xlab = "Time of follow_up",
              ylab ="Overall survival probability",
              nrisk_table_variables = c("n.censor"),
              km_median = "table",
              km_band = FALSE,
              nrisk_table_breaktimeby = NULL,
              facet_formula ="~TRT+RNR+N2O",
              facet_ncol = 6,
              facet_strip_position = c("right","top","top") )

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants