From 4440b3f1d6dfc1a9a038f65be82c908245c2a793 Mon Sep 17 00:00:00 2001
From: xihang-chen <xihang.chen@ndorms.ox.ac.uk>
Date: Wed, 27 Nov 2024 14:11:51 +0000
Subject: [PATCH] recreate plot_seuqnec_ratio using visOmop

---
 R/plotSequenceRatios.R                      | 147 +++++++++++---------
 man/plotSequenceRatios.Rd                   |  15 +-
 man/tableSequenceRatios.Rd                  |   2 +-
 tests/testthat/test-plotSequenceRatio.R     |  12 +-
 vignettes/a01_Introduction.Rmd              |   7 +-
 vignettes/a04_Visualise_sequence_ratios.Rmd |  10 +-
 6 files changed, 104 insertions(+), 89 deletions(-)

diff --git a/R/plotSequenceRatios.R b/R/plotSequenceRatios.R
index afa98b6..e16ea25 100644
--- a/R/plotSequenceRatios.R
+++ b/R/plotSequenceRatios.R
@@ -4,10 +4,12 @@
 #' It provides a ggplot of the sequence ratios of index and marker cohorts.
 #'
 #' @param result Table output from summariseSequenceRatios.
-#' @param onlyaSR If the only SR to be plotted is the adjusted SR.
-#' @param plotTitle Title of the plot, if NULL no title will be plotted.
+#' @param onlyASR If set to be TRUE then only adjusted SR will be plotted.
+#' Otherwise if it is set to be FALSE then both adjusted and crude SR will be plotted.
+#' @param plotTitle Title of the plot, if NULL no title will be included in the plot.
 #' @param labs Axis labels for the plot.
-#' @param colours Colours for both parts of the plot, pre- and post- time 0.
+#' @param colours Colours for sequence ratio.
+#' @param facet The variable to facet by.
 #'
 #' @return A plot for the sequence ratios of index and marker cohorts.
 #'
@@ -16,7 +18,6 @@
 #' @examples
 #' \donttest{
 #' library(CohortSymmetry)
-#'
 #' cdm <- mockCohortSymmetry()
 #' cdm <- generateSequenceCohortSet(cdm = cdm,
 #'                                  indexTable = "cohort_1",
@@ -28,86 +29,106 @@
 #' CDMConnector::cdmDisconnect(cdm = cdm)
 #' }
 plotSequenceRatios <- function(result,
-                               onlyaSR = FALSE,
+                               onlyASR = FALSE,
                                plotTitle = NULL,
                                labs = c("SR", "Drug Pairs"),
-                               colours = c("red", "blue")
-                               ) {
-  # checks
-  checkInputPlotSequenceRatios(result = result,
-                               onlyaSR = onlyaSR,
-                               plotTitle = plotTitle,
-                               labs = labs,
-                               colours = colours)
+                               colours = c("red", "blue"),
+                               facet = NULL
+                               ){
+  # validate checks
+  result <- omopgenerics::validateResultArgument(result)
 
+  # check settings
   result <- result |>
-    visOmopResults::splitGroup()
+    visOmopResults::filterSettings(
+      .data$result_type == "sequence_ratios"
+    )
+
+  if (nrow(result) == 0) {
+    cli::cli_warn("`result` object does not contain any `result_type == 'sequence_ratios'` information.")
+  }
+
+  if (!is.logical(onlyASR)) {
+    cli::cli_abort("The parameter onlyASR has to be either True or False.")
+  }
+
+  if(onlyASR) {
+    checkmate::assert_character(colours,
+                                len = 1)
+  } else {
+    checkmate::assert_character(colours,
+                                len = 2)
+  }
 
-  sr_tidy <- result |>
-    visOmopResults::filterSettings(.data$result_type == "sequence_ratios") |>
-    dplyr::select(-c("cdm_name", "strata_name", "strata_level", "variable_level")) |>
-    visOmopResults::splitAdditional() |>
-    tidyr::pivot_wider(names_from = "estimate_name", values_from = "estimate_value") |>
-    dplyr::mutate(group = paste0(.data$index_cohort_name, " -> ", .data$marker_cohort_name)) |>
-    dplyr::select(-c("index_cohort_name", "marker_cohort_name")) |>
+  data <- result |>
+    omopgenerics::tidy() |>
     dplyr::mutate(
-      point_estimate = as.numeric(.data$point_estimate),
-      lower_CI = as.numeric(.data$lower_CI),
-      upper_CI = as.numeric(.data$upper_CI),
-      variable_name = as.factor(.data$variable_name)
+      pair = paste0(.data$index_cohort_name, "->", .data$marker_cohort_name)
     ) |>
-    dplyr::select(tidyselect::where( ~ dplyr::n_distinct(.) > 1)|.data$group) |>
-    dplyr::rename(
-      !!labs[1] := "point_estimate",
-      !!labs[2] := "group"
-    )
+    dplyr::filter(.data$variable_level == "sequence_ratio") |>
+    dplyr::select("pair", "variable_name", "point_estimate",
+                  "lower_CI", "upper_CI", "cdm_name",
+                  "cohort_date_range", "combination_window",
+                  "confidence_interval", "days_prior_observation",
+                  "index_marker_gap", "moving_average_restriction",
+                  "washout_window")
 
-  if(onlyaSR) {
-    sr_tidy <- sr_tidy |>
+  if (onlyASR){
+    data <- data |>
       dplyr::filter(.data$variable_name == "adjusted")
-    colours = c("adjusted" = colours[1])
-  } else {
-    sr_tidy <- sr_tidy |>
-      dplyr::filter(.data$variable_name == "adjusted"|.data$variable_name == "crude")
-    colours = c("crude" = colours[1], "adjusted" = colours[2])
-  }
 
-  facet_wrap_vars <- colnames(sr_tidy)[! colnames(sr_tidy) %in% c(labs[2], labs[1], "lower_CI", "upper_CI", "variable_name", "count", "percentage", "variable_name", "estimate_type")]
-  for(i in facet_wrap_vars) {
-    sr_tidy <- sr_tidy |>
-      dplyr::mutate(!!i := paste0(i, " = ", .data[[i]]))
-  }
+    custom_colors <- c("adjusted" = colours)
 
-  if(length(facet_wrap_vars) == 0) {
-    ggplot2::ggplot(data = sr_tidy, ggplot2::aes(
-      x = .data[[labs[1]]], y = .data[[labs[2]]], group = .data$variable_name)) +
-      ggplot2::geom_errorbarh(ggplot2::aes(xmin = .data$lower_CI, xmax = .data$upper_CI, colour = .data$variable_name), height = 0.2) +
-      ggplot2::geom_point(ggplot2::aes(colour = .data$variable_name, shape = .data$variable_name), size = 3) +
-      ggplot2::geom_vline(ggplot2::aes(xintercept = 1), linetype = 2) +
-      ggplot2::scale_shape_manual(values = rep(19, 5)) +
-      ggplot2::scale_colour_manual(values = colours) +
+    p <- visOmopResults::scatterPlot(
+      data,
+      x = "pair",
+      y = "point_estimate",
+      line = FALSE,
+      point = TRUE,
+      ribbon = FALSE,
+      ymin = "lower_CI",
+      ymax = "upper_CI",
+      facet = facet,
+      colour = "variable_name"
+    ) +
+      ggplot2::ylab(labs[1]) +
+      ggplot2::xlab(labs[2]) +
       ggplot2::labs(title = plotTitle) +
+      ggplot2::coord_flip() +
       ggplot2::theme_bw() +
+      ggplot2::scale_colour_manual(values = custom_colors) +
       ggplot2::theme(panel.border = ggplot2::element_blank(),
                      axis.line = ggplot2::element_line(),
                      legend.title = ggplot2::element_blank(),
-                     plot.title = ggplot2::element_text(hjust = 0.5)
-      )
+                     plot.title = ggplot2::element_text(hjust = 0.5))
+
   } else {
-    ggplot2::ggplot(data = sr_tidy, ggplot2::aes(
-      x = .data[[labs[1]]], y = .data[[labs[2]]], group = .data$variable_name)) +
-      ggplot2::geom_errorbarh(ggplot2::aes(xmin = .data$lower_CI, xmax = .data$upper_CI, colour = .data$variable_name), height = 0.2) +
-      ggplot2::geom_point(ggplot2::aes(colour = .data$variable_name, shape = .data$variable_name), size = 3) +
-      ggplot2::geom_vline(ggplot2::aes(xintercept = 1), linetype = 2) +
-      ggplot2::scale_shape_manual(values = rep(19, 5)) +
-      ggplot2::scale_colour_manual(values = colours) +
-      ggplot2::facet_wrap(stats::as.formula(paste("~", paste(facet_wrap_vars, collapse = " + ")))) +
+
+    custom_colors <- c("adjusted" = colours[1],
+                       "crude" = colours[2])
+
+    p <- visOmopResults::scatterPlot(
+      data,
+      x = "pair",
+      y = "point_estimate",
+      line = FALSE,
+      point = TRUE,
+      ribbon = FALSE,
+      ymin = "lower_CI",
+      ymax = "upper_CI",
+      facet = facet,
+      colour = "variable_name"
+    ) +
+      ggplot2::ylab(labs[1]) +
+      ggplot2::xlab(labs[2]) +
       ggplot2::labs(title = plotTitle) +
+      ggplot2::coord_flip() +
       ggplot2::theme_bw() +
+      ggplot2::scale_colour_manual(values = custom_colors) +
       ggplot2::theme(panel.border = ggplot2::element_blank(),
                      axis.line = ggplot2::element_line(),
                      legend.title = ggplot2::element_blank(),
-                     plot.title = ggplot2::element_text(hjust = 0.5)
-      )
+                     plot.title = ggplot2::element_text(hjust = 0.5))
   }
+  return(p)
 }
diff --git a/man/plotSequenceRatios.Rd b/man/plotSequenceRatios.Rd
index 6d8b91a..f2c2499 100644
--- a/man/plotSequenceRatios.Rd
+++ b/man/plotSequenceRatios.Rd
@@ -6,22 +6,26 @@
 \usage{
 plotSequenceRatios(
   result,
-  onlyaSR = FALSE,
+  onlyASR = FALSE,
   plotTitle = NULL,
   labs = c("SR", "Drug Pairs"),
-  colours = c("red", "blue")
+  colours = c("red", "blue"),
+  facet = NULL
 )
 }
 \arguments{
 \item{result}{Table output from summariseSequenceRatios.}
 
-\item{onlyaSR}{If the only SR to be plotted is the adjusted SR.}
+\item{onlyASR}{If set to be TRUE then only adjusted SR will be plotted.
+Otherwise if it is set to be FALSE then both adjusted and crude SR will be plotted.}
 
-\item{plotTitle}{Title of the plot, if NULL no title will be plotted.}
+\item{plotTitle}{Title of the plot, if NULL no title will be included in the plot.}
 
 \item{labs}{Axis labels for the plot.}
 
-\item{colours}{Colours for both parts of the plot, pre- and post- time 0.}
+\item{colours}{Colours for sequence ratio.}
+
+\item{facet}{The variable to facet by.}
 }
 \value{
 A plot for the sequence ratios of index and marker cohorts.
@@ -32,7 +36,6 @@ It provides a ggplot of the sequence ratios of index and marker cohorts.
 \examples{
 \donttest{
 library(CohortSymmetry)
-
 cdm <- mockCohortSymmetry()
 cdm <- generateSequenceCohortSet(cdm = cdm,
                                  indexTable = "cohort_1",
diff --git a/man/tableSequenceRatios.Rd b/man/tableSequenceRatios.Rd
index c5ccc9f..bd6247e 100644
--- a/man/tableSequenceRatios.Rd
+++ b/man/tableSequenceRatios.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/tableSequenceratios.R
+% Please edit documentation in R/tableSequenceRatios.R
 \name{tableSequenceRatios}
 \alias{tableSequenceRatios}
 \title{A formatted visualization of sequence_symmetry objects.}
diff --git a/tests/testthat/test-plotSequenceRatio.R b/tests/testthat/test-plotSequenceRatio.R
index 20033d6..331194d 100644
--- a/tests/testthat/test-plotSequenceRatio.R
+++ b/tests/testthat/test-plotSequenceRatio.R
@@ -26,7 +26,7 @@ test_that("plot working", {
   plotSR2 <- plotSequenceRatios(result, plotTitle = "Test plot")
   plotSR3 <- plotSequenceRatios(result, labs = c("xlab", "ylab"))
   plotSR4 <- plotSequenceRatios(result, colours = c("blue", "green"))
-  plotSR5 <- plotSequenceRatios(result, onlyaSR = TRUE, colours = c("orange"))
+  plotSR5 <- plotSequenceRatios(result, onlyASR = TRUE, colours = c("orange"))
 
   expect_true("ggplot" %in% (plotSR1 %>% class()))
   expect_true("ggplot" %in% (plotSR2 %>% class()))
@@ -67,16 +67,10 @@ test_that("expected errors", {
   expect_error(plotSequenceRatios("result"))
   expect_error(plotSequenceRatios("result22"))
   expect_error(plotSequenceRatios(result2))
-  expect_error(plotSequenceRatios(result, colours = c("no", "black")))
   expect_error(plotSequenceRatios(result, colours = "red"))
   expect_error(plotSequenceRatios(result, colours = c(3,4)))
-  expect_error(plotSequenceRatios(result, plotTitle = 2))
-  expect_error(plotSequenceRatios(result, plotTitle = c("1", "2")))
-  expect_error(plotSequenceRatios(result, labs = NULL))
-  expect_error(plotSequenceRatios(result, labs = c(2,3)))
-  expect_error(plotSequenceRatios(result, labs = c("a", "b", "c")))
-  expect_error(plotSequenceRatios(result, onlyaSR = 3))
-  expect_error(plotSequenceRatios(result, onlyaSR = TRUE, colours = c("red", "blue")))
+  expect_error(plotSequenceRatios(result, onlyASR = 3))
+  expect_error(plotSequenceRatios(result, onlyASR = TRUE, colours = c("red", "blue")))
 
   CDMConnector::cdmDisconnect(cdm)
 })
diff --git a/vignettes/a01_Introduction.Rmd b/vignettes/a01_Introduction.Rmd
index f8ece05..967d058 100644
--- a/vignettes/a01_Introduction.Rmd
+++ b/vignettes/a01_Introduction.Rmd
@@ -51,7 +51,7 @@ The CohortSymmetry package works with data mapped to the OMOP CDM. Hence, the in
 
 ```{r message= FALSE, warning=FALSE}
 cdm <- emptyCdmReference(cdmName = "mock") |>
-  mockPerson(nPerson = 1000) |>
+  mockPerson(nPerson = 100) |>
   mockObservationPeriod() |>
   mockCohort(
     name = "index_cohort",
@@ -74,7 +74,6 @@ cdm$index_cohort |>
 
 cdm$marker_cohort |> 
   dplyr::glimpse()
-
 ```
 
 Once we have established a connection to the database, we can use the `generateSequenceCohortSet()` function to find the intersection of the two cohorts. This function will provide us with the individuals who appear in both cohorts, which will be named **intersect** - another cohort in the cdm reference.
@@ -119,9 +118,7 @@ tableSequenceRatios(result)
 Or create a plot with the adjusted sequence ratios:
 
 ```{r message= FALSE, warning=FALSE}
-plotSequenceRatios(result = result,
-                  onlyaSR = T,
-                  colours = "black")
+plotSequenceRatios(result = result)
 ```
 
 ## As a diagram
diff --git a/vignettes/a04_Visualise_sequence_ratios.Rmd b/vignettes/a04_Visualise_sequence_ratios.Rmd
index 2dc87b0..3830acc 100644
--- a/vignettes/a04_Visualise_sequence_ratios.Rmd
+++ b/vignettes/a04_Visualise_sequence_ratios.Rmd
@@ -109,11 +109,11 @@ plotSequenceRatios(result = result)
 
 By default, it plots both the adjusted sequence ratios (and its CIs) and crude sequence ratios (and its CIs). One may wish to only plot adjusted one like so (note since only adjusted is plotted, only one colour needs to be specified):
 
-## Modify `onlyaSR` and `colours`
+## Modify `onlyASR` and `colours`
 
 ```{r message= FALSE, warning=FALSE}
 plotSequenceRatios(result = result,
-                   onlyaSR = T,
+                   onlyASR = T,
                    colours = "black")
 ```
 
@@ -121,7 +121,7 @@ One could change the colour like so:
 
 ```{r message= FALSE, warning=FALSE}
 plotSequenceRatios(result = result,
-                   onlyaSR = T,
+                   onlyASR = T,
                    colours = "red")
 ```
 
@@ -131,7 +131,7 @@ One could set the title like so:
 
 ```{r message= FALSE, warning=FALSE}
 plotSequenceRatios(result = result,
-                   onlyaSR = T,
+                   onlyASR = T,
                    plotTitle = "Adjusted Sequence Ratio",
                    colour = "black")
 ```
@@ -142,7 +142,7 @@ One could also change the $x$ and $y$ labels like so:
 
 ```{r message= FALSE, warning=FALSE}
 plotSequenceRatios(result = result,
-                   onlyaSR = T,
+                   onlyASR = T,
                    plotTitle = "Adjusted Sequence Ratio",
                    colour = "black",
                    labs = c("sequence ratios", "analysis"))