removed dplyr dependency / formatting / spelling mistakes / new tests

reconhub · Nov 1, 2019 · ea48430 · ea48430
1 parent 349fcd8
commit ea48430
Show file tree

Hide file tree

Showing 30 changed files with 318 additions and 217 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -14,7 +14,7 @@ Description: A collection of tools for representing epidemiological contact data
 License: GPL (>=2)
 LazyData: TRUE
 RoxygenNote: 6.1.1
-Imports: grDevices, dplyr, igraph, visNetwork, threejs, colorspace, methods
+Imports: grDevices, tibble, igraph, visNetwork, threejs, colorspace, methods
 Suggests: outbreaks, testthat, covr, shiny, readr, knitr, rmarkdown
 VignetteBuilder: knitr
 URL: https://www.repidemicsconsortium.org/epicontacts/

diff --git a/R/as.igraph.epicontacts.R b/R/as.igraph.epicontacts.R
@@ -47,7 +47,7 @@ as.igraph.epicontacts <- function(x){
   ## Create vertex dataframe using combination of linelist and contacts
 
   all_ids <- data.frame(id = get_id(x, "all"), stringsAsFactors = FALSE)
-  verts <- dplyr::full_join(x$linelist, all_ids, by = "id")
+  verts <- merge(x$linelist, all_ids, by = 'id', all = TRUE)
 
 
   ## Checking if a "name" column exists
@@ -69,13 +69,5 @@ as.igraph.epicontacts <- function(x){
   igraph::vertex_attr(net)$id <- igraph::vertex_attr(net)$name
 
   return(net)
+
 }
-
-
-
-
-
-
-
-
-
diff --git a/R/get_clusters.R b/R/get_clusters.R
@@ -65,16 +65,21 @@ get_clusters <- function(x, output = c("epicontacts", "data.frame"),
   cond <- cluster_cols %in% names(x$linelist)
   if ((sum(cond) > 0) & !override) {
     if (sum(cond) == 1) {
-      msg <- sprintf("'%s' is already in the linelist. Set 'override = TRUE' to write over it, else assign a different %s name.", cluster_cols[cond], cluster_var[cond])
+      msg <- sprintf(paste0("'%s' is already in the linelist. Set 'override",
+                            " = TRUE' to write over it, else assign a ",
+                            "different %s name."),
+                     cluster_cols[cond], cluster_var[cond])
       stop(msg)
     }
     if (sum(cond) == 2) {
-      msg <- sprintf("'%s' and '%s' are already in the linelist. Set 'override = TRUE' to write over them, else assign different cluster column names.", cluster_cols[1], cluster_cols[2])
+      msg <- sprintf(paste0("'%s' and '%s' are already in the linelist. ",
+                            "Set 'override = TRUE' to write over them, ",
+                            "else assign different cluster column names."),
+                     cluster_cols[1], cluster_cols[2])
       stop(msg)
     }
   }
 
-
   output <- match.arg(output)
   net <- as.igraph.epicontacts(x)
   cs <- igraph::clusters(net)
@@ -93,15 +98,17 @@ get_clusters <- function(x, output = c("epicontacts", "data.frame"),
                                    stringsAsFactors = FALSE),
                         c("id", member_col))
 
-  net_nodes <- dplyr::left_join(net_nodes, cs_size, by = member_col)
+  net_nodes <- merge(net_nodes, cs_size, by.x = member_col)
+
   if(output == "epicontacts") {
-    x$linelist <- dplyr::full_join(x$linelist, net_nodes, by = "id")
+    x$linelist <- merge(x$linelist, net_nodes, all = TRUE, by = "id")
     x$linelist[ member_col ] <- as.factor(x$linelist[[ member_col ]])
     return(x)
   } else {
     net_nodes[ member_col ] <- as.factor(net_nodes[[ member_col ]])
     return(net_nodes)
   }
+
 }
 
 
diff --git a/R/get_degree.R b/R/get_degree.R
@@ -38,13 +38,14 @@
 #'
 get_degree <- function(x, type = c("in", "out", "both"),
                        only_linelist = FALSE) {
+
     ## checks
     if (!inherits(x, "epicontacts")) {
         stop("x is not an 'epicontacts' object")
     }
     type <- match.arg(type)
 
-    what      <- if (only_linelist) "linelist" else "contacts"
+    what <- if (only_linelist) "linelist" else "all"
     all_nodes <- get_id(x, which = what, na.rm = TRUE)
 
     if (!x$directed) {
@@ -71,7 +72,6 @@ get_degree <- function(x, type = c("in", "out", "both"),
                       FUN.VALUE = 0L)
     }
 
-
     ## name, shape and return
     names(out) <- all_nodes
     return(out)

diff --git a/R/get_pairwise.R b/R/get_pairwise.R
@@ -80,7 +80,7 @@ get_pairwise <- function(x, attribute, f=NULL, hard_NA=TRUE){
             }
         } else if (is.numeric(values)) {
             f <- function(a, b) {
-                abs(a-b)
+                as.numeric(abs(a-b))
             }
         } else {
             f <- function(a, b){

diff --git a/R/graph3D.R b/R/graph3D.R
@@ -15,8 +15,9 @@
 #'     linelist should be used to color the nodes. Default is \code{id}
 #'
 #' @param annot An index, logical, or character string indicating which fields
-#' of the linelist should be used for annotating the nodes upon mouseover. The default
-#' \code{TRUE} shows the 'id' and 'node_color' (if the grouping column is different from 'id').
+#'   of the linelist should be used for annotating the nodes upon mouseover. The
+#'   default \code{TRUE} shows the 'id' and 'node_color' (if the grouping column
+#'   is different from 'id').
 #'
 #' @param col_pal A color palette for the node_colors.
 #'
@@ -33,14 +34,14 @@
 #' @param edge_size The width of graph edges.
 #'
 #' @note All colors must be specified as color names like "red", "blue", etc. or
-#' as hexadecimal color values without opacity channel, for example "#FF0000", "#0a3e55"
-#' (upper or lower case hex digits are allowed).
+#'   as hexadecimal color values without opacity channel, for example "#FF0000",
+#'   "#0a3e55" (upper or lower case hex digits are allowed).
 #'
 #' Double-click or tap on the plot to reset the view.
 #'
-#' @return
-#' An htmlwidget object that is displayed using the object's show or print method.
-#' (If you don't see your widget plot, try printing it with the \code{print} function.)
+#' @return An htmlwidget object that is displayed using the object's show or
+#'   print method.  (If you don't see your widget plot, try printing it with the
+#'   \code{print} function.)
 #'
 #' @references
 #'
@@ -131,12 +132,13 @@ graph3D <- function(x,
     ## Get vertex attributes and prepare as input for graph
     nodes <- data.frame(id = unique(c(x$linelist$id,
                                       x$contacts$from,
-                                      x$contacts$to)))
+                                      x$contacts$to)),
+                        stringsAsFactors = FALSE)
 
 
     ## join back to linelist to retrieve attributes for grouping
     nodes <- suppressMessages(
-        suppressWarnings(dplyr::left_join(nodes, x$linelist)))
+        suppressWarnings(merge(nodes, x$linelist, all.x = TRUE)))
 
 
 

diff --git a/R/handling.R b/R/handling.R
@@ -94,78 +94,86 @@
 #' plot(y)
 #' }
 "[.epicontacts" <- function(x, i, j,
-                             k = TRUE, l = TRUE,
-                             contacts = c("both", "either", "from", "to"),
-                             ...) {
-    ## In all the following, i is used to subset the linelist, j to subset
-    ## contacts. The behaviour is as follows:
-
-    ## i: if i is character, keep rows for which 'id' is in 'i'; if logical /
-    ## numeric / integer, subset the rows of $linelist
-
-    ## j: if j is character, keep rows for which 'from/to' are in 'j'; the
-    ## argument 'contacts' specify if either, both, and one of the nodes should
-    ## be i 'j'; if logical / numeric / integer, subset the rows of $linelist
-
-    ## 'k' and 'l' are used to subset the columns of attributes in x$linelist
-    ## and x$contacts, respectively; any usual subsetting is fine for these ones
-    ## (index, logical, name), although the 'id', 'from' and 'to' columns are
-    ## discarded from the subsetting. That is:
-
-    ## - in x$linelist: k=1 will refer to the 2nd column (i.e. after 'id')
-
-    ## - in x$contacts: l=1 will refer to the 3nd column (i.e. after 'from' and
-    ## 'to')
-
-    ## subset $linelist
-    if (missing(i)) {
-        i <- TRUE
+                            k = TRUE, l = TRUE,
+                            contacts = c("both", "either", "from", "to"),
+                            ...) {
+  ## In all the following, i is used to subset the linelist, j to subset
+  ## contacts. The behaviour is as follows:
+
+  ## i: if i is character, keep rows for which 'id' is in 'i'; if logical /
+  ## numeric / integer, subset the rows of $linelist
+
+  ## j: if j is character, keep rows for which 'from/to' are in 'j'; the
+  ## argument 'contacts' specify if either, both, and one of the nodes should
+  ## be i 'j'; if logical / numeric / integer, subset the rows of $linelist
+
+  ## 'k' and 'l' are used to subset the columns of attributes in x$linelist
+  ## and x$contacts, respectively; any usual subsetting is fine for these ones
+  ## (index, logical, name), although the 'id', 'from' and 'to' columns are
+  ## discarded from the subsetting. That is:
+
+  ## - in x$linelist: k=1 will refer to the 2nd column (i.e. after 'id')
+
+  ## - in x$contacts: l=1 will refer to the 3nd column (i.e. after 'from' and
+  ## 'to')
+
+  ## subset $linelist
+  if (missing(i)) {
+    i <- TRUE
+  }
+  if (inherits(i, 'Date')) {
+    stop("Cannot subset by date")
+  }
+  if (is.character(i)) {
+    i <- x$linelist$id %in% i
+  }
+
+
+  x$linelist <- x$linelist[i, , drop=FALSE]
+
+  ## make sure 'id' is the first column, keep columns 'k'
+  if (ncol(x$linelist) > 1) {
+    x$linelist <- data.frame(c(x$linelist[1],
+                               x$linelist[-1][k]),
+                             stringsAsFactors = FALSE)
+  }
+
+
+  ## subset $contacts
+  if (missing(j)) {
+    j <- TRUE
+  }
+
+  ## subsetting based on node ids
+  if (is.character(j)) {
+    contacts <- match.arg(contacts)
+    if (contacts == "both") {
+      j <- (x$contacts$from %in% j) & (x$contacts$to %in% j)
     }
-    if (is.character(i)) {
-        i <- x$linelist$id %in% i
+    if (contacts == "either") {
+      j <- (x$contacts$from %in% j) | (x$contacts$to %in% j)
     }
-
-    x$linelist <- x$linelist[i, , drop=FALSE]
-
-    ## make sure 'id' is the first column, keep columns 'k'
-    if (ncol(x$linelist) > 1) {
-        x$linelist <- data.frame(c(x$linelist[1],
-                                   x$linelist[-1][k]),
-                                 stringsAsFactors = FALSE)
+    if (contacts == "from") {
+      j <- x$contacts$from %in% j
     }
-
-
-    ## subset $contacts
-    if (missing(j)) {
-        j <- TRUE
+    if (contacts == "to") {
+      j <- x$contacts$to %in% j
     }
+  }
+  if (inherits(j, 'Date')) {
+    stop("Cannot subset by date")
+  }
 
-    ## subsetting based on node ids
-    if (is.character(j)) {
-        contacts <- match.arg(contacts)
-        if (contacts == "both") {
-            j <- (x$contacts$from %in% j) & (x$contacts$to %in% j)
-        }
-        if (contacts == "either") {
-            j <- (x$contacts$from %in% j) | (x$contacts$to %in% j)
-        }
-        if (contacts == "from") {
-            j <- x$contacts$from %in% j
-        }
-        if (contacts == "to") {
-            j <- x$contacts$to %in% j
-        }
-    }
 
-    x$contacts <- x$contacts[j, , drop=FALSE]
+  x$contacts <- x$contacts[j, , drop=FALSE]
 
-    ## make sure from/to are the first 2 columns, keep columns 'l'
-    if (ncol(x$contacts) > 2) {
-        x$contacts <- data.frame(c(x$contacts[1:2],
-                                   x$contacts[-c(1:2)][l]),
-                                 stringsAsFactors = FALSE)
+  ## make sure from/to are the first 2 columns, keep columns 'l'
+  if (ncol(x$contacts) > 2) {
+    x$contacts <- data.frame(c(x$contacts[1:2],
+                               x$contacts[-c(1:2)][l]),
+                             stringsAsFactors = FALSE)
 
-    }
+  }
 
-    return(x)
+  return(x)
 }
diff --git a/R/make_epicontacts.R b/R/make_epicontacts.R
@@ -95,8 +95,8 @@ make_epicontacts <- function(linelist, contacts, id = 1L, from = 1L, to = 2L,
   }
 
   if (is.vector(linelist) &&
-        length(linelist) == 1L &&
-        is.na(linelist)) {
+      length(linelist) == 1L &&
+      is.na(linelist)) {
     stop("linelist is NA")
   }
 
@@ -119,8 +119,10 @@ make_epicontacts <- function(linelist, contacts, id = 1L, from = 1L, to = 2L,
   linelist <- subset(linelist,
                      select = c(id, setdiff(seq_len(ncol(linelist)), id)))
 
-  ## convert factors to characters
-  if (is.factor(linelist$id)) linelist$id <- as.character(linelist$id)
+  ## convert factors and dates to characters
+  if (inherits(linelist$id, c("Date", "factor"))) {
+    linelist$id <- as.character(linelist$id)
+  }
 
 
   ## process contacts ##
@@ -131,8 +133,8 @@ make_epicontacts <- function(linelist, contacts, id = 1L, from = 1L, to = 2L,
   }
 
   if (is.vector(contacts) &&
-        length(contacts) == 1L &&
-        is.na(contacts)) {
+      length(contacts) == 1L &&
+      is.na(contacts)) {
     stop("contacts is NA")
   }
 
@@ -141,7 +143,7 @@ make_epicontacts <- function(linelist, contacts, id = 1L, from = 1L, to = 2L,
   if(nrow(contacts) < 1L) {
     stop("contacts should have at least one row")
   }
-  if (ncol(contacts) < 2L) {
+  if(ncol(contacts) < 2L) {
     stop("contacts should have at least two columns")
   }
 
@@ -159,8 +161,8 @@ make_epicontacts <- function(linelist, contacts, id = 1L, from = 1L, to = 2L,
 
   ## ensure all IDs are stored as characters if one is
   if (is.character(linelist$id) ||
-        is.character(contacts$from) ||
-        is.character(contacts$to)) {
+      is.character(contacts$from) ||
+      is.character(contacts$to)) {
     linelist$id <- as.character(linelist$id)
     contacts$from <- as.character(contacts$from)
     contacts$to <- as.character(contacts$to)

diff --git a/R/plot.R b/R/plot.R
@@ -7,10 +7,11 @@
 #'
 #' @param x An \code{\link{epicontacts}} object
 #'
-#' @param node_color An integer or a character string indicating which attribute column
-#' in the linelist should be used to color the nodes.
+#' @param node_color An integer or a character string indicating which attribute
+#'   column in the linelist should be used to color the nodes.
 #'
-#' @param thin A logical indicating if the data should be thinned with \code{\link{thin}} so that only cases with contacts should be plotted.
+#' @param thin A logical indicating if the data should be thinned with
+#'   \code{\link{thin}} so that only cases with contacts should be plotted.
 #'
 #' @param method A character string indicating the plotting method to be used;
 #' available values are "visNetwork" and "graph3D"; see details.

diff --git a/R/print.epicontacts.R b/R/print.epicontacts.R
@@ -17,14 +17,14 @@ print.epicontacts <- function(x, ...){
     cat("\n  //", format(nrow(x$linelist),big.mark=","),
         "cases in linelist;",
         format(nrow(x$contacts), big.mark=","),
-        "contacts; ", ifelse(x$directed, "directed", "non directed"),
+        "contacts;", ifelse(x$directed, "directed", "non directed"),
         "\n")
 
     cat("\n  // linelist\n\n")
-    print(dplyr::tbl_df(x$linelist))
+    print(tibble::as_tibble(x$linelist))
 
     cat("\n  // contacts\n\n")
-    print(dplyr::tbl_df(x$contacts))
+    print(tibble::as_tibble(x$contacts))
 
     cat("\n")
 }