PLN-team · jchiquet · Sep 11, 2024 · Aug 28, 2024 · Aug 28, 2024 · Sep 10, 2024
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -33,3 +33,4 @@
 ^data-raw$
 ^CRAN-SUBMISSION$
 ^AUTHORS$
+^dev$
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -2,65 +2,74 @@ Package: PLNmodels
 Title: Poisson Lognormal Models
 Version: 1.2.1
 Authors@R: c(
-  person("Julien", "Chiquet", role = c("aut", "cre"), email = "julien.chiquet@inrae.fr",
-      comment = c(ORCID = "0000-0002-3629-3429")),
-  person("Mahendra", "Mariadassou", role = "aut", email = "mahendra.mariadassou@inrae.fr", 
-      comment = c(ORCID = "0000-0003-2986-354X")),
-  person("Stéphane", "Robin", role ="aut", email = "stephane.robin@inrae.fr"),
-  person("François", "Gindraud", role = "aut", email = "francois.gindraud@gmail.com"),
-  person("Julie", "Aubert", role = "ctb", email = "julie.aubert@inrae.fr"),
-  person("Bastien", "Batardière", role = "ctb", email = "bastien.batardiere@inrae.fr"),
-  person("Giovanni", "Poggiato", role = "ctb", email = "giov.poggiato@gmail.com"),
-  person("Cole", "Trapnell", role = "ctb", email = "coletrap@uw.edu"),
-  person("Maddy", "Duran", role = "ctb", email = "duran@uw.edu")
+    person("Julien", "Chiquet", , "julien.chiquet@inrae.fr", role = c("aut", "cre"),
+           comment = c(ORCID = "0000-0002-3629-3429")),
+    person("Mahendra", "Mariadassou", , "mahendra.mariadassou@inrae.fr", role = "aut",
+           comment = c(ORCID = "0000-0003-2986-354X")),
+    person("Stéphane", "Robin", , "stephane.robin@inrae.fr", role = "aut"),
+    person("François", "Gindraud", , "francois.gindraud@gmail.com", role = "aut"),
+    person("Julie", "Aubert", , "julie.aubert@inrae.fr", role = "ctb"),
+    person("Bastien", "Batardière", , "bastien.batardiere@inrae.fr", role = "ctb"),
+    person("Giovanni", "Poggiato", , "giov.poggiato@gmail.com", role = "ctb"),
+    person("Cole", "Trapnell", , "coletrap@uw.edu", role = "ctb"),
+    person("Maddy", "Duran", , "duran@uw.edu", role = "ctb")
   )
-Description: The Poisson-lognormal model and variants (Chiquet, Mariadassou and Robin, 
-    2021 <doi:10.3389/fevo.2021.588292>) can be used for 
-    a variety of multivariate problems when count data are at play, including 
-    principal component analysis for count data, discriminant analysis, model-based clustering and 
-    network inference. Implements variational algorithms to fit such models accompanied with a set of 
-    functions for visualization and diagnostic. 
+Description: The Poisson-lognormal model and variants (Chiquet,
+    Mariadassou and Robin, 2021 <doi:10.3389/fevo.2021.588292>) can be
+    used for a variety of multivariate problems when count data are at
+    play, including principal component analysis for count data,
+    discriminant analysis, model-based clustering and network inference.
+    Implements variational algorithms to fit such models accompanied with
+    a set of functions for visualization and diagnostic.
 URL: https://pln-team.github.io/PLNmodels/
 BugReports: https://github.com/pln-team/PLNmodels/issues
 License: GPL (>= 3)
-Encoding: UTF-8
-Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.2
 Depends: R (>= 3.6)
-LazyData: true
-biocViews:
 Imports: 
-  methods,
-  stats,
-  MASS,
-  future,
-  future.apply,
-  R6,
-  glassoFast,
-  pscl,
-  Matrix,
-  Rcpp,
-  nloptr,
-  igraph,
-  grid,
-  gridExtra,
-  dplyr,
-  tidyr,
-  purrr,
-  ggplot2,
-  corrplot,
-  magrittr,
-  torch,
-  rlang
+    cli,
+    corrplot,
+    dplyr,
+    future,
+    future.apply,
+    ggplot2,
+    glassoFast,
+    grDevices,
+    grid,
+    gridExtra,
+    igraph,
+    magrittr,
+    MASS,
+    Matrix,
+    methods,
+    nloptr,
+    parallel,
+    pscl,
+    purrr,
+    R6,
+    Rcpp,
+    rlang,
+    scales,
+    stats,
+    tidyr,
+    torch
 Suggests: 
+    factoextra,
     knitr,
     rmarkdown,
-    testthat,
-    pkgdown,
     spelling,
-    factoextra
-LinkingTo: Rcpp, RcppArmadillo, nloptr
-VignetteBuilder: knitr
+    testthat
+LinkingTo: 
+    nloptr,
+    Rcpp,
+    RcppArmadillo
+VignetteBuilder: 
+    knitr
+biocViews:
+Encoding: UTF-8
+Language: en-US
+LazyData: true
+Roxygen: list(markdown = TRUE)
+RoxygenNote: 7.3.2
 Collate: 
     'PLNfit-class.R'
     'PLN.R'
@@ -102,4 +111,3 @@ Collate:
     'utils-zipln.R'
     'utils.R'
     'zzz.R'
-Language: en-US
diff --git a/NAMESPACE b/NAMESPACE
@@ -107,6 +107,7 @@ importFrom(purrr,map_dbl)
 importFrom(purrr,map_int)
 importFrom(purrr,reduce)
 importFrom(rlang,.data)
+importFrom(scales,alpha)
 importFrom(stats,.getXlevels)
 importFrom(stats,.lm.fit)
 importFrom(stats,as.formula)

diff --git a/R/barents.R b/R/barents.R
@@ -7,7 +7,7 @@
 #'
 #' @format A data frame with 6 variables:
 #' * Abundance: A 30 fish species by 89 sites count matrix
-#' * Offset: A 30 fish species by 116 samples offset matrix, measuring the sampling effort in each site
+#' * Offset: A 30 fish species by 89 samples offset matrix, measuring the sampling effort in each site
 #' * 4 covariates for latitude, longitude, depth (in meters), temperature (in Celsius degrees).
 #'
 #' @references Fossheim, Maria, Einar M. Nilssen, and Michaela Aschan. "Fish assemblages in the Barents Sea." Marine Biology Research 2.4 (2006). \doi{10.1080/17451000600815698}

diff --git a/R/import_utils.R b/R/import_utils.R
@@ -3,19 +3,27 @@
 ## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
 ## Internal function to find the most comprehensive set of common samples between a count table and a covariates data.frame
-common_samples <- function(counts, covariates) {
+common_samples <- function(counts, covariates, call = rlang::caller_env()) {
   ## Have default samples names been created when matching samples?
   default_names <- FALSE
   ## Sanity checks:
-  name_warning <- paste("There are no matching names in the count matrix and the covariates data.frame.",
-                        "Function will proceed assuming:",
-                        "- samples are in the same order;",
-                        "- samples are rows of the abundance matrix.", sep = "\n")
+  name_warning <- c(
+    "!" = "There are no matching names in {.var counts} and {.var covariates}.",
+    "i" = "Function will proceed assuming:",
+    "i" = "- samples are in the same order;",
+    "i" = "- samples are rows of {.var counts}.", sep = "\n"
+  )
+  row_count_abort <- c(
+    "x" = "{.var counts} and {.var covariates} have different number of row(s):",
+    "i" = "{.var counts} has {.cls {nrow(counts)}} row(s);",
+    "i" = "{.var covariates} has {.cls {nrow(covariates)}} row(s).",
+    sep = "\n"
+  )
   ## no sample names in covariates: create sample names
   if (is.null(rownames(covariates))) {
-    warning(name_warning)
+    cli::cli_warn(name_warning, call = call)
     if (nrow(counts) != nrow(covariates)) {
-      stop("Incompatible dimensions")
+      cli::cli_abort(row_count_abort, call = call)
     }
     if (is.null(rownames(counts))) rownames(counts) <- paste0("Sample_", 1:nrow(counts))
     rownames(covariates) <- rownames(counts)
@@ -27,12 +35,15 @@ common_samples <- function(counts, covariates) {
     # If name matching is impossible, abort if
     ## - dimension are incompatible or
     ## - row (samples) names are conflicting
-    warning(name_warning)
+    cli::cli_warn(name_warning, call = call)
     if (nrow(counts) != nrow(covariates)) {
-      stop("Incompatible dimensions")
+      cli::cli_abort(row_count_abort, call = call)
     }
     if (!is.null(rownames(counts))) {
-      stop("Conflicting samples names in count matrix and covariates data frames")
+      cli::cli_abort(c(
+        "x" = "Conflicting sample names in {.var counts} matrix and {.var covariates} data frames",
+        "i" = "Sample names in {.var counts} matrix is {.cls {rownames(counts)}} and in {.var covariates} is {.cls {rownames(covariates)}}."
+      ), call = call)
     }
     rownames(counts) <- rownames(covariates)
     default_names <- TRUE
@@ -44,8 +55,20 @@ common_samples <- function(counts, covariates) {
   if (sample_are_cols) counts <- t(counts)
   ## Ensure consistency by using only common samples
   common_samples <- intersect(rownames(counts), rownames(covariates))
+  # Add condition when less counts than covariates information
   if (length(common_samples) < nrow(counts)) {
-    message(paste0(nrow(counts) - length(common_samples), " samples were dropped from the abundance matrix for lack of associated covariates."))
+    cli::cli_warn(c(
+      "!" = "There are less samples in {.var counts} than in {.var covariates}.",
+      "i" = "{.cls {nrow(counts) - length(common_samples)}} samples were dropped from the {.var counts} matrix for lack of associated {.var covariates}.",
+      "i" = "{.cls There {?is/are} {length(common_samples)}} sample{?s} in the final data.frame."
+    ), call = call)
+  }
+  if (length(common_samples) < nrow(covariates)) {
+    cli::cli_warn(c(
+      "!" = "There are less samples in {.var covariates} than in {.var counts}.",
+      "i" = "{.cls {nrow(covariates) - length(common_samples)}} samples were dropped from {.var covariates} for lack of associated {.var counts}.",
+      "i" = "{.cls There {?is/are} {length(common_samples)}} sample{?s} in the final data.frame."
+    ), call = call)
   }
   return(list(transpose_counts = sample_are_cols,
               common_samples   = common_samples,
@@ -403,6 +426,7 @@ species_variance <- function(counts, groups = rep(1, nrow(counts)), depths_as_of
 #' @param counts Required. An abundance count table, preferably with dimensions names and species as columns.
 #' @param covariates Required. A covariates data frame, preferably with row names.
 #' @param offset Optional. Normalization scheme used to compute scaling factors used as offset during PLN inference. Available schemes are "TSS" (Total Sum Scaling, default), "CSS" (Cumulative Sum Scaling, used in metagenomeSeq), "RLE" (Relative Log Expression, used in DESeq2), "GMPR" (Geometric Mean of Pairwise Ratio, introduced in Chen et al., 2018), Wrench (introduced in Kumar et al., 2018) or "none". Alternatively the user can supply its own vector or matrix of offsets (see note for specification of the user-supplied offsets).
+#' @param call Optional. The execution environment in which to set the local error call.
 #' @param ... Additional parameters passed on to [compute_offset()]
 #'
 #' @references Chen, L., Reeve, J., Zhang, L., Huang, S., Wang, X. and Chen, J. (2018) GMPR: A robust normalization method for zero-inflated count data with application to microbiome sequencing data. PeerJ, 6, e4600 \doi{10.7717/peerj.4600}
@@ -429,7 +453,7 @@ species_variance <- function(counts, groups = rep(1, nrow(counts)), depths_as_of
 #' )
 #' proper_data$Abundance
 #' proper_data$Offset
-prepare_data <- function(counts, covariates, offset = "TSS", ...) {
+prepare_data <- function(counts, covariates, offset = "TSS", call = rlang::caller_env(), ...) {
   ## Convert counts and covariates to expected format
   counts     <- data.matrix(counts, rownames.force = TRUE)
   covariates <- as.data.frame(covariates)
@@ -448,15 +472,19 @@ prepare_data <- function(counts, covariates, offset = "TSS", ...) {
   counts <- counts[samples, , drop = FALSE]
   ## Replace NA with 0s
   if (any(is.na(counts))) {
+    cli::cli_warn(c(
+      "!" = "There is at least one NA value in {.var counts}.",
+      "i" = "{.cls {sum(is.na(counts))}} NA value{?s} in {.var counts} {?has/have} been replaced with 0."
+    ), call = call)
     counts[is.na(counts)] <- 0
-    warning("NA values in count table replaced with 0.")
   }
   ## filter out empty samples
   empty_samples <- which(rowSums(counts) == 0)
-  if (length(empty_samples)) {
-    warning(paste0("Sample(s) ",
-                   paste(samples[empty_samples], collapse = " and "),
-                   " dropped for lack of positive counts."))
+  if (length(empty_samples) > 0) { # Add > 0
+    cli::cli_warn(c(
+      "!" = "There  is at least one empty sample in {.var counts}.",
+      "i" = "{.cls {length(empty_samples)}} sample{?s} ({.cls {samples[empty_samples]}}) in {.var counts} {?has/have} been dropped for lack of positive counts."
+    ), call = call)
     samples <- samples[-empty_samples]
     counts <- counts[samples, ,drop = FALSE]
   }
@@ -511,10 +539,12 @@ prepare_data <- function(counts, covariates, offset = "TSS", ...) {
 compute_offset <- function(counts, offset = c("TSS", "GMPR", "RLE", "CSS", "Wrench", "TMM", "none"), scale = c("none", "count"), ...) {
   ## special behavior for data.frame
   if (inherits(offset, "data.frame")) {
-    stop(
-  "You supplied a data.frame to compute_offset(). Did you mean to supply a numeric matrix?
-  Try converting your data.frame to a matrix with as.matrix()."
-  )
+    cli::cli_abort(c(
+      "{.var offset} must be an available scheme or a vector or matrix of offsets.",
+      "x" = "You supplied a data.frame for {.var offset}",
+      "i" = "Did you mean to supply a numeric matrix?",
+      "i" = "Try converting your data.frame to a matrix with `as.matrix()`."
+    ))
   }
   ## special behavior for numeric offset
   if (is.numeric(offset)) {

diff --git a/R/plot_utils.R b/R/plot_utils.R
@@ -86,6 +86,7 @@ circle <- function(center = c(0, 0), radius = 1, npoints = 100) {
   return(data.frame(x = xx, y = yy))
 }
 
+#' @importFrom scales alpha
 GeomCircle <- ggplot2::ggproto("GeomCircle",
     ggplot2::Geom,
     required_aes = c("x", "y", "radius"),

diff --git a/R/utils-zipln.R b/R/utils-zipln.R
@@ -27,15 +27,15 @@ extract_model_zi <- function(call, envir) {
   call_args <- c(as.list(call_args), list(xlev = attr(call$formula, "xlevels"), na.action = NULL))
 
   ## Extract terms for ZI and PLN components
-  terms <- .extract_terms_zi(as.formula(call$formula, env = envir))
+  terms <- .extract_terms_zi(as.formula(eval(call$formula, env = envir)))
   ## eval the call in the parent environment with adjustement due to ZI terms
   call_args$formula <- terms$formula
   frame <- do.call(stats::model.frame, call_args, envir = envir)
 
   ## Save level for predict function
   xlevels <- list(PLN = .getXlevels(terms$PLN, frame))
   if (!is.null(terms$ZI)) xlevels$ZI = .getXlevels(terms$ZI, frame)
-  attr(call$formula, "xlevels") <- xlevels
+  if (!is.null(xlevels$PLN)) attr(call$formula, "xlevels") <- xlevels
 
   ## Create the set of matrices to fit the PLN model
   X  <- model.matrix(terms$PLN, frame, xlev = xlevels$PLN)

diff --git a/inst/case_studies/oaks_tree.R b/inst/case_studies/oaks_tree.R
@@ -97,14 +97,16 @@ factoextra::fviz_pca_biplot(
 
 ## Network inference with sparce covariance estimation
 
-system.time(myZIPLNnets <- ZIPLNnetwork(Abundance ~ 0 + tree + offset(log(Offset)), zi = "single", data = oaks, control = ZIPLNnetwork_param(min_ratio = 0.1)))
-
 system.time(myPLNnets <- PLNnetwork(Abundance ~ 0 + tree + offset(log(Offset)), data = oaks, control = PLNnetwork_param(min_ratio = 0.1)))
 plot(myPLNnets)
 plot(getBestModel(myPLNnets, "EBIC"))
 # stability_selection(myPLNnets)
 # plot(getBestModel(myPLNnets, "StARS", stability = .975))
 
+system.time(myZIPLNnets <- ZIPLNnetwork(Abundance ~ 0 + tree + offset(log(Offset)), zi = "single", data = oaks, control = ZIPLNnetwork_param(min_ratio = 0.1)))
+plot(myZIPLNnets)
+plot(getBestModel(myZIPLNnets, "EBIC"))
+
 ## Mixture model to recover tree structure
 system.time(my_mixtures <- PLNmixture(Abundance ~ 1 + offset(log(Offset)), data = oaks, clusters = 1:5))
 

diff --git a/man/barents.Rd b/man/barents.Rd
diff --git a/man/prepare_data.Rd b/man/prepare_data.Rd