mixOmicsTeam · Max-Bladen · Dec 13, 2022 · Mar 9, 2022 · Mar 13, 2022 · Apr 26, 2022
diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml
@@ -33,7 +33,7 @@ jobs:
       docker_tag: ${{ steps.setup.outputs.docker_tag }}
     steps:
       - uses: rlespinasse/github-slug-action@v3.x
-      - uses: r-lib/actions/setup-r@master
+      - uses: r-lib/actions/setup-r@v2
       - name: Get R/Bioc versions
         id: setup
         run: |
@@ -50,18 +50,18 @@ jobs:
             bioc="devel"
             r="$r_devel"
           fi
-          echo ::set-output name=r::$r
-          echo ::set-output name=bioc::$bioc
+          echo "r=$r" >> $GITHUB_OUTPUT
+          echo "bioc=$bioc" >> $GITHUB_OUTPUT
 
           ## Docker
           # Only Dockerise for 'master' or 'RELEASE_*' branches
           dockerise='false'
           [[ github.event_name != 'schedule' && ($BRANCH_NAME == "master" || $BRANCH_NAME =~ "release_") ]] && dockerise='true'
-          echo ::set-output name=dockerise::$dockerise
+          echo "dockerise=$dockerise" >> $GITHUB_OUTPUT
           # Docker tag is 'github' for master and the branch name for release branches
           docker_tag="github"
           [[ $BRANCH_NAME =~ "release_" ]] && docker_tag=$BRANCH_NAME
-          echo ::set-output name=docker_tag::$docker_tag
+          echo "docker_tag=$docker_tag" >> $GITHUB_OUTPUT
 
   R-CMD-check:
     needs: [gatekeeper, versions]
@@ -91,7 +91,7 @@ jobs:
         uses: actions/checkout@v2
 
       - name: Set up R ▶️
-        uses: r-lib/actions/setup-r@master
+        uses: r-lib/actions/setup-r@v2
         if: matrix.config.image == null
         with:
           r-version: ${{ matrix.config.r }}

diff --git a/.github/workflows/push2bioc.yml b/.github/workflows/push2bioc.yml
@@ -1,8 +1,8 @@
 on:
   workflow_dispatch:
     inputs:
-      push:
-        description: "Type 'yes' to push. Otherwise, only diff with remote is shown."
+      action:
+        description: "Type 'push' to push, 'pull' to pull latest changes from Bioconductor's remote. Otherwise, only diff with remote is shown."
         required: true
         default: "show diff only"
 
@@ -37,11 +37,14 @@ jobs:
           git config user.name '$MAINTAINER_NAME'
           git config user.email '$MAINTAINER_EMAIL'
           git remote add bioc git@git.bioconductor.org:packages/mixOmics.git
-          #--- push
+          #---
           git fetch --all # for logs in case of failure
           current_branch=${{ github.ref_name }}
-          if [  "${{ github.event.inputs.push }}" == "yes" ]; then
+          if [  "${{ github.event.inputs.action }}" == "push" ]; then
             git push bioc $current_branch:$current_branch
+          elif [  "${{ github.event.inputs.action }}" == "pull" ]; then
+            git merge -X theirs bioc/$current_branch --no-edit
+            git push origin $current_branch:$current_branch
           else
             # show diffs
             git log $current_branch..bioc/$current_branch --oneline --decorate

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mixOmics
 Type: Package
 Title: Omics Data Integration Project
-Version: 6.19.4
+Version: 6.23.2
 Depends: R (>= 3.5.0), 
          MASS, 
          lattice, 
@@ -34,16 +34,17 @@ Authors@R:
       person("Florian", "Rohart", role = "aut"), 
       person("Ignacio", "Gonzalez", role = "aut"),
       person("Sebastien", "Dejean", role = "aut"), 
+      person("Max", "Bladen", role = c("aut", "cre"), email = "mbladen19@gmail.com"),  # "cre" = maintainer; must agree with the Maintainer field
       ## key contributors
-      person("Al", "Abadi", "J", role = c("ctb", "cre"), email = "al.jal.abadi@gmail.com"), 
+      person("Al J", "Abadi", role = "ctb", email = "al.jal.abadi@gmail.com"), 
       person("Benoit", "Gautier", role = "ctb"), 
       person("Francois", "Bartolo", role = "ctb"), 
       ## also contributions from
       person("Pierre", "Monget", role = "ctb"),
       person("Jeff", "Coquery", role = "ctb"),
       person("FangZou", "Yao", role = "ctb"),
       person("Benoit", "Liquet", role = "ctb"))
-Maintainer: Al J Abadi <al.jal.abadi@gmail.com>
+Maintainer: Max Bladen <mbladen19@gmail.com>
 Description: Multivariate methods are well suited to large omics data sets where the number of variables (e.g. genes, proteins, metabolites) is much larger than the number of samples (patients, cells, mice). They have the appealing properties of reducing the dimension of the data by using instrumental variables (components), which are defined as combinations of all variables. Those components are then used to produce useful graphical outputs that enable better understanding of the relationships and correlation structures between the different data sets that are integrated. mixOmics offers a wide range of multivariate methods for the exploration and integration of biological datasets with a particular focus on variable selection. The package proposes several sparse multivariate models we have developed to identify the key variables that are highly correlated, and/or explain the biological outcome of interest. The data that can be analysed with mixOmics may come from high throughput sequencing technologies, such as omics data (transcriptomics, metabolomics, proteomics, metagenomics etc) but also beyond the realm of omics (e.g. spectral imaging). The methods implemented in mixOmics can also handle missing values without having to delete entire rows with missing data. A non exhaustive list of methods include variants of generalised Canonical Correlation Analysis, sparse Partial Least Squares and sparse Discriminant Analysis. Recently we implemented integrative methods to combine multiple data sets: N-integration with variants of Generalised Canonical Correlation Analysis and P-integration with variants of multi-group Partial Least Squares.
 License: GPL (>= 2)
 URL: http://www.mixOmics.org
@@ -63,5 +64,5 @@ biocViews: ImmunoOncology,
            MultipleComparison, 
            Classification, 
            Regression
-RoxygenNote: 7.1.2
+RoxygenNote: 7.2.2
 Encoding: UTF-8
diff --git a/NAMESPACE b/NAMESPACE
@@ -268,6 +268,7 @@ importFrom(stats,predict)
 importFrom(stats,quantile)
 importFrom(stats,reorder)
 importFrom(stats,sd)
+importFrom(stats,setNames)
 importFrom(stats,t.test)
 importFrom(stats,var)
 importFrom(tidyr,gather)

diff --git a/R/LOGOCV.R b/R/LOGOCV.R
@@ -263,8 +263,12 @@ LOGOCV <- function(X,
                 })
             }
 
-            # average BER over the study
-            error.mean[[ijk]] = apply(error, 2, mean)
+            # weighted average BER over the studies
+            error.mean[[ijk]] = apply(error, 2, function(x) {
+                sum(x * table(study)/length(study))
+            })
+
+
             keepX.opt[[ijk]] =
                 which(error.mean[[ijk]] ==  min(error.mean[[ijk]]))[1]
 

diff --git a/R/MCV.block.splsda.R b/R/MCV.block.splsda.R
@@ -115,21 +115,28 @@ MCVfold.block.splsda <-
         # prediction of all samples for each test.keepX and  nrep at comp fixed
         folds.input = folds
 
+        n = nrow(X[[1]])
+        repeated.measure = 1:n
+
         #-- define the folds --#
         if (validation ==  "Mfold")
         {
-            n = nrow(X[[1]])
-            repeated.measure = 1:n
 
-            if (is.null(folds) || !is.numeric(folds) || folds < 2 || folds > n)
-            {
-                stop("Invalid number of folds.")
-            } else {
+            if (is.null(folds) || !is.numeric(folds)) {
+                stop("'folds' need to be non-NULL and numeric")
+            } 
+            else if (folds < 2) {
+                stop("'folds' needs to be at least 2")  
+            } 
+            else if (folds > n) {
+                stop("'folds' cannot be greater than the number of input samples") 
+            } 
+            else {
                 M = round(folds)
             }
         } else if (validation ==  "loo") {
             M = n
-            if(nrepeat != 1) stop("nrepeat should be set to 1 with validation='loo'\n")
+            if(nrepeat != 1) { stop("nrepeat should be set to 1 with validation='loo'\n") }
         }
 
         all_folds <- lapply(seq_len(nrepeat), function(nrep) {

diff --git a/R/S3methods-plot.tune.R b/R/S3methods-plot.tune.R
@@ -198,118 +198,6 @@ plot.tune.spls <-
         res$gg.plot
     }
 
-## -------------------------- plot.tune.splsda -------------------------- ##
-#' @name plot.tune
-#' @method plot tune.splsda
-#' @importFrom reshape2 melt
-#' @export
-plot.tune.splsda <-
-    function(x, optimal = TRUE, sd = NULL, col, ...)
-    {
-        # to satisfy R CMD check that doesn't recognise x, y and group (in aes)
-        y = Comp = lwr = upr = NULL
-
-        if (!is.logical(optimal))
-            stop("'optimal' must be logical.", call. = FALSE)
-        sd = .change_if_null(sd, !is.null(x$error.rate.sd))
-        error <- x$error.rate
-        if(sd & !is.null(x$error.rate.sd))
-        {
-            error.rate.sd = x$error.rate.sd
-            ylim = range(c(error + error.rate.sd), c(error - error.rate.sd))
-        } else {
-            error.rate.sd = NULL
-            ylim = range(error)
-        }
-
-        select.keepX <- x$choice.keepX[colnames(error)]
-        comp.tuned = length(select.keepX)
-
-        legend=NULL
-        measure = x$measure
-
-        if (length(select.keepX) < 10)
-        {
-            #only 10 colors in color.mixo
-            if(missing(col))
-                col = color.mixo(seq_len(comp.tuned))
-        } else {
-            #use color.jet
-            if(missing(col))
-                col = color.jet(comp.tuned)
-        }
-        if(length(col) != comp.tuned)
-            stop("'col' should be a vector of length ", comp.tuned,".")
-
-        if(measure == "overall")
-        {
-            ylab = "Classification error rate"
-        } else if (measure == "BER")
-        {
-            ylab = "Balanced error rate"
-        } else if (measure == "MSE"){
-            ylab = "MSE"
-        }else if (measure == "MAE"){
-            ylab = "MAE"
-        }else if (measure == "Bias"){
-            ylab = "Bias"
-        }else if (measure == "R2"){
-            ylab = "R2"
-        }else if (measure == "AUC"){
-            ylab = "AUC"
-        }
-
-        #legend
-        names.comp = substr(colnames(error),5,10) # remove "comp" from the name
-        if(length(x$choice.keepX) == 1){
-            #only first comp tuned
-            legend = "1"
-        } else if(length(x$choice.keepX) == comp.tuned) {
-            # all components have been tuned
-            legend = c("1", paste("1 to", names.comp[-1]))
-        } else {
-            #first components were not tuned
-            legend = paste("1 to", names.comp)
-        }
-
-
-        # creating data.frame with all the information
-        df = melt(error)
-        colnames(df) = c("x","Comp","y")
-        df$Comp = factor(df$Comp, labels=legend)
-
-        p = ggplot(df, aes(x = x, y = y, color = Comp)) +
-            labs(x = "Number of selected features", y = ylab) +
-            theme_bw() +
-            geom_line()+ geom_point()
-        p = p+ scale_x_continuous(trans='log10') +
-            scale_color_manual(values = col)
-
-        # error bar
-        if(!is.null(error.rate.sd))
-        {
-            dferror = melt(error.rate.sd)
-            df$lwr = df$y - dferror$value
-            df$upr = df$y + dferror$value
-
-            #adding the error bar to the plot
-            p = p + geom_errorbar(data=df,aes(ymin=lwr, ymax=upr))
-        }
-
-        if(optimal)
-        {
-            index = NULL
-            for(i in seq_len(comp.tuned))
-                index = c(index, which(df$x == select.keepX[i] & df$Comp == levels(df$Comp)[i]))
-
-            # adding the choseen keepX to the graph
-            p=p + geom_point(data=df[index,],size=7, shape = 18)
-            p = p + guides(color = guide_legend(override.aes =
-                                                    list(size=0.7,stroke=1)))
-        }
-
-        p
-    }
 ## ------------------------ plot.tune.block.(s)plsda ---------------------- ##
 #' @importFrom gridExtra grid.arrange
 #' @rdname plot.tune
@@ -636,8 +524,10 @@ plot.tune.spls1 <-
         p
     }
 
+## -------------------------- plot.tune.splsda -------------------------- ##
 #' @rdname plot.tune
 #' @method plot tune.splsda
 #' @export
 plot.tune.splsda <- plot.tune.spls1
 # TODO add examples
+
diff --git a/R/check_entry.R b/R/check_entry.R
@@ -663,30 +663,37 @@ Check.entry.wrapper.mint.block = function(X,
         nzv.A = lapply(A, nearZeroVar)
         for(q in 1:length(A))
         {
-            if (length(nzv.A[[q]]$Position) > 0 &&(!DA & q == indY))
-            {
-                names.remove.X = colnames(A[[q]])[nzv.A[[q]]$Position]
-                A[[q]] = A[[q]][, -nzv.A[[q]]$Position, drop=FALSE]
-                #if (verbose)
-                #warning("Zero- or near-zero variance predictors.\n
-                #Reset predictors matrix to not near-zero variance predictors.\n
-                # See $nzv for problematic predictors.")
-                if (ncol(A[[q]]) == 0)
-                    stop(paste0("No more variables in",A[[q]]))
-                
-                #need to check that the keepA[[q]] is now not higher than ncol(A[[q]])
-                if (any(keepA[[q]] > ncol(A[[q]])))
-                {
-                    ind = which(keepA[[q]] > ncol(A[[q]]))
-                    keepA[[q]][ind] = ncol(A[[q]])
-                }
-            }
+          if (length(nzv.A[[q]]$Position) == 0) { next }  # nothing flagged in this block
+          if (DA && q == indY) { next }  # block 'indY' is left untouched when DA is TRUE
+
+          names.remove.X = colnames(A[[q]])[nzv.A[[q]]$Position]
+          A[[q]] = A[[q]][, -nzv.A[[q]]$Position, drop=FALSE]
+          #if (verbose)
+          #warning("Zero- or near-zero variance predictors.\n
+          #Reset predictors matrix to not near-zero variance predictors.\n
+          # See $nzv for problematic predictors.")
+          if (ncol(A[[q]]) == 0)  # every column was flagged: report the block by name, not its (empty) data
+              stop(paste0("No more variables in block '", names(A)[q], "'"))
+
+          # cap keepA[[q]] so it never exceeds the number of columns left in A[[q]]
+          if (any(keepA[[q]] > ncol(A[[q]])))
+          {
+              ind = which(keepA[[q]] > ncol(A[[q]]))
+              keepA[[q]][ind] = ncol(A[[q]])
+          }
 
         }
     } else {
         nzv.A=NULL
     }
 
+    # Abort if a block still holds a zero-variance feature (NA sds are ignored).
+    for (q in seq_along(A)) {
+      feature.sd <- apply(A[[q]], 2, sd)
+      if (any(feature.sd == 0, na.rm = TRUE)) {
+        stop(sprintf("There are features with zero variance in block '%s'. If nearZeroVar() function or 'near.zero.var' parameter hasn't been used,  please use it. If you have used one of these, you may need to manually filter out these features.", names(A)[q]), call. = FALSE)
+      }
+    }
     return(list(A=A, ncomp=ncomp, study=study, keepA=keepA,
                 indY=indY, design=design, init=init, nzv.A=nzv.A))
 }

diff --git a/R/circosPlot.R b/R/circosPlot.R
@@ -36,6 +36,7 @@
 #' @param size.variables size of the variable labels
 #' @param size.labels size of the block labels
 #' @param legend Logical. Whether the legend should be added. Default is TRUE.
+#' @param legend.title String. Title of the legend that lists the levels of the outcome (drawn only when \code{line = TRUE}). Defaults to "Expression".
 #' @param linkWidth Numeric. Specifies the range of sizes used for lines linking
 #' the correlated variables (see details). Must be of length 2 or 1. Default to c(1). See details.
 #' @param ... For object of class \code{block.splsda}, advanced plot parameters:
@@ -98,6 +99,7 @@ circosPlot <- function(object, ...) UseMethod('circosPlot')
              size.variables = 0.25,
              size.labels = 1,
              legend = TRUE,
+             legend.title = "Expression",
              linkWidth = 1,
              ...)
     {
@@ -403,7 +405,7 @@ circosPlot <- function(object, ...) UseMethod('circosPlot')
                    col = color.cor, pch = 19, cex=size.legend, bty = "n")
             # Second legend bottom righ corner
             if(line==TRUE)
-                legend(x=figSize-(circleR/3), y = (circleR/3), title="Expression",
+                legend(x=figSize-(circleR/3), y = (circleR/3), title=legend.title,
                        legend=levels(Y),  ## changed PAM50 to Y
                        col = lineCols, pch = 19, cex=size.legend, bty = "n",ncol=ncol.legend)
             # third legend top left corner