mixOmicsTeam · Max-Bladen · Apr 5, 2022 · Apr 11, 2022 · Apr 11, 2022 · Apr 11, 2022
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -63,5 +63,5 @@ biocViews: ImmunoOncology,
            MultipleComparison, 
            Classification, 
            Regression
-RoxygenNote: 7.1.2
+RoxygenNote: 7.2.0
 Encoding: UTF-8
diff --git a/R/MCV.block.splsda.R b/R/MCV.block.splsda.R
@@ -115,12 +115,12 @@ MCVfold.block.splsda <-
         # prediction of all samples for each test.keepX and  nrep at comp fixed
         folds.input = folds
 
+        n = nrow(X[[1]])
+        repeated.measure = 1:n
+
         #-- define the folds --#
         if (validation ==  "Mfold")
         {
-            n = nrow(X[[1]])
-            repeated.measure = 1:n
-
             if (is.null(folds) || !is.numeric(folds) || folds < 2 || folds > n)
             {
                 stop("Invalid number of folds.")

diff --git a/R/network.R b/R/network.R
@@ -813,7 +813,7 @@ network <- function(mat,
 
     #-- check cutoff
     if (round(max(abs(w)), 2) == 0)
-        stop("There is no correlation between these blocks whith these components.",
+        stop("There is no correlation between these blocks with these components.",
              "Try a different value of 'comp'.", call. = FALSE)
 
     if (!is.numeric(cutoff) || cutoff < 0 || cutoff > 1)

diff --git a/R/pca.R b/R/pca.R
@@ -147,9 +147,13 @@ pca <- function(X,
     if (is.null(ncomp))
         ncomp = min(nrow(X), ncol(X))
 
+    if (!is.numeric(ncomp)) {
+        stop("`ncomp` must be numeric", call. = FALSE)
+    }
+
     ncomp = round(ncomp)
 
-    if (!is.numeric(ncomp) || ncomp < 1 || !is.finite(ncomp))
+    if (ncomp < 1 || !is.finite(ncomp))
         stop("invalid value for 'ncomp'.", call. = FALSE)
 
     if (ncomp > min(ncol(X), nrow(X)))

diff --git a/inst/testdata/testdata-auroc.rda b/inst/testdata/testdata-auroc.rda
diff --git a/inst/testdata/testdata-background.predict.rda b/inst/testdata/testdata-background.predict.rda
diff --git a/inst/testdata/testdata-cim.rda b/inst/testdata/testdata-cim.rda
diff --git a/inst/testdata/testdata-circosPlot.rda b/inst/testdata/testdata-circosPlot.rda
diff --git a/inst/testdata/testdata-diablo.rda b/inst/testdata/testdata-diablo.rda
diff --git a/inst/testdata/testdata-network.rda b/inst/testdata/testdata-network.rda
diff --git a/inst/testdata/testdata-pca.rda b/inst/testdata/testdata-pca.rda
diff --git a/inst/testdata/testdata-perf.diablo.rda b/inst/testdata/testdata-perf.diablo.rda
diff --git a/inst/testdata/testdata-perf.mint.splsda.rda b/inst/testdata/testdata-perf.mint.splsda.rda
diff --git a/inst/testdata/testdata-plotIndiv.rda b/inst/testdata/testdata-plotIndiv.rda
diff --git a/inst/testdata/testdata-plotLoadings.rda b/inst/testdata/testdata-plotLoadings.rda
diff --git a/inst/testdata/testdata-plotVar.rda b/inst/testdata/testdata-plotVar.rda
diff --git a/inst/testdata/testdata-predict.rda b/inst/testdata/testdata-predict.rda
diff --git a/inst/testdata/testdata-tune.block.splsda.rda b/inst/testdata/testdata-tune.block.splsda.rda
diff --git a/inst/testdata/testdata-tune.mint.splsda.rda b/inst/testdata/testdata-tune.mint.splsda.rda
diff --git a/inst/testdata/testdata-tune.splsda.rda b/inst/testdata/testdata-tune.splsda.rda
diff --git a/tests/testthat/helpers.R b/tests/testthat/helpers.R
@@ -24,3 +24,190 @@
     require(testthat)
     expect_equal(round(numeric_value, digits = digits), round(expected, digits = digits))
 }
+
+
+#' Select a minimal set of training and testing samples from X and Y.
+#'
+#' Row indices for a training set (`tr`) and a testing set (`te`) are chosen
+#' according to the framework implied by the inputs, then X, Y (and, when
+#' supplied, the study and repeated-measures vectors) are subset by them:
+#' \itemize{
+#'   \item multilevel (`ML` supplied): for each of the first 3 individuals
+#'         having the maximum number of measurements, `n.tr` rows are sampled
+#'         at random for training and all remaining rows of that individual
+#'         go to testing (`n.te` is not used in this branch);
+#'   \item DA (`Y` is a factor): the first `n.tr` rows of each class (and of
+#'         each study when `S` is supplied) are used for training and the
+#'         following `n.te` rows for testing;
+#'   \item otherwise: the first `n.tr` rows (per study when `S` is supplied)
+#'         are used for training and the following `n.te` rows for testing.
+#' }
+#' X is treated as multiblock when it is neither a data.frame nor a matrix;
+#' each named block is then subset with the same row indices.
+#'
+#' @param X X dataframe for any mixOmics method. Can be a list of multiple dataframes if multiblock
+#' @param Y Y dataframe or factor vector for any mixOmics method
+#' @param S study factor vector for multigroup frameworks
+#' @param ML repeated measures vector for multilevel frameworks
+#' @param n.tr number of training samples (per class if DA, per study if
+#'   multigroup, per individual if multilevel)
+#' @param n.te number of testing samples (per class if DA, per study if multigroup)
+#' @param seed seed passed to `set.seed()` before any sample selection
+#' @return list with components `X.tr`, `X.te`, `Y.tr`, `Y.te`, plus
+#'   `ML.tr`/`ML.te` when `ML` is supplied and `S.tr`/`S.te` (as factors)
+#'   when `S` is supplied
+#' @keywords internal
+.minimal_train_test_subset <- function(X=NULL, 
+                                       Y=NULL, 
+                                       S=NULL, 
+                                       ML=NULL,
+                                       n.tr=2,
+                                       n.te=1,
+                                       seed=16) {
+    set.seed(seed)
+
+    DA = is.factor(Y) # logical gate for DA framework
+    MULTIGROUP = !is.null(S) # logical gate for multigroup framework
+    MULTILEVEL = !is.null(ML) # logical gate for multilevel framework
+    MULTIBLOCK = !is.data.frame(X) && !is.matrix(X) # logical gate for multiblock framework
+
+    tr <- c() # initialise indices for training and testing samples
+    te <- c()
+
+    if (MULTILEVEL) { # any multilevel method
+
+        n.indivs <- 3 # default number of repeated samples to consider
+
+        #if(DA) { n.indivs <- length(unique(Y))-1 } # if DA, set specific quantity
+
+        # keep the first n.indivs individuals that were measured the maximum
+        # number of times.
+        # NOTE(review): `which()` returns positions within `table(ML)`, and
+        # those positions are compared against the values of `ML` below. The
+        # two only coincide when the IDs are the integers 1..k; confirm this
+        # holds for every caller. Fewer than n.indivs matches yields NA here.
+        indivs <- unname(which(table(ML) == max(table(ML))))[1:n.indivs] 
+
+        for (s in indivs) { # for each repeated sample ...
+
+            rel.sam <- which(ML==s) # determine the corresponding rows
+            # randomly take n.tr of these rows for training
+            tr.sam <- sample(rel.sam, n.tr, replace = FALSE)
+            # every remaining row of this individual is used for testing
+            te.sam <- setdiff(rel.sam, tr.sam)
+
+            tr <- c(tr, tr.sam)
+            te <- c(te, te.sam)
+
+        }
+    } 
+    else if(DA) { # if the framework is DA ... 
+
+        for(cls in unique(Y)) { # for each class ...
+
+            if (MULTIGROUP) { # MINT.(s)PLSDA
+                for (s in unique(S)){ # for each study ...
+                    # determine the rows with that class and for that study
+                    rel.sam <- intersect(which(Y==cls), which(S==s)) 
+                    tr <- c(tr, rel.sam[1:n.tr]) # take first n.tr samples for training
+                    # only add testing samples when neither this study nor this
+                    # class is represented in the testing set yet.
+                    # NOTE(review): if this class/study combination has fewer
+                    # than n.tr + n.te rows, the indexing below yields NA.
+                    if (!(s %in% S[te] || cls %in% Y[te])) {te <- c(te, rel.sam[(n.tr+1):(n.tr+n.te)]) }
+                }
+            } else { # (BLOCK).(s)PLSDA
+                rows <- which(Y == cls)
+                # first n.tr rows of the class for training. The previous
+                # `rows[1:n.tr+1]` parsed as `rows[(1:n.tr)+1]`, which skipped
+                # the first row and overlapped with the testing rows below.
+                tr <- c(tr, rows[1:n.tr])
+                te <- c(te, rows[(n.tr+1):(n.tr+n.te)])
+            }
+
+        }
+
+        if (MULTIGROUP) { # ensure that all studies in training are present in testing
+            tr.te.study.diff <- setdiff(unique(S[tr]), unique(S[te]))
+            if (length(tr.te.study.diff) != 0) {
+                for (s in tr.te.study.diff) {
+                    # NOTE(review): this takes the first row of the study
+                    # without checking whether it is already a training row,
+                    # so training and testing sets may share a sample here.
+                    te <- c(te, which(S == s)[1])
+                }
+            }
+        }
+
+    } 
+    else { 
+        if (MULTIGROUP) { # MINT.(S)PLS
+            for (s in unique(S)){
+                rel.sam <- which(S==s)
+                tr <- c(tr, rel.sam[1:n.tr])
+                te <- c(te, rel.sam[(n.tr+1):(n.tr+n.te)])
+            }
+        } else { # (BLOCK).(s)PLS
+            tr <- 1:n.tr
+            te <- (n.tr+1):(n.tr+n.te)
+        }
+    }
+
+
+
+    if(MULTIBLOCK) { # subset each block iteratively if multiblock
+        X.tr <- list()         
+        X.te <- list()
+
+        for (block in names(X)) {
+            X.tr[[block]] <- X[[block]][tr,]
+            X.te[[block]] <- X[[block]][te,]
+        }
+    } else { # otherwise just subset X
+        X.tr <- X[tr, ]
+        X.te <- X[te, ]
+    }
+
+    if (DA) { # if Y is a factor, index as a vector
+        Y.tr <- Y[tr]
+        Y.te <- Y[te]
+    } else { # otherwise Y is indexed by row (data.frame / matrix)
+        Y.tr <- Y[tr,]
+        Y.te <- Y[te,]
+    }
+
+    out <- list(X.tr = X.tr,
+                X.te = X.te,
+                Y.tr = Y.tr,
+                Y.te = Y.te)
+
+    if (MULTILEVEL) { # include repeated measures
+        out$ML.tr <- ML[tr]
+        out$ML.te <- ML[te]
+    }
+
+    if (MULTIGROUP) { # include study
+        out$S.tr <- as.factor(S[tr])
+        out$S.te <- as.factor(S[te])
+    }
+
+
+    return(out)
+}
+
+
+#' Evaluate an expression while discarding anything it writes to standard output.
+#'
+#' Output is diverted with `sink()` to a temporary file for the duration of
+#' the call. Only the output stream is diverted; messages and warnings are
+#' not affected.
+#'
+#' @param x expression to evaluate; it is forced only after the diversion is
+#'   in place, so output produced while evaluating it is discarded
+#' @return the value of `x`, invisibly
+#' @keywords internal
+.quiet <- function(x) {
+  sink.file <- tempfile()
+  sink(sink.file)
+  # restore the previous output target and remove the scratch file even if
+  # evaluating `x` throws an error
+  on.exit({
+    sink()
+    unlink(sink.file)
+  }, add = TRUE)
+  invisible(force(x))
+}
+
+
+#' Group the entries of a named list by set-equality of their contents.
+#'
+#' The first entry is always stored as a distinct item under the name
+#' "basic". Every later entry is compared (with `setequal()`) against the
+#' distinct items found so far and is kept, under its own name, only if it
+#' matches none of them. Each distinct item is then paired with the names of
+#' all entries of `items` that are set-equal to it.
+#'
+#' @param items named list of objects to compare
+#' @return list with `novel.items` (the distinct items) and `homo.list`
+#'   (for each distinct item, the names of the entries of `items` equal to it)
+#' @keywords internal
+.gt.homogeneity <- function(items) {
+
+  # nothing to compare: return empty groupings rather than failing on items[[2]]
+  if (length(items) == 0) {
+    return(list(novel.items=list(),
+                homo.list=list()))
+  }
+
+  novel.items <- list()
+  novel.items["basic"] <- items[1]
+
+  # seq_along()[-1] is empty for a single item, whereas 2:length(items)
+  # would count backwards (2, 1) and index past the end of the list
+  for (idx in seq_along(items)[-1]) {
+
+    candidate <- items[[idx]]
+    novel <- TRUE
+
+    for (known in novel.items) {
+      if (setequal(candidate, known)) {
+        novel <- FALSE
+        break
+      }
+    }
+
+    if (novel) {
+      novel.items[[names(items)[idx]]] <- candidate
+    }
+  }
+
+  homo.list <- list()
+  for (nm in names(novel.items)) {
+    homo.list[[nm]] <- vector()
+  }
+
+  for (n.idx in seq_along(novel.items)) {
+    # compare element contents with `[[`, exactly as in the novelty pass
+    # above, so every entry lands in the group of the distinct item it was
+    # judged equal to
+    known <- novel.items[[n.idx]]
+    for (i.idx in seq_along(items)) {
+      if (setequal(items[[i.idx]], known)) {
+        homo.list[[n.idx]] <- c(homo.list[[n.idx]], names(items)[i.idx])
+      }
+    }
+  }
+
+  return(list(novel.items=novel.items,
+              homo.list=homo.list))
+}
+
+