diff --git a/NAMESPACE b/NAMESPACE index d7fc1044..fea48fe1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -124,6 +124,7 @@ export(color.mixo) export(color.spectral) export(dctii_m_transforms) export(explained_variance) +export(facewise_crossproduct) export(facewise_product) export(facewise_transpose) export(ft) diff --git a/R/tens.mproduct.R b/R/tens.mproduct.R index 20a2079a..4f1f06f3 100644 --- a/R/tens.mproduct.R +++ b/R/tens.mproduct.R @@ -279,8 +279,40 @@ m_product <- function( } return(minv( Reduce( + # bpparam MUST be NULL here to prevent double parallelisation! function(a, b) .binary_facewise(a, b, bpparam = NULL), lapply(list(...), m) ) )) } + +#' Tensor cross product +#' +#' Compute the equivalent of ft(a) %fp% b +#' +#' @param a Tensor input. +#' @param b Tensor input. +#' @return Tensor facewise cross product. +#' @author Brendan Lu +#' @export +facewise_crossproduct <- function(a, b) { + na <- dim(a)[1] + pa <- dim(a)[2] + ta <- dim(a)[3] + + nb <- dim(b)[1] + pb <- dim(b)[2] + tb <- dim(b)[3] + + # error: different t for each input + stopifnot(ta == tb) + t <- ta + # error: non-conforming facewise dimensions + stopifnot(na == nb) + + fcp_ab <- array(0, dim = c(pa, pb, t)) + for (i in seq_len(t)) { + fcp_ab[, , i] <- crossprod(a[, , i], b[, , i]) + } + return(fcp_ab) +} diff --git a/R/tens.tpls.R b/R/tens.tpls.R index d67b9c15..cf00361e 100644 --- a/R/tens.tpls.R +++ b/R/tens.tpls.R @@ -74,7 +74,11 @@ #' Tensor PLS-like regression #' -#' Developed @ Melbourne Integrative Genomics +#' Developed @ Melbourne Integrative Genomics. Intuition: Fourier-based m +#' transforms compact the data across time points, but preserve the information +#' association for each sample and each feature. In doing so, pls works by +#' calculating the singular vectors associated with a 'global' (across time +#' points) top singular value, and deflating just that relevant face. #' #' @param x Tensor input. #' @param y Tensor input. @@ -160,6 +164,7 @@ tpls <- function( # simplest algorithm - just uses everything from the tsvdm call of XtY based # without any deflation steps tsvdm_decomposition_xty <- tsvdm( + # bltodo: change to facewise_crossproduct when this is improved ft(xhat) %fp% yhat, transform = FALSE, svals_matrix_form = TRUE @@ -205,7 +210,11 @@ tpls <- function( for (i in seq_len(ncomp)) { # compute tsvdm # BLTODO: tensor crossprod to speed up? + # bltodo: minor as ncomp is usually small, but we technically do not need + # to recalculate the whole tsvdmm, we only need svd re-done on the face + # that was deflated as everything else is still the same tsvdm_decomposition_xty <- tsvdm( + # bltodo: change to facewise_crossproduct when this is improved ft(xhat) %fp% yhat, transform = FALSE, svals_matrix_form = TRUE diff --git a/R/tens.tplsda.R b/R/tens.tplsda.R index ce3b4ef3..b4042178 100644 --- a/R/tens.tplsda.R +++ b/R/tens.tplsda.R @@ -15,11 +15,11 @@ return(y) } # input is a matrix with a single column - if (length(dim(y)) == 2 && dim(y)[2] != 1) { + if (length(dim(y)) == 2 && dim(y)[2] == 1) { return(c(y)) } # input is a tensor with a single column - if (length(dim(y)) == 3 && dim(y)[2] == 1 && dim(y)[3] != 1) { + if (length(dim(y)) == 3 && dim(y)[2] == 1 && dim(y)[3] == 1) { return(c(y)) } stop(" @@ -65,7 +65,8 @@ #' Developed @ Melbourne Integrative Genomics #' #' @param x Tensor input. -#' @param y Tensor input. +#' @param y A vector / column matrix / column tensor with n class labels, or a +#' n x t matrix / n x 1 x t tensor if multilevel = TRUE. #' @param multilevel Set to TRUE if y contains repeated class measurements #' across the t timepoints specified in the x tensor. #' @param ncomp The estimated number of components. ncomp must be explicitly set @@ -108,6 +109,9 @@ tplsda <- function( # repeated observations for all samples factors_list <- lapply(array(seq_len(t)), function(i) factor(y_tens[, , i])) unique_levels <- unique(unlist(lapply(factors_list, levels))) + if (length(unique_levels) == 1) { + stop("y should contain 2 or more distinct classes") + } y <- array(0, dim = c(n, length(unique_levels), t)) for (i in seq_len(t)) { diff --git a/man/facewise_crossproduct.Rd b/man/facewise_crossproduct.Rd new file mode 100644 index 00000000..eae42324 --- /dev/null +++ b/man/facewise_crossproduct.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tens.mproduct.R +\name{facewise_crossproduct} +\alias{facewise_crossproduct} +\title{Tensor cross product} +\usage{ +facewise_crossproduct(a, b) +} +\arguments{ +\item{a}{Tensor input.} + +\item{b}{Tensor input.} +} +\value{ +Tensor facewise cross product. +} +\description{ +Compute the equivalent of ft(a) %fp% b +} +\author{ +Brendan Lu +} diff --git a/man/tpls.Rd b/man/tpls.Rd index 75d2ae81..97803e6b 100644 --- a/man/tpls.Rd +++ b/man/tpls.Rd @@ -47,7 +47,11 @@ of parallelisation. Does not have any effect if transform functions explicitly set using \code{m}, \code{minv}.} } \description{ -Developed @ Melbourne Integrative Genomics +Developed @ Melbourne Integrative Genomics. Intuition: Fourier-based m +transforms compact the data across time points, but preserve the information +association for each sample and each feature. In doing so, pls works by +calculating the singular vectors associated with a 'global' (across time +points) top singular value, and deflating just the } \author{ Brendan Lu diff --git a/man/tplsda.Rd b/man/tplsda.Rd index 2b81dae5..06983acd 100644 --- a/man/tplsda.Rd +++ b/man/tplsda.Rd @@ -19,7 +19,8 @@ tplsda( \arguments{ \item{x}{Tensor input.} -\item{y}{Tensor input.} +\item{y}{A vector / column matrix / column tensor with n class labels, or a +n x t matrix / n x 1 x t tensor if multilevel = TRUE.} \item{multilevel}{Set to TRUE if y contains repeated class measurements across the t timepoints specified in the x tensor.} diff --git a/tests/testthat/test-tens.mproduct.R b/tests/testthat/test-tens.mproduct.R index e74a04d2..c74a74b9 100644 --- a/tests/testthat/test-tens.mproduct.R +++ b/tests/testthat/test-tens.mproduct.R @@ -190,3 +190,19 @@ test_that( ) } ) + +test_that( + "facewise_crossproduct works as intended", + code = { + n <- 2 + p <- 4 + t <- 3 + set.seed(1) + test_tensor1 <- array(rnorm(n * p * t), dim = c(n, p, t)) + + expect_equal( + facewise_crossproduct(test_tensor1, test_tensor1), + ft(test_tensor1) %fp% test_tensor1 + ) + } +) diff --git a/tests/testthat/test-tens.tpls.R b/tests/testthat/test-tens.tpls.R index 74e32db3..550a2cd8 100644 --- a/tests/testthat/test-tens.tpls.R +++ b/tests/testthat/test-tens.tpls.R @@ -41,7 +41,6 @@ test_that( p <- 5 t <- 3 k <- min(n, p) - ncomp_input <- 2 # this test fails if `test_tensor` is rank deficient, as is the case if we # use array(1:24, dim = c(3, 4, 2)) @@ -54,7 +53,6 @@ test_that( tpca_obj <- tpca( test_tensor, - ncomp = ncomp_input, m = m, minv = minv, # turn off centering to get over problem 3 @@ -65,7 +63,6 @@ test_that( tpls_obj <- tpls( test_tensor, test_tensor, - ncomp = ncomp_input, m = m, minv = minv, mode = "tsvdm", @@ -97,7 +94,7 @@ test_that( q <- 9 t <- 1 k <- min(n, p, q) - ncomp_input <- 2 + ncomp_input <- 4 set.seed(1) test_x <- array(rnorm(n * p * t, mean = 0, sd = 5), dim = c(n, p, t)) @@ -152,7 +149,7 @@ test_that( q <- 10 t <- 1 k <- min(n, p, q) - ncomp_input <- 2 + ncomp_input <- 4 set.seed(1) test_x <- array(rnorm(n * p * t, mean = 0, sd = 5), dim = c(n, p, t)) diff --git a/tests/testthat/test-tens.tplsda.R b/tests/testthat/test-tens.tplsda.R index 3a74e4d0..b8224a51 100644 --- a/tests/testthat/test-tens.tplsda.R +++ b/tests/testthat/test-tens.tplsda.R @@ -66,3 +66,30 @@ test_that( ) } ) + +test_that( + "tplsda multilevel y transformation is appropriate", + code = { + n <- 6 + p <- 7 + q <- 9 + t <- 3 + ncomp_input <- 2 + + set.seed(1) + test_x <- array(rnorm(n * p * t, mean = 0, sd = 5), dim = c(n, p, t)) + test_y_mat <- array(letters[1:(n * t)], dim = c(n, t)) + test_y_tens <- array(letters[1:(n * t)], dim = c(n, 1, t)) + + tplsda_mat <- tplsda(test_x, test_y_mat, multilevel = TRUE) + tplsda_tens <- tplsda(test_x, test_y_tens, multilevel = TRUE) + + # both multilevel y inputs should lead to the same underlying tpls call + expect_equal(tplsda_mat$y, tplsda_tens$y) + + # the y tensor that goes into the tpls call should have the appropriate + # number of columns corresponding to all unique categories found across + # all timepoints specified in y + expect_equal(dim(tplsda_mat$y), c(n, n * t, 3)) + } +)