diff --git a/NAMESPACE b/NAMESPACE
index d7fc1044..fea48fe1 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -124,6 +124,7 @@ export(color.mixo)
 export(color.spectral)
 export(dctii_m_transforms)
 export(explained_variance)
+export(facewise_crossproduct)
 export(facewise_product)
 export(facewise_transpose)
 export(ft)
diff --git a/R/tens.mproduct.R b/R/tens.mproduct.R
index 20a2079a..4f1f06f3 100644
--- a/R/tens.mproduct.R
+++ b/R/tens.mproduct.R
@@ -279,8 +279,40 @@ m_product <- function(
   }
   return(minv(
     Reduce(
+      # bpparam MUST be NULL here to prevent double parallelisation!
       function(a, b) .binary_facewise(a, b, bpparam = NULL),
       lapply(list(...), m)
     )
   ))
 }
+
+#' Tensor cross product
+#'
+#' Compute the equivalent of ft(a) %fp% b
+#'
+#' @param a Tensor input.
+#' @param b Tensor input.
+#' @return Tensor facewise cross product.
+#' @author Brendan Lu
+#' @export
+facewise_crossproduct <- function(a, b) {
+  na <- dim(a)[1]
+  pa <- dim(a)[2]
+  ta <- dim(a)[3]
+
+  nb <- dim(b)[1]
+  pb <- dim(b)[2]
+  tb <- dim(b)[3]
+
+  # error: different t for each input
+  stopifnot(ta == tb)
+  t <- ta
+  # error: non-conforming facewise dimensions
+  stopifnot(na == nb)
+
+  fcp_ab <- array(0, dim = c(pa, pb, t))
+  for (i in seq_len(t)) {
+    fcp_ab[, , i] <- crossprod(a[, , i], b[, , i])
+  }
+  return(fcp_ab)
+}
diff --git a/R/tens.tpls.R b/R/tens.tpls.R
index d67b9c15..cf00361e 100644
--- a/R/tens.tpls.R
+++ b/R/tens.tpls.R
@@ -74,7 +74,11 @@
 
 #' Tensor PLS-like regression
 #'
-#' Developed @ Melbourne Integrative Genomics
+#' Developed @ Melbourne Integrative Genomics. Intuition: Fourier-based m
+#' transforms compact the data across time points, but preserve the information
+#' association for each sample and each feature. In doing so, pls works by
+#' calculating the singular vectors associated with a 'global' (across time
+#' points) top singular value, and deflating just that relevant face.
 #'
 #' @param x Tensor input.
 #' @param y Tensor input.
@@ -160,6 +164,7 @@ tpls <- function(
     # simplest algorithm - just uses everything from the tsvdm call of XtY based
     # without any deflation steps
     tsvdm_decomposition_xty <- tsvdm(
+      # bltodo: change to facewise_crossproduct when this is improved
       ft(xhat) %fp% yhat,
       transform = FALSE,
       svals_matrix_form = TRUE
@@ -205,7 +210,11 @@ tpls <- function(
     for (i in seq_len(ncomp)) {
       # compute tsvdm
       # BLTODO: tensor crossprod to speed up?
+      # bltodo: minor as ncomp is usually small, but we technically do not need
+      # to recalculate the whole tsvdmm, we only need svd re-done on the face
+      # that was deflated as everything else is still the same
       tsvdm_decomposition_xty <- tsvdm(
+        # bltodo: change to facewise_crossproduct when this is improved
         ft(xhat) %fp% yhat,
         transform = FALSE,
         svals_matrix_form = TRUE
diff --git a/R/tens.tplsda.R b/R/tens.tplsda.R
index ce3b4ef3..b4042178 100644
--- a/R/tens.tplsda.R
+++ b/R/tens.tplsda.R
@@ -15,11 +15,11 @@
     return(y)
   }
   # input is a matrix with a single column
-  if (length(dim(y)) == 2 && dim(y)[2] != 1) {
+  if (length(dim(y)) == 2 && dim(y)[2] == 1) {
     return(c(y))
   }
   # input is a tensor with a single column
-  if (length(dim(y)) == 3 && dim(y)[2] == 1 && dim(y)[3] != 1) {
+  if (length(dim(y)) == 3 && dim(y)[2] == 1 && dim(y)[3] == 1) {
     return(c(y))
   }
   stop("
@@ -65,7 +65,8 @@
 #' Developed @ Melbourne Integrative Genomics
 #'
 #' @param x Tensor input.
-#' @param y Tensor input.
+#' @param y A vector / column matrix / column tensor with n class labels, or a
+#' n x t matrix / n x 1 x t tensor if multilevel = TRUE.
 #' @param multilevel Set to TRUE if y contains repeated class measurements
 #' across the t timepoints specified in the x tensor.
 #' @param ncomp The estimated number of components. ncomp must be explicitly set
@@ -108,6 +109,9 @@ tplsda <- function(
     # repeated observations for all samples
     factors_list <- lapply(array(seq_len(t)), function(i) factor(y_tens[, , i]))
     unique_levels <- unique(unlist(lapply(factors_list, levels)))
+    if (length(unique_levels) == 1) {
+      stop("y should contain 2 or more distinct classes")
+    }
 
     y <- array(0, dim = c(n, length(unique_levels), t))
     for (i in seq_len(t)) {
diff --git a/man/facewise_crossproduct.Rd b/man/facewise_crossproduct.Rd
new file mode 100644
index 00000000..eae42324
--- /dev/null
+++ b/man/facewise_crossproduct.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/tens.mproduct.R
+\name{facewise_crossproduct}
+\alias{facewise_crossproduct}
+\title{Tensor cross product}
+\usage{
+facewise_crossproduct(a, b)
+}
+\arguments{
+\item{a}{Tensor input.}
+
+\item{b}{Tensor input.}
+}
+\value{
+Tensor facewise cross product.
+}
+\description{
+Compute the equivalent of ft(a) %fp% b
+}
+\author{
+Brendan Lu
+}
diff --git a/man/tpls.Rd b/man/tpls.Rd
index 75d2ae81..97803e6b 100644
--- a/man/tpls.Rd
+++ b/man/tpls.Rd
@@ -47,7 +47,11 @@ of parallelisation. Does not have any effect if transform functions
 explicitly set using \code{m}, \code{minv}.}
 }
 \description{
-Developed @ Melbourne Integrative Genomics
+Developed @ Melbourne Integrative Genomics. Intuition: Fourier-based m
+transforms compact the data across time points, but preserve the information
+association for each sample and each feature. In doing so, pls works by
+calculating the singular vectors associated with a 'global' (across time
+points) top singular value, and deflating just the
 }
 \author{
 Brendan Lu
diff --git a/man/tplsda.Rd b/man/tplsda.Rd
index 2b81dae5..06983acd 100644
--- a/man/tplsda.Rd
+++ b/man/tplsda.Rd
@@ -19,7 +19,8 @@ tplsda(
 \arguments{
 \item{x}{Tensor input.}
 
-\item{y}{Tensor input.}
+\item{y}{A vector / column matrix / column tensor with n class labels, or a
+n x t matrix / n x 1 x t tensor if multilevel = TRUE.}
 
 \item{multilevel}{Set to TRUE if y contains repeated class measurements
 across the t timepoints specified in the x tensor.}
diff --git a/tests/testthat/test-tens.mproduct.R b/tests/testthat/test-tens.mproduct.R
index e74a04d2..c74a74b9 100644
--- a/tests/testthat/test-tens.mproduct.R
+++ b/tests/testthat/test-tens.mproduct.R
@@ -190,3 +190,19 @@ test_that(
     )
   }
 )
+
+test_that(
+  "facewise_crossproduct works as intended",
+  code = {
+    n <- 2
+    p <- 4
+    t <- 3
+    set.seed(1)
+    test_tensor1 <- array(rnorm(n * p * t), dim = c(n, p, t))
+
+    expect_equal(
+      facewise_crossproduct(test_tensor1, test_tensor1),
+      ft(test_tensor1) %fp% test_tensor1
+    )
+  }
+)
diff --git a/tests/testthat/test-tens.tpls.R b/tests/testthat/test-tens.tpls.R
index 74e32db3..550a2cd8 100644
--- a/tests/testthat/test-tens.tpls.R
+++ b/tests/testthat/test-tens.tpls.R
@@ -41,7 +41,6 @@ test_that(
     p <- 5
     t <- 3
     k <- min(n, p)
-    ncomp_input <- 2
 
     # this test fails if `test_tensor` is rank deficient, as is the case if we
     # use array(1:24, dim = c(3, 4, 2))
@@ -54,7 +53,6 @@ test_that(
 
     tpca_obj <- tpca(
       test_tensor,
-      ncomp = ncomp_input,
       m = m,
       minv = minv,
       # turn off centering to get over problem 3
@@ -65,7 +63,6 @@ test_that(
 
     tpls_obj <- tpls(
       test_tensor, test_tensor,
-      ncomp = ncomp_input,
       m = m,
       minv = minv,
       mode = "tsvdm",
@@ -97,7 +94,7 @@ test_that(
     q <- 9
     t <- 1
     k <- min(n, p, q)
-    ncomp_input <- 2
+    ncomp_input <- 4
 
     set.seed(1)
     test_x <- array(rnorm(n * p * t, mean = 0, sd = 5), dim = c(n, p, t))
@@ -152,7 +149,7 @@ test_that(
     q <- 10
     t <- 1
     k <- min(n, p, q)
-    ncomp_input <- 2
+    ncomp_input <- 4
 
     set.seed(1)
     test_x <- array(rnorm(n * p * t, mean = 0, sd = 5), dim = c(n, p, t))
diff --git a/tests/testthat/test-tens.tplsda.R b/tests/testthat/test-tens.tplsda.R
index 3a74e4d0..b8224a51 100644
--- a/tests/testthat/test-tens.tplsda.R
+++ b/tests/testthat/test-tens.tplsda.R
@@ -66,3 +66,30 @@ test_that(
     )
   }
 )
+
+test_that(
+  "tplsda multilevel y transformation is appropriate",
+  code = {
+    n <- 6
+    p <- 7
+    q <- 9
+    t <- 3
+    ncomp_input <- 2
+
+    set.seed(1)
+    test_x <- array(rnorm(n * p * t, mean = 0, sd = 5), dim = c(n, p, t))
+    test_y_mat <- array(letters[1:(n * t)], dim = c(n, t))
+    test_y_tens <- array(letters[1:(n * t)], dim = c(n, 1, t))
+
+    tplsda_mat <- tplsda(test_x, test_y_mat, multilevel = TRUE)
+    tplsda_tens <- tplsda(test_x, test_y_tens, multilevel = TRUE)
+
+    # both multilevel y inputs should lead to the same underlying tpls call
+    expect_equal(tplsda_mat$y, tplsda_tens$y)
+
+    # the y tensor that goes into the tpls call should have the appropriate
+    # number of columns corresponding to all unique categories found across
+    # all timepoints specified in y
+    expect_equal(dim(tplsda_mat$y), c(n, n * t, 3))
+  }
+)