Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for decomposition of complex tensors, to calculate the pseudo-inverse and the conjugate of a complex tensor #594

Merged
merged 5 commits into from
Oct 7, 2023
Merged
5 changes: 3 additions & 2 deletions src/arraymancer/linear_algebra.nim
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@ import
special_matrices,
decomposition,
decomposition_rand,
linear_systems]
linear_systems,
algebra]

export
least_squares, special_matrices,
decomposition, decomposition_rand, linear_systems
decomposition, decomposition_rand, linear_systems, algebra
54 changes: 54 additions & 0 deletions src/arraymancer/linear_algebra/algebra.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Copyright (c) 2018 the Arraymancer contributors
# Distributed under the Apache v2 License (license terms are at http://www.apache.org/licenses/LICENSE-2.0).
# This file may not be copied, modified, or distributed except according to those terms.

import std/complex
import
../tensor

proc conjugate*[T: Complex32 | Complex64](A: Tensor[T]): Tensor[T] =
  ## Element-wise complex conjugate of a tensor of complex numbers.
  ##
  ## Each element `a + b*i` is mapped to `a - b*i`; the shape of the
  ## input tensor is preserved.
  result = A.map_inline(conjugate(x))

proc pinv*[T: SomeFloat](A: Tensor[T], rcond = 1e-15): Tensor[T] =
  ## Compute the (Moore-Penrose) pseudo-inverse of a matrix.
  ##
  ## Calculate the generalized inverse of a matrix using its
  ## singular-value decomposition (SVD) and including all
  ## *large* singular values.
  ##
  ## Input:
  ##   - A: the rank-2 tensor to invert
  ##   - rcond: Cutoff ratio for small singular values.
  ##            Singular values less than or equal to
  ##            `rcond * largest_singular_value` are set to zero.
  let (U, S, Vt) = A.svd()
  # Convert rcond (a float64 parameter) to T so that all arithmetic and
  # comparisons stay in the tensor's own precision (e.g. float32).
  let epsilon = S.max() * T(rcond)
  let S_size = S.shape[0]
  # Diagonal matrix of inverted singular values; values at or below the
  # cutoff are zeroed as they would only amplify numerical noise.
  var S_inv = zeros[T]([S_size, S_size])
  for n in 0 ..< S_size:
    S_inv[n, n] = if abs(S[n]) > epsilon: T(1) / S[n] else: T(0)
  # pinv(A) = V * S^-1 * U^T
  result = Vt.transpose * S_inv * U.transpose
AngelEzquerra marked this conversation as resolved.
Show resolved Hide resolved

proc pinv*[T: Complex32 | Complex64](A: Tensor[T], rcond = 1e-15): Tensor[T] =
  ## Compute the (Moore-Penrose) pseudo-inverse of a complex matrix.
  ##
  ## Calculate the generalized inverse of a matrix using its
  ## singular-value decomposition (SVD) and including all
  ## *large* singular values.
  ##
  ## Input:
  ##   - A: the rank-2 tensor to invert
  ##   - rcond: Cutoff ratio for small singular values.
  ##            Singular values less than or equal to
  ##            `rcond * largest_singular_value` are set to zero.
  let (U, S, Vt) = A.svd()
  # The singular values of a complex matrix are real; F is their element
  # type (float32 for Complex32, float64 for Complex64). Building the
  # constants from F (instead of `complex(1.0)`, which is always
  # Complex64) keeps the Complex32 instantiation type-correct.
  type F = typeof(S[0])
  let epsilon = S.max() * F(rcond)
  let S_size = S.shape[0]
  var S_inv = zeros[T]([S_size, S_size])
  for n in 0 ..< S_size:
    S_inv[n, n] = if abs(S[n]) > epsilon: complex(F(1) / S[n]) else: complex(F(0))
  # pinv(A) = V * S^-1 * U^H (conjugate transposes for complex matrices)
  result = Vt.conjugate.transpose * S_inv * U.conjugate.transpose
AngelEzquerra marked this conversation as resolved.
Show resolved Hide resolved
15 changes: 10 additions & 5 deletions src/arraymancer/linear_algebra/decomposition.nim
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ template `^^`(s, i: untyped): untyped =
# Full decompositions
# -------------------------------------------

proc symeig*[T: SomeFloat](a: Tensor[T], return_eigenvectors: static bool = false, uplo: static char = 'U'): tuple[eigenval, eigenvec: Tensor[T]] {.inline.}=
proc symeig*[T: SupportedDecomposition](a: Tensor[T], return_eigenvectors: static bool = false, uplo: static char = 'U'): tuple[eigenval, eigenvec: Tensor[T]] {.inline.}=
## Compute the eigenvalues and eigen vectors of a symmetric matrix
## Input:
## - A symmetric matrix of shape [n x n]
Expand All @@ -36,7 +36,7 @@ proc symeig*[T: SomeFloat](a: Tensor[T], return_eigenvectors: static bool = fals
var a = a.clone(colMajor)
syevr(a, uplo, return_eigenvectors, 0, a.shape[0] - 1, result.eigenval, result.eigenvec, scratchspace)

proc symeig*[T: SomeFloat](a: Tensor[T], return_eigenvectors: static bool = false,
proc symeig*[T: SupportedDecomposition](a: Tensor[T], return_eigenvectors: static bool = false,
uplo: static char = 'U',
slice: HSlice[int or BackwardsIndex, int or BackwardsIndex]): tuple[eigenval, eigenvec: Tensor[T]] {.inline.}=
## Compute the eigenvalues and eigen vectors of a symmetric matrix
Expand All @@ -60,7 +60,7 @@ proc symeig*[T: SomeFloat](a: Tensor[T], return_eigenvectors: static bool = fals
var a = a.clone(colMajor)
syevr(a, uplo, return_eigenvectors, a ^^ slice.a, a ^^ slice.b, result.eigenval, result.eigenvec, scratchspace)

proc qr*[T: SomeFloat](a: Tensor[T]): tuple[Q, R: Tensor[T]] =
proc qr*[T: SupportedDecomposition](a: Tensor[T]): tuple[Q, R: Tensor[T]] =
## Compute the QR decomposition of an input matrix ``a``
## Decomposition is done through the Householder method
## without pivoting.
Expand Down Expand Up @@ -90,7 +90,7 @@ proc qr*[T: SomeFloat](a: Tensor[T]): tuple[Q, R: Tensor[T]] =
orgqr(result.Q, tau, scratchspace)
result.Q = result.Q[_, 0..<k]

proc lu_permuted*[T: SomeFloat](a: Tensor[T]): tuple[PL, U: Tensor[T]] =
proc lu_permuted*[T: SupportedDecomposition](a: Tensor[T]): tuple[PL, U: Tensor[T]] =
## Compute the pivoted LU decomposition of an input matrix ``a``.
##
## The decomposition solves the equation:
Expand Down Expand Up @@ -121,7 +121,7 @@ proc lu_permuted*[T: SomeFloat](a: Tensor[T]): tuple[PL, U: Tensor[T]] =
tril_unit_diag_mut(result.PL)
laswp(result.PL, pivot_indices, pivot_from = -1)

proc svd*[T: SomeFloat](A: Tensor[T]): tuple[U, S, Vh: Tensor[T]] =
proc svd*[T: SupportedDecomposition; U: SomeFloat](A: Tensor[T], _: typedesc[U]): tuple[U: Tensor[T], S: Tensor[U], Vh: Tensor[T]] =
## Compute the Singular Value Decomposition of an input matrix ``a``
## Decomposition is done through recursive divide & conquer.
##
Expand Down Expand Up @@ -174,3 +174,8 @@ proc svd*[T: SomeFloat](A: Tensor[T]): tuple[U, S, Vh: Tensor[T]] =

result.Vh = V.transpose()
result.U = Ut.transpose()

proc svd*[T: SupportedDecomposition](A: Tensor[T]): auto =
  ## Convenience SVD overload that picks the real type used for the
  ## singular values from the element type of ``A``:
  ## float tensors keep their own type, Complex32 maps to float32 and
  ## Complex64 maps to float64.
  when T is Complex32:
    svd(A, float32)
  elif T is Complex64:
    svd(A, float)
  else:
    svd(A, T)
127 changes: 88 additions & 39 deletions src/arraymancer/linear_algebra/helpers/decomposition_lapack.nim
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,27 @@

# SVD and Eigenvalues/eigenvectors decomposition
# --------------------------------------------------------------------------------------
import std / [math, complex]

type
SupportedDecomposition* = SomeFloat | Complex64 | Complex32

proc dataPointerMaybeCast[T](arg: Tensor[T]): auto =
  ## Pointer to the tensor's first element, cast to the matching LAPACK
  ## complex pointer type when `T` is a complex type.
  when T is Complex32:
    cast[ptr lapack_complex_float](arg.get_data_ptr)
  elif T is Complex64:
    cast[ptr lapack_complex_double](arg.get_data_ptr)
  elif T is SomeFloat:
    arg.get_data_ptr
  else:
    {.error: "Invalid type: " & $T.}

proc dataPointerMaybeCast[T](arg: seq[T]): auto =
  ## Pointer to the seq's first element, cast to the matching LAPACK
  ## complex pointer type when `T` is a complex type.
  ##
  ## `unsafeAddr` is required here: `arg` is an immutable parameter, and
  ## `arg[0].addr` fails to compile ("type mismatch: got <float>") on
  ## Nim 1.4/1.6, as shown by the CI failures on this line.
  when T is SomeFloat: arg[0].unsafeAddr
  elif T is Complex32: cast[ptr lapack_complex_float](arg[0].unsafeAddr)
  elif T is Complex64: cast[ptr lapack_complex_double](arg[0].unsafeAddr)
  else: {.error: "Invalid type: " & $T.}

overload(syevr, ssyevr)
overload(syevr, dsyevr)

proc syevr*[T: SomeFloat](a: var Tensor[T], uplo: static char, return_eigenvectors: static bool,
proc syevr*[T: SupportedDecomposition](a: var Tensor[T], uplo: static char, return_eigenvectors: static bool,
low_idx: int, high_idx: int, eigenval, eigenvec: var Tensor[T], scratchspace: var seq[T]) =
## Wrapper for LAPACK syevr routine (Symmetric Recursive Eigenvalue Decomposition)
##
Expand Down Expand Up @@ -101,11 +117,11 @@
# Querying workspaces sizes
syevr(jobz, interval, uplo_layout,
n.unsafeAddr,
a.get_data_ptr, n.unsafeAddr, # lda
a.dataPointerMaybeCast, n.unsafeAddr, # lda
vl.addr, vu.addr, il.addr, iu.addr,
abstol.addr, m.addr,
eigenval.get_data_ptr,
eigenvec.get_data_ptr, n.unsafeAddr, # ldz
eigenval.dataPointerMaybeCast,
eigenvec.dataPointerMaybeCast, n.unsafeAddr, # ldz
isuppz_ptr,
work_size.addr, lwork.addr,
iwork_size.addr, liwork.addr, info.addr)
Expand All @@ -119,11 +135,11 @@
# Decompose matrix
syevr(jobz, interval, uplo_layout,
n.unsafeAddr,
a.get_data_ptr, n.unsafeAddr,
a.dataPointerMaybeCast, n.unsafeAddr,
vl.addr, vu.addr, il.addr, iu.addr,
abstol.addr, m.addr,
eigenval.get_data_ptr,
eigenvec.get_data_ptr, n.unsafeAddr,
eigenval.dataPointerMaybeCast,
eigenvec.dataPointerMaybeCast, n.unsafeAddr,
isuppz_ptr,
scratchspace[0].addr, lwork.addr,
iwork[0].addr, liwork.addr, info.addr)
Expand All @@ -150,8 +166,10 @@

overload(geqrf, sgeqrf)
overload(geqrf, dgeqrf)
overload(geqrf, cgeqrf)
overload(geqrf, zgeqrf)

proc geqrf*[T: SomeFloat](Q: var Tensor[T], tau: var seq[T], scratchspace: var seq[T]) =
proc geqrf*[T: SupportedDecomposition](Q: var Tensor[T], tau: var seq[T], scratchspace: var seq[T]) =
## Wrapper for LAPACK geqrf routine (GEneral QR Factorization)
## Decomposition is done through Householder Reflection
## and without pivoting
Expand All @@ -172,15 +190,15 @@
info: int32

# Querying workspace size
geqrf(m.unsafeAddr, n.unsafeAddr, Q.get_data_ptr, m.unsafeAddr, # lda
geqrf(m.unsafeAddr, n.unsafeAddr, Q.dataPointerMaybeCast, m.unsafeAddr, # lda
tau[0].addr, work_size.addr, lwork.addr, info.addr)

# Allocating workspace
lwork = work_size.int32
scratchspace.setLen(lwork)

# Decompose matrix
geqrf(m.unsafeAddr, n.unsafeAddr, Q.get_data_ptr, m.unsafeAddr, # lda
geqrf(m.unsafeAddr, n.unsafeAddr, Q.dataPointerMaybeCast, m.unsafeAddr, # lda
tau[0].addr, scratchspace[0].addr, lwork.addr, info.addr)
if unlikely(info < 0):
raise newException(ValueError, "Illegal parameter in geqrf: " & $(-info))
Expand All @@ -190,8 +208,14 @@

overload(gesdd, sgesdd)
overload(gesdd, dgesdd)

proc gesdd*[T: SomeFloat](a: var Tensor[T], U, S, Vh: var Tensor[T], scratchspace: var seq[T]) =
overload(gesdd, cgesdd)
overload(gesdd, zgesdd)

proc gesdd*[T: SupportedDecomposition; X: SupportedDecomposition](a: var Tensor[T],
U: var Tensor[T],
S: var Tensor[X],
Vh: var Tensor[T],
scratchspace: var seq[T]) =
## Wrapper for LAPACK gesdd routine
## (GEneral Singular value Decomposition by Divide & conquer)
##
Expand Down Expand Up @@ -249,34 +273,57 @@
iwork = newSeqUninit[int32](8 * k)

U.newMatrixUninitColMajor(ldu, ucol)
S = newTensorUninit[T](k.int)
S = newTensorUninit[X](k.int)
Vh.newMatrixUninitColMajor(ldvt, n)

# Querying workspace size
gesdd(jobz, m.unsafeAddr, n.unsafeAddr,
a.get_data_ptr, m.unsafeAddr, # lda
S.get_data_ptr,
U.get_data_ptr, ldu.unsafeAddr,
Vh.get_data_ptr, ldvt.unsafeAddr,
work_size.addr,
lwork.addr, iwork[0].addr,
info.addr
)

# Allocating workspace
lwork = work_size.int32
scratchspace.setLen(lwork)
when T is SomeFloat:
# Querying workspace size
gesdd(jobz, m.unsafeAddr, n.unsafeAddr,
a.dataPointerMaybeCast, m.unsafeAddr, # lda
S.dataPointerMaybeCast,
U.dataPointerMaybeCast, ldu.unsafeAddr,
Vh.dataPointerMaybeCast, ldvt.unsafeAddr,
work_size.addr,
lwork.addr, iwork[0].addr,
info.addr
)

# Allocating workspace
lwork = work_size.int32
scratchspace.setLen(lwork)

# Decompose matrix
gesdd(jobz, m.unsafeAddr, n.unsafeAddr,
a.dataPointerMaybeCast, m.unsafeAddr, # lda
S.dataPointerMaybeCast,
U.dataPointerMaybeCast, ldu.unsafeAddr,
Vh.dataPointerMaybeCast, ldvt.unsafeAddr,
scratchspace.dataPointerMaybeCast,
lwork.addr, iwork[0].addr,
info.addr
)
else:
# recommendation for `jobz` != 'N`
let rwork_size = max(1, 5*(min(m,n))^2 + 7*min(m,n))
var rwork = newSeq[X](rwork_size)

# Allocating workspace, based on recommendation for `jobz` == `S`. Cannot
# retrieve by query when `rwork` is given as well I think. Maybe doing something
# wrong
lwork = 3*(min(m.int32, n.int32))^2 + max(max(m.int32, n.int32), 4*(min(m.int32, n.int32))^2 + 4*min(m.int32, n.int32))
scratchspace.setLen(lwork)

# Decompose matrix
gesdd(jobz, m.unsafeAddr, n.unsafeAddr,
a.dataPointerMaybeCast, m.unsafeAddr, # lda
S.dataPointerMaybeCast,
U.dataPointerMaybeCast, ldu.unsafeAddr,
Vh.dataPointerMaybeCast, ldvt.unsafeAddr,
scratchspace.dataPointerMaybeCast,
lwork.addr, rwork.dataPointerMaybeCast, iwork[0].addr,
info.addr
)

# Decompose matrix
gesdd(jobz, m.unsafeAddr, n.unsafeAddr,
a.get_data_ptr, m.unsafeAddr, # lda
S.get_data_ptr,
U.get_data_ptr, ldu.unsafeAddr,
Vh.get_data_ptr, ldvt.unsafeAddr,
scratchspace[0].addr,
lwork.addr, iwork[0].addr,
info.addr
)

if info > 0:
# TODO, this should not be an exception, not converging is something that can happen and should
Expand All @@ -290,8 +337,10 @@
# --------------------------------------------------------------------------------------
overload(getrf, sgetrf)
overload(getrf, dgetrf)
overload(getrf, cgetrf)
overload(getrf, zgetrf)

proc getrf*[T: SomeFloat](lu: var Tensor[T], pivot_indices: var seq[int32]) =
proc getrf*[T: SupportedDecomposition](lu: var Tensor[T], pivot_indices: var seq[int32]) =
## Wrapper for LAPACK getrf routine
## (GEneral ??? Pivoted LU Factorization)
##
Expand All @@ -307,7 +356,7 @@
var info: int32

# Decompose matrix
getrf(m.unsafeAddr, n.unsafeAddr, lu.get_data_ptr, m.unsafeAddr, # lda
getrf(m.unsafeAddr, n.unsafeAddr, lu.dataPointerMaybeCast, m.unsafeAddr, # lda
pivot_indices[0].addr, info.addr)
if info < 0:
raise newException(ValueError, "Illegal parameter in lu factorization getrf: " & $(-info))
Expand Down
23 changes: 13 additions & 10 deletions src/arraymancer/ml/metrics/common_error_functions.nim
Original file line number Diff line number Diff line change
Expand Up @@ -16,23 +16,27 @@ import ../../tensor

# ####################################################

proc squared_error*[T](y, y_true: T): T {.inline.} =
proc squared_error*[T: SomeFloat](y, y_true: T): T {.inline.} =
  ## Squared error for a single value, |y_true - y| ^2
  let diff = y_true - y
  result = diff * diff

proc squared_error*[T](y, y_true: Tensor[T]): Tensor[T] {.noinit.} =
proc squared_error*[T: SomeFloat](y, y_true: Complex[T]): T {.inline.} =
  ## Squared error for a single complex value, |y_true - y| ^2
  ## (the squared magnitude of the difference — always a real value).
  let magnitude = abs(y_true - y)
  result = magnitude * magnitude

proc squared_error*[T: SomeFloat](y, y_true: Tensor[T] | Tensor[Complex[T]]): Tensor[T] {.noinit.} =
  ## Element-wise squared error for a tensor, |y_true - y| ^2
  ## Note: per the signature, complex inputs still produce a *real*
  ## tensor, holding the squared magnitudes of the differences.
  result = map2_inline(y_true, y, squared_error(x,y))

proc mean_squared_error*[T](y, y_true: Tensor[T]): T =
proc mean_squared_error*[T: SomeFloat](y, y_true: Tensor[T] | Tensor[Complex[T]]): T =
  ## Also known as MSE or L2 loss, mean squared error between elements:
  ## sum(|y_true - y| ^2)/m
  ## where m is the number of elements
  result = mean(squared_error(y, y_true))

# ####################################################

proc relative_error*[T: SomeFloat](y, y_true: T): T {.inline.} =
proc relative_error*[T: SomeFloat](y, y_true: T | Complex[T]): T {.inline.} =
## Relative error, |y_true - y|/max(|y_true|, |y|)
## Normally the relative error is defined as |y_true - y| / |y_true|,
## but here max is used to make it symmetric and to prevent dividing by zero,
Expand All @@ -43,14 +47,14 @@ proc relative_error*[T: SomeFloat](y, y_true: T): T {.inline.} =
return 0.T
result = abs(y_true - y) / denom

proc relative_error*[T](y, y_true: Tensor[T]): Tensor[T] {.noinit.} =
proc relative_error*[T: SomeFloat](y, y_true: Tensor[T] | Tensor[Complex[T]]): Tensor[T] {.noinit.} =
  ## Relative error for Tensor, element-wise |y_true - x|/max(|y_true|, |x|)
  ## Normally the relative error is defined as |y_true - x| / |y_true|,
  ## but here max is used to make it symmetric and to prevent dividing by zero,
  ## guaranteed to return zero in the case when both values are zero.
  ## Note: per the signature, complex inputs produce a *real* tensor.
  result = map2_inline(y, y_true, relative_error(x,y))

proc mean_relative_error*[T](y, y_true: Tensor[T]): T =
proc mean_relative_error*[T: SomeFloat](y, y_true: Tensor[T] | Tensor[Complex[T]]): T =
## Mean relative error for Tensor, mean of the element-wise
## |y_true - y|/max(|y_true|, |y|)
## Normally the relative error is defined as |y_true - y| / |y_true|,
Expand All @@ -60,16 +64,15 @@ proc mean_relative_error*[T](y, y_true: Tensor[T]): T =

# ####################################################

proc absolute_error*[T: SomeFloat](y, y_true: T): T {.inline.} =
proc absolute_error*[T: SomeFloat](y, y_true: T | Complex[T]): T {.inline.} =
  ## Absolute error for a single value, |y_true - y|
  # For complex inputs `abs` is the complex magnitude, so the result is
  # always a real value of type T.
  result = abs(y_true - y)

proc absolute_error*[T](y, y_true: Tensor[T]): Tensor[T] {.noinit.} =
proc absolute_error*[T: SomeFloat](y, y_true: Tensor[T] | Tensor[Complex[T]]): Tensor[T] {.noinit.} =
  ## Element-wise absolute error for a tensor
  ## Note: per the signature, complex inputs produce a *real* tensor of
  ## element-wise magnitudes.
  result = map2_inline(y, y_true, y.absolute_error(x))

proc mean_absolute_error*[T](y, y_true: Tensor[T]): T =
proc mean_absolute_error*[T: SomeFloat](y, y_true: Tensor[T] | Tensor[Complex[T]]): T =
## Also known as L1 loss, absolute error between elements:
## sum(|y_true - y|)/m
## where m is the number of elements
Expand Down
Loading
Loading