From f859521425f8b63c1d84789480b51013165f7065 Mon Sep 17 00:00:00 2001
From: Karl Pierce
Date: Wed, 3 May 2023 10:57:15 -0400
Subject: [PATCH 01/18] Start making a block coordinate descent decomposition

---
 btas/btas.h           |   1 +
 btas/generic/cp_bcd.h | 394 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 395 insertions(+)
 create mode 100644 btas/generic/cp_bcd.h

diff --git a/btas/btas.h b/btas/btas.h
index 53a9b008..095fd517 100644
--- a/btas/btas.h
+++ b/btas/btas.h
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include

diff --git a/btas/generic/cp_bcd.h b/btas/generic/cp_bcd.h
new file mode 100644
index 00000000..f8844834
--- /dev/null
+++ b/btas/generic/cp_bcd.h
@@ -0,0 +1,394 @@
+//
+// Created by Karl Pierce on 4/29/23.
+//
+
+#ifndef BTAS_GENERIC_CP_BCD_H
+#define BTAS_GENERIC_CP_BCD_H
+
+#include
+
+#include
+#include
+#include
+#include
+
+namespace btas{
+  /** \brief Computes the Canonical Product (CP) decomposition of an order-N
+    tensor using block coordinate descent (BCD).
+
+    This computes the CP decomposition of btas::Tensor objects with row-major
+    storage only, with fixed (compile-time) and variable (run-time)
+    ranks. Also provides Tucker and randomized Tucker-like compressions coupled
+    with CP-BCD decomposition. Does not support strided ranges.
+
+    \warning this code takes a non-const reference \c tensor_ref but does
+    not modify the values. This is a result of the API (reshape needs a non-const tensor)
+
+    Synopsis:
+    \code
+    // Constructors
+    CP_BCD A(tensor)                            // CP_BCD object with empty factor
+                                                // matrices and no symmetries
+    CP_BCD A(tensor, symms)                     // CP_BCD object with empty factor
+                                                // matrices and symmetries
+
+    // Operations
+    A.compute_rank(rank, converge_test)         // Computes the CP_BCD of tensor to
+                                                // rank, rank build and HOSVD options
+
+    A.compute_rank_random(rank, converge_test)  // Computes the CP_BCD of tensor to
+                                                // rank. Factor matrices built at rank
+                                                // with random numbers
+
+    A.compute_error(converge_test, omega)       // Computes the CP_BCD of tensor to
+                                                // 2-norm
+                                                // error < omega.
+
+    // See documentation for full range of options
+
+    // Accessing Factor Matrices
+    A.get_factor_matrices()                     // Returns a vector of factor matrices, if
+                                                // they have been computed
+
+    A.reconstruct()                             // Returns the tensor computed using the
+                                                // CP factor matrices
+    \endcode
+  */
+
+  template >
+  class CP_BCD : public CP_ALS {
+   public:
+    using T = typename Tensor::value_type;
+    using RT = real_type_t;
+    using RTensor = rebind_tensor_t;
+
+    using CP::A;
+    using CP::ndim;
+    using CP::symmetries;
+    using typename CP::ind_t;
+    using typename CP::ord_t;
+    using CP_ALS::tensor_ref;
+    using CP_ALS::size;
+
+    /// Create a CP BCD object, child class of the CP object
+    /// that stores the reference tensor.
+    /// Reference tensor has no symmetries.
+    /// \param[in] tensor the reference tensor to be decomposed.
+    CP_BCD(Tensor &tensor, ind_t block_size = 1) : CP_ALS(tensor), blocksize(block_size){
+    }
+
+    /// Create a CP BCD object, child class of the CP object
+    /// that stores the reference tensor.
+    /// Reference tensor has symmetries.
+    /// Symmetries should be set such that the higher mode indices
+    /// are set equal to lower mode indices (a 4th-order tensor
+    /// where the second & third modes are equal would have
+    /// symmetries of {0,1,1,3})
+    /// \param[in] tensor the reference tensor to be decomposed.
+    /// \param[in] symms the symmetries of the reference tensor.
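// Aside (illustrative example, not from the original patch; the symmetry
// vector's element type is assumed here):
//   std::vector<size_t> symms{0, 1, 1, 3};  // modes 1 and 2 equivalent
//   CP_BCD A(tensor, symms, /* block_size = */ 2);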
+    CP_BCD(Tensor &tensor, std::vector &symms, ind_t block_size = 1)
+        : CP_ALS(tensor, symms), blocksize(block_size) {
+    }
+
+    CP_BCD() = default;
+
+    ~CP_BCD() = default;
+
+   protected:
+    Tensor gradient, block_ref;  // Stores gradient of BCD for fitting.
+    ind_t blocksize;             // Size of block gradient
+    std::vector blockfactors;
+
+    /// computed the CP decomposition using ALS to minimize the loss function for fixed rank \p rank
+    /// \param[in] rank The rank of the CP decomposition.
+    /// \param[in, out] converge_test Test to see if ALS is converged, holds the value of fit.
+    /// \param[in] dir Should the CP decomposition be computed without calculating the
+    /// Khatri-Rao product?
+    /// \param[in] max_als the maximum number of ALS iterations
+    /// to compute before giving up
+    /// on CP-ALS. Default = 1e5.
+    /// \param[in] calculate_epsilon Should the 2-norm
+    /// error be calculated ||T_exact - T_approx|| = epsilon.
+    /// \param[in] tcutALS
+    /// How small the difference in factor matrices must be to consider ALS of a
+    /// single rank converged. Default = 0.1.
+    /// \param[in, out] epsilon The 2-norm
+    /// error between the exact and approximated reference tensor
+    /// \param[in,out] fast_pI Should the pseudo inverse be computed using a fast Cholesky decomposition;
+    /// on return \c fast_pI will be true if use of Cholesky was successful
+    virtual void ALS(ind_t rank, ConvClass &converge_test, bool dir, int max_als, bool calculate_epsilon, double &epsilon,
+                     bool &fast_pI){
+      std::vector order;
+      for(auto i = 0; i < ndim; ++i){
+        order.emplace_back(i);
+      }
+
+      block_ref = tensor_ref;
+
+      // Number of full blocks with rank blocksize
+      // plus one more block with rank rank % blocksize
+      int n_full_blocks = int( rank / blocksize),
+          last_blocksize = rank % blocksize;
+
+      long block_step = 0, z = 0;
+      this->AtA = std::vector(ndim);
+      // copying all of A to block factors.
Then we are going to take slices of A + // that way we can leverage all of `CP_ALS` code without modification + blockfactors = A; + // this stores the factors from rank 0 to #blocks * blocksize + std::vector current_grads(ndim); + // Compute all the BCD of the full blocks + for (long b = 0; b < n_full_blocks; ++b, block_step += blocksize) { + // Take the b-th block of the factor matrix also take the + // find the partial grammian for the blocked factors + for (size_t i = 0; i < ndim; ++i){ + auto & a_mat = A[i]; + auto lower = {z, block_step}, upper = {long(a_mat.extent(0)), block_step + blocksize}; + a_mat = make_view(blockfactors[i].range().slice(lower, upper), blockfactors[i].storage()); + contract(this->one, a_mat, {1, 2}, a_mat.conj(), {1, 3}, this->zero, this->AtA[i], {2, 3}); + } + + // Do the ALS loop + //CP_ALS::ALS(blocksize, converge_test, dir, max_als, calculate_epsilon, epsilon, fast_pI); + // First do it manually so we know its right + // Compute ALS of the bth block against the gradient + // computed by subtracting the previous blocks from the reference + size_t count = 0; + bool is_converged = false; + bool matlab = true; + auto one_over_tref = 1.0 / norm(tensor_ref); + auto fit = 1.0, change = 0.0; + do { + ++count; + this->num_ALS++; + + for (size_t i = 0; i < ndim; i++) { + this->direct(i, blocksize, fast_pI, matlab, converge_test, gradient); + // BCD(i, block_step, block_step + blocksize, fast_pI, matlab, converge_test); + auto &ai = A[i]; + contract(this->one, ai, {1, 2}, ai.conj(), {1, 3}, this->zero, this->AtA[i], {2, 3}); + // Copy the block computed in A to the correct portion in blockfactors + // (this will replace A at the end) + copy_blocks(blockfactors[i], A[i], block_step, block_step + blocksize); + } + // Copy the lambda values into the correct location in blockfactors + size_t c = 0; + for (auto b = block_step; b < block_step + blocksize; ++b, ++c) { + blockfactors[ndim](b) = A[ndim](c); + } + // Test the system to see if converged. Doing the hard way first + // To compute the accuracy fully compute CP approximation using blocks 0 through b. 
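// Aside: at this stage the slices taken below cover only the current
// block's columns, [block_step, block_step + blocksize), even though the
// comment above says "blocks 0 through b"; patch 04 later widens the lower
// bound to column 0 so the cumulative fit really spans every fitted block.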
+ for (size_t i = 0; i < ndim; ++i) { + auto & a_mat = blockfactors[i]; + auto lower = {z, block_step}, upper = {long(a_mat.extent(0)), block_step + blocksize}; + current_grads[i] = make_view(a_mat.range().slice(lower, upper), a_mat.storage()); + } + + auto temp = reconstruct(current_grads, order, blockfactors[ndim]); + auto newfit = 1.0 - norm(temp - tensor_ref) * one_over_tref; + std::cout << newfit << std::endl; + change = abs(fit - newfit); + fit = newfit; + std::cout << fit << "\t" << change << std::endl; + is_converged = (change < 0.001); + if(is_converged) { + auto grad = gradient - temp; + auto grad_ptr = grad.begin(); + for(auto ptr = tensor_ref.begin(); ptr != tensor_ref.end(); ++ptr, ++grad_ptr) + *(ptr) = *(grad_ptr); + } + }while(count < max_als && !is_converged); + std::cout << tensor_ref << std::endl; + } + // Fix tensor_ref + auto ptr = tensor_ref.begin(); + for(auto g_ptr = gradient.begin(); g_ptr != gradient.end(); ++g_ptr, ++ptr) + *(ptr) = *(g_ptr); + } + + void copy_blocks(Tensor & to, Tensor & from, ind_t block_start, ind_t block_end){ + auto tref_dim = to.extent(0); + auto to_rank = to.extent(1), from_rank = from.extent(1); + + auto to_ptr = to.data(), from_ptr = from.data(); + ind_t from_pos = 0; + for (ind_t i = 0, skip = 0; i < tref_dim; ++i, skip += to_rank){ + for (auto b = block_start; b < block_end; ++b, ++from_pos){ + *(to_ptr + skip + b) = *(from_ptr + from_pos); + } + } + } + /// Computes an optimized factor matrix holding all others constant. + /// No Khatri-Rao product computed, immediate contraction + + /// \param[in] n The mode being optimized, all other modes held constant + /// \param[in] rank The current rank, column dimension of the factor matrices + /// \param[in,out] fast_pI Should the pseudo inverse be computed using a fast cholesky decomposition + /// return if computing the \c fast_pI was successful. + /// \param[in, out] matlab If \c fast_pI = true then try to solve VA = B instead of taking pseudoinverse + /// in the same manner that matlab would compute the inverse. If this fails, variable will be manually + /// set to false and SVD will be used. + /// return if \c matlab was successful + /// \param[in, out] converge_test Test to see if ALS is converged, holds the value of fit. test to see if the ALS is converged + + void BCD(size_t n, ind_t block_start, ind_t block_end, bool &fast_pI, bool &matlab, ConvClass &converge_test, double lambda = 0.0) { + // Determine if n is the last mode, if it is first contract with first mode + // and transpose the product + bool last_dim = n == ndim - 1; + // product of all dimensions + ord_t LH_size = size; + size_t contract_dim = last_dim ? 0 : ndim - 1; + ind_t offset_dim = tensor_ref.extent(n); + ind_t brank = block_end - block_start; + ind_t pseudo_rank = brank; + + // Store the dimensions which are available to hadamard contract + std::vector dimensions; + for (size_t i = last_dim ? 1 : 0; i < (last_dim ? ndim : ndim - 1); i++) { + dimensions.push_back(tensor_ref.extent(i)); + } + + // Modifying the dimension of tensor_ref so store the range here to resize + Range R = tensor_ref.range(); + + // Resize the tensor which will store the product of tensor_ref and the first factor matrix + Tensor temp = Tensor(size / tensor_ref.extent(contract_dim), brank); + gradient.resize( + Range{Range1{last_dim ? tensor_ref.extent(contract_dim) : size / tensor_ref.extent(contract_dim)}, + Range1{last_dim ? 
size / tensor_ref.extent(contract_dim) : tensor_ref.extent(contract_dim)}}); + + // contract tensor ref and the first factor matrix + gemm((last_dim ? blas::Op::Trans : blas::Op::NoTrans), blas::Op::NoTrans, this->one , (last_dim? gradient.conj():gradient), blockfactors[contract_dim].conj(), this->zero, + temp); + + // Resize tensor_ref + gradient.resize(R); + // Remove the dimension which was just contracted out + LH_size /= tensor_ref.extent(contract_dim); + + // n tells which dimension not to contract, and contract_dim says which dimension I am trying to contract. + // If n == contract_dim then that mode is skipped. + // if n == ndim - 1, my contract_dim = 0. The gemm transposes to make rank = ndim - 1, so I + // move the pointer that preserves the last dimension to n = ndim -2. + // In all cases I want to walk through the orders in tensor_ref backward so contract_dim = ndim - 2 + n = last_dim ? ndim - 2 : n; + contract_dim = ndim - 2; + + while (contract_dim > 0) { + // Now temp is three index object where temp has size + // (size of tensor_ref/product of dimension contracted, dimension to be + // contracted, rank) + ord_t idx2 = dimensions[contract_dim], + idx1 = LH_size / idx2; + temp.resize( + Range{Range1{idx1}, Range1{idx2}, Range1{pseudo_rank}}); + Tensor contract_tensor; + //Tensor contract_tensor(Range{Range1{idx1}, Range1{pseudo_rank}}); + //contract_tensor.fill(0.0); + const auto &a = blockfactors[(last_dim ? contract_dim + 1 : contract_dim)]; + // If the middle dimension is the mode not being contracted, I will move + // it to the right hand side temp((size of tensor_ref/product of + // dimension contracted, rank * mode n dimension) + if (n == contract_dim) { + pseudo_rank *= offset_dim; + } + + // If the code hasn't hit the mode of interest yet, it will contract + // over the middle dimension and sum over the rank. + + else if (contract_dim > n) { + middle_contract(this->one, temp, a.conj(), this->zero, contract_tensor); + temp = contract_tensor; + } + + // If the code has passed the mode of interest, it will contract over + // the middle dimension and sum over rank * mode n dimension + else { + middle_contract_with_pseudorank(this->one, temp, a.conj(), this->zero, contract_tensor); + temp = contract_tensor; + } + + LH_size /= idx2; + contract_dim--; + } + n = last_dim ? n+1 : n; + + // If the mode of interest is the 0th mode, then the while loop above + // contracts over all other dimensions and resulting temp is of the + // correct dimension If the mode of interest isn't 0th mode, must contract + // out the 0th mode here, the above algorithm can't perform this + // contraction because the mode of interest is coupled with the rank + if (n != 0) { + ind_t idx1 = dimensions[0]; + temp.resize(Range{Range1{idx1}, Range1{offset_dim}, Range1{brank}}); + Tensor contract_tensor(Range{Range1{offset_dim}, Range1{brank}}); + contract_tensor.fill(0.0); + + const auto &a = blockfactors[(last_dim ? 
1 : 0)]; + front_contract(this->one, temp, a.conj(), this->zero, contract_tensor); + + temp = contract_tensor; + } + + // Add lambda to factor matrices if RALS + if(lambda !=0){ + auto LamA = blockfactors[n]; + scal(lambda, LamA); + temp += LamA; + } + + // multiply resulting matrix temp by pseudoinverse to calculate optimized + // factor matrix + pseudoinverse_block(n, brank, fast_pI, matlab, temp, lambda); + + // Normalize the columns of the new factor matrix and update + this->normCol(temp, block_start); + auto ptr = temp.data(); + blockfactors[n] = temp; + { + auto tref_dim = tensor_ref.extent(n); + auto A_ptr = A[n].data(); + auto rank = A[n].extent(1); + auto temp_pos = 0; + for (ind_t i = 0, skip = 0; i < tref_dim; ++i, skip += rank){ + for (auto b = block_start; b < block_end; ++b, ++temp_pos){ + *(A_ptr + skip + b) = *(ptr + temp_pos); + } + } + } + } + + /// Calculates the column norms of a matrix and saves the norm values into + /// lambda tensor (last matrix in the A) + + /// \param[in, out] Mat The matrix whose column will be normalized, return + /// \c Mat with all columns normalized + + void normCol(Tensor &Mat, ord_t block_start) { + if (Mat.rank() > 2) BTAS_EXCEPTION("normCol with rank > 2 not yet supported"); + ind_t rank = Mat.extent(1), + Nsize = Mat.extent(0); + ord_t size = Mat.size(); + + auto Mat_ptr = Mat.data(); + std::vector lambda; + for(auto i = 0; i < rank; ++i) lambda.emplace_back(T(0.0)); + + auto lam_ptr = lambda.data(); + for (ord_t i = 0; i < size; ++i) { + *(lam_ptr + i % rank) += *(Mat_ptr + i) * btas::impl::conj(*(Mat_ptr + i)); + } + + auto A_ptr = A[ndim].data() + block_start; + for (ind_t i = 0; i < rank; ++i) { + auto val = sqrt(*(lam_ptr + i)); + *(A_ptr + i) = val; + val = (abs(val) < 1e-12 ? 0.0 : 1.0 / val); + btas::scal(Nsize, val, (Mat_ptr + i), rank); + } + } + + }; +} + +#endif // BTAS_GENERIC_CP_BCD_H From bc235f03d70c2810ee8b340ec1cda97ee3f0ed08 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Wed, 3 May 2023 15:41:19 -0400 Subject: [PATCH 02/18] Use target instead of assuming tensor_ref For BCD --- btas/generic/cp_als.h | 36 ++++++++++++++++++------------------ btas/generic/cp_rals.h | 4 ++-- btas/generic/tuck_cp_als.h | 2 +- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/btas/generic/cp_als.h b/btas/generic/cp_als.h index 132956ce..12bba3f9 100644 --- a/btas/generic/cp_als.h +++ b/btas/generic/cp_als.h @@ -625,7 +625,7 @@ namespace btas { if (tmp != i) { A[i] = A[tmp]; } else if (dir) { - direct(i, rank, fast_pI, matlab, converge_test); + direct(i, rank, fast_pI, matlab, converge_test, tensor_ref); } else { update_w_KRP(i, rank, fast_pI, matlab, converge_test); } @@ -737,52 +737,52 @@ namespace btas { /// return if \c matlab was successful /// \param[in, out] converge_test Test to see if ALS is converged, holds the value of fit. test to see if the ALS is converged - void direct(size_t n, ind_t rank, bool &fast_pI, bool &matlab, ConvClass &converge_test, double lambda = 0.0) { + void direct(size_t n, ind_t rank, bool &fast_pI, bool &matlab, ConvClass &converge_test, Tensor& target, double lambda = 0.0) { // Determine if n is the last mode, if it is first contract with first mode // and transpose the product bool last_dim = n == ndim - 1; // product of all dimensions ord_t LH_size = size; size_t contract_dim = last_dim ? 
0 : ndim - 1; - ind_t offset_dim = tensor_ref.extent(n); + ind_t offset_dim = target.extent(n); ind_t pseudo_rank = rank; // Store the dimensions which are available to hadamard contract std::vector dimensions; for (size_t i = last_dim ? 1 : 0; i < (last_dim ? ndim : ndim - 1); i++) { - dimensions.push_back(tensor_ref.extent(i)); + dimensions.push_back(target.extent(i)); } - // Modifying the dimension of tensor_ref so store the range here to resize - Range R = tensor_ref.range(); + // Modifying the dimension of target so store the range here to resize + Range R = target.range(); //Tensor an(A[n].range()); - // Resize the tensor which will store the product of tensor_ref and the first factor matrix - Tensor temp = Tensor(size / tensor_ref.extent(contract_dim), rank); - tensor_ref.resize( - Range{Range1{last_dim ? tensor_ref.extent(contract_dim) : size / tensor_ref.extent(contract_dim)}, - Range1{last_dim ? size / tensor_ref.extent(contract_dim) : tensor_ref.extent(contract_dim)}}); + // Resize the tensor which will store the product of target and the first factor matrix + Tensor temp = Tensor(size / target.extent(contract_dim), rank); + target.resize( + Range{Range1{last_dim ? target.extent(contract_dim) : size / target.extent(contract_dim)}, + Range1{last_dim ? size / target.extent(contract_dim) : target.extent(contract_dim)}}); // contract tensor ref and the first factor matrix - gemm((last_dim ? blas::Op::Trans : blas::Op::NoTrans), blas::Op::NoTrans, this->one , (last_dim? tensor_ref.conj():tensor_ref), A[contract_dim].conj(), this->zero, + gemm((last_dim ? blas::Op::Trans : blas::Op::NoTrans), blas::Op::NoTrans, this->one , (last_dim? target.conj():target), A[contract_dim].conj(), this->zero, temp); - // Resize tensor_ref - tensor_ref.resize(R); + // Resize target + target.resize(R); // Remove the dimension which was just contracted out - LH_size /= tensor_ref.extent(contract_dim); + LH_size /= target.extent(contract_dim); // n tells which dimension not to contract, and contract_dim says which dimension I am trying to contract. // If n == contract_dim then that mode is skipped. // if n == ndim - 1, my contract_dim = 0. The gemm transposes to make rank = ndim - 1, so I // move the pointer that preserves the last dimension to n = ndim -2. - // In all cases I want to walk through the orders in tensor_ref backward so contract_dim = ndim - 2 + // In all cases I want to walk through the orders in target backward so contract_dim = ndim - 2 n = last_dim ? ndim - 2 : n; contract_dim = ndim - 2; while (contract_dim > 0) { // Now temp is three index object where temp has size - // (size of tensor_ref/product of dimension contracted, dimension to be + // (size of target/product of dimension contracted, dimension to be // contracted, rank) ord_t idx2 = dimensions[contract_dim], idx1 = LH_size / idx2; @@ -793,7 +793,7 @@ namespace btas { //contract_tensor.fill(0.0); const auto &a = A[(last_dim ? 
contract_dim + 1 : contract_dim)]; // If the middle dimension is the mode not being contracted, I will move - // it to the right hand side temp((size of tensor_ref/product of + // it to the right hand side temp((size of target/product of // dimension contracted, rank * mode n dimension) if (n == contract_dim) { pseudo_rank *= offset_dim; diff --git a/btas/generic/cp_rals.h b/btas/generic/cp_rals.h index 55e612b9..a3d6ef08 100644 --- a/btas/generic/cp_rals.h +++ b/btas/generic/cp_rals.h @@ -172,9 +172,9 @@ namespace btas { A[i] = A[tmp]; lambda[i] = lambda[tmp]; } else if (dir) { - this->direct(i, rank, fast_pI, matlab, converge_test, lambda[i]); + this->direct(i, rank, fast_pI, matlab, converge_test, tensor_ref, lambda[i]); } else { - update_w_KRP(i, rank, fast_pI, matlab, converge_test, lambda[i]); + update_w_KRP(i, rank, fast_pI, matlab, converge_test, tensor_ref, lambda[i]); } // Compute the value s after normalizing the columns auto & ai = A[i]; diff --git a/btas/generic/tuck_cp_als.h b/btas/generic/tuck_cp_als.h index 25d0db81..469723df 100644 --- a/btas/generic/tuck_cp_als.h +++ b/btas/generic/tuck_cp_als.h @@ -484,7 +484,7 @@ namespace btas{ count++; this->num_ALS++; for (size_t i = 0; i < ndim; i++) { - this->direct(i, rank, fast_pI, matlab, converge_test, lambda[i]); + this->direct(i, rank, fast_pI, matlab, converge_test, tensor_ref, lambda[i]); // Compute the value s after normalizing the columns auto & ai = A[i]; this->s = helper(i, ai); From 4079f1a26c3642a3fabfa9b2d2cb797148f7450e Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Wed, 3 May 2023 15:48:07 -0400 Subject: [PATCH 03/18] Start fixing BCD --- btas/generic/cp_bcd.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/btas/generic/cp_bcd.h b/btas/generic/cp_bcd.h index f8844834..49db5953 100644 --- a/btas/generic/cp_bcd.h +++ b/btas/generic/cp_bcd.h @@ -192,13 +192,10 @@ namespace btas{ std::cout << fit << "\t" << change << std::endl; is_converged = (change < 0.001); if(is_converged) { - auto grad = gradient - temp; - auto grad_ptr = grad.begin(); - for(auto ptr = tensor_ref.begin(); ptr != tensor_ref.end(); ++ptr, ++grad_ptr) - *(ptr) = *(grad_ptr); + gradient = tensor_ref - temp; } }while(count < max_als && !is_converged); - std::cout << tensor_ref << std::endl; + std::cout << gradient << std::endl; } // Fix tensor_ref auto ptr = tensor_ref.begin(); From f4a5ab8ca500ee5a0d5411024d77e028a58db87b Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Thu, 4 May 2023 12:28:17 -0400 Subject: [PATCH 04/18] Fix cp_bcd Working for block size of 1 and rank. Working on intermediate block size, specifically if there is a remainder not working --- btas/generic/cp_bcd.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/btas/generic/cp_bcd.h b/btas/generic/cp_bcd.h index 49db5953..e7c9dc54 100644 --- a/btas/generic/cp_bcd.h +++ b/btas/generic/cp_bcd.h @@ -135,6 +135,7 @@ namespace btas{ // copying all of A to block factors. 
Then we are going to take slices of A // that way we can leverage all of `CP_ALS` code without modification blockfactors = A; + gradient = tensor_ref; // this stores the factors from rank 0 to #blocks * blocksize std::vector current_grads(ndim); // Compute all the BCD of the full blocks @@ -150,7 +151,7 @@ namespace btas{ // Do the ALS loop //CP_ALS::ALS(blocksize, converge_test, dir, max_als, calculate_epsilon, epsilon, fast_pI); - // First do it manually so we know its right + // First do it manually, so we know its right // Compute ALS of the bth block against the gradient // computed by subtracting the previous blocks from the reference size_t count = 0; @@ -158,6 +159,7 @@ namespace btas{ bool matlab = true; auto one_over_tref = 1.0 / norm(tensor_ref); auto fit = 1.0, change = 0.0; + detail::set_norm(converge_test, norm(gradient)); do { ++count; this->num_ALS++; @@ -171,6 +173,8 @@ namespace btas{ // (this will replace A at the end) copy_blocks(blockfactors[i], A[i], block_step, block_step + blocksize); } + + auto grad = reconstruct(A, order, A[ndim]); // Copy the lambda values into the correct location in blockfactors size_t c = 0; for (auto b = block_step; b < block_step + blocksize; ++b, ++c) { @@ -180,22 +184,22 @@ namespace btas{ // To compute the accuracy fully compute CP approximation using blocks 0 through b. for (size_t i = 0; i < ndim; ++i) { auto & a_mat = blockfactors[i]; - auto lower = {z, block_step}, upper = {long(a_mat.extent(0)), block_step + blocksize}; + auto lower = {z, z}, upper = {long(a_mat.extent(0)), block_step + blocksize}; current_grads[i] = make_view(a_mat.range().slice(lower, upper), a_mat.storage()); } auto temp = reconstruct(current_grads, order, blockfactors[ndim]); auto newfit = 1.0 - norm(temp - tensor_ref) * one_over_tref; - std::cout << newfit << std::endl; change = abs(fit - newfit); fit = newfit; std::cout << fit << "\t" << change << std::endl; is_converged = (change < 0.001); + + is_converged = converge_test(A, this->AtA); if(is_converged) { gradient = tensor_ref - temp; } }while(count < max_als && !is_converged); - std::cout << gradient << std::endl; } // Fix tensor_ref auto ptr = tensor_ref.begin(); From 1f2f05af82946600ec0f09046575b99480b93dc4 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Thu, 4 May 2023 12:28:30 -0400 Subject: [PATCH 05/18] Add function to change norm of converge_test --- btas/generic/cp.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/btas/generic/cp.h b/btas/generic/cp.h index 8b557289..384b6e08 100644 --- a/btas/generic/cp.h +++ b/btas/generic/cp.h @@ -71,6 +71,16 @@ namespace btas { epsilon = t.get_fit(max_iter); return; } + + template + void set_norm(T &t, double norm){ + return; + } + + template + void set_norm(FitCheck &t, double norm){ + t.set_norm(norm); + } } // namespace detail /** \brief Base class to compute the Canonical Product (CP) decomposition of an order-N From 62da0718aeb9d4206f9dd24b78242b53c716c2d6 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Mon, 8 May 2023 13:02:51 -0400 Subject: [PATCH 06/18] Update cp_bcd.h --- btas/generic/cp_bcd.h | 279 ++++++++++-------------------------------- 1 file changed, 65 insertions(+), 214 deletions(-) diff --git a/btas/generic/cp_bcd.h b/btas/generic/cp_bcd.h index e7c9dc54..d502a5ca 100644 --- a/btas/generic/cp_bcd.h +++ b/btas/generic/cp_bcd.h @@ -98,6 +98,8 @@ namespace btas{ Tensor gradient, block_ref; // Stores gradient of BCD for fitting. 
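// Aside: `gradient` acts as a running residual. Patch 04 initializes it to
// tensor_ref, and later patches subtract each converged block's
// reconstruction from it, so each BCD subproblem is an ordinary CP-ALS fit
// of one column block against what the earlier blocks have not explained.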
ind_t blocksize; // Size of block gradient std::vector blockfactors; + std::vector order; // convenient to write order of tensors for reconstruct + long z = 0; /// computed the CP decomposition using ALS to minimize the loss function for fixed rank \p rank /// \param[in] rank The rank of the CP decomposition. @@ -118,7 +120,6 @@ namespace btas{ /// on return \c fast_pI will be true if use of Cholesky was successful virtual void ALS(ind_t rank, ConvClass &converge_test, bool dir, int max_als, bool calculate_epsilon, double &epsilon, bool &fast_pI){ - std::vector order; for(auto i = 0; i < ndim; ++i){ order.emplace_back(i); } @@ -130,81 +131,18 @@ namespace btas{ int n_full_blocks = int( rank / blocksize), last_blocksize = rank % blocksize; - long block_step = 0, z = 0; + long block_step = 0; this->AtA = std::vector(ndim); // copying all of A to block factors. Then we are going to take slices of A // that way we can leverage all of `CP_ALS` code without modification blockfactors = A; gradient = tensor_ref; // this stores the factors from rank 0 to #blocks * blocksize - std::vector current_grads(ndim); // Compute all the BCD of the full blocks + auto matlab = true; for (long b = 0; b < n_full_blocks; ++b, block_step += blocksize) { - // Take the b-th block of the factor matrix also take the - // find the partial grammian for the blocked factors - for (size_t i = 0; i < ndim; ++i){ - auto & a_mat = A[i]; - auto lower = {z, block_step}, upper = {long(a_mat.extent(0)), block_step + blocksize}; - a_mat = make_view(blockfactors[i].range().slice(lower, upper), blockfactors[i].storage()); - contract(this->one, a_mat, {1, 2}, a_mat.conj(), {1, 3}, this->zero, this->AtA[i], {2, 3}); - } - - // Do the ALS loop - //CP_ALS::ALS(blocksize, converge_test, dir, max_als, calculate_epsilon, epsilon, fast_pI); - // First do it manually, so we know its right - // Compute ALS of the bth block against the gradient - // computed by subtracting the previous blocks from the reference - size_t count = 0; - bool is_converged = false; - bool matlab = true; - auto one_over_tref = 1.0 / norm(tensor_ref); - auto fit = 1.0, change = 0.0; - detail::set_norm(converge_test, norm(gradient)); - do { - ++count; - this->num_ALS++; - - for (size_t i = 0; i < ndim; i++) { - this->direct(i, blocksize, fast_pI, matlab, converge_test, gradient); - // BCD(i, block_step, block_step + blocksize, fast_pI, matlab, converge_test); - auto &ai = A[i]; - contract(this->one, ai, {1, 2}, ai.conj(), {1, 3}, this->zero, this->AtA[i], {2, 3}); - // Copy the block computed in A to the correct portion in blockfactors - // (this will replace A at the end) - copy_blocks(blockfactors[i], A[i], block_step, block_step + blocksize); - } - - auto grad = reconstruct(A, order, A[ndim]); - // Copy the lambda values into the correct location in blockfactors - size_t c = 0; - for (auto b = block_step; b < block_step + blocksize; ++b, ++c) { - blockfactors[ndim](b) = A[ndim](c); - } - // Test the system to see if converged. Doing the hard way first - // To compute the accuracy fully compute CP approximation using blocks 0 through b. 
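// Aside: the fit in the code removed here is the usual CP quality measure,
//   fit = 1 - ||T - T_approx|| / ||T||,
// with the block declared converged once the change in fit fell below
// 0.001; this patch moves that logic out of ALS() into the new BCD() below.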
- for (size_t i = 0; i < ndim; ++i) { - auto & a_mat = blockfactors[i]; - auto lower = {z, z}, upper = {long(a_mat.extent(0)), block_step + blocksize}; - current_grads[i] = make_view(a_mat.range().slice(lower, upper), a_mat.storage()); - } - - auto temp = reconstruct(current_grads, order, blockfactors[ndim]); - auto newfit = 1.0 - norm(temp - tensor_ref) * one_over_tref; - change = abs(fit - newfit); - fit = newfit; - std::cout << fit << "\t" << change << std::endl; - is_converged = (change < 0.001); - - is_converged = converge_test(A, this->AtA); - if(is_converged) { - gradient = tensor_ref - temp; - } - }while(count < max_als && !is_converged); + BCD(block_step, block_step + blocksize, max_als, fast_pI, matlab, converge_test); } - // Fix tensor_ref - auto ptr = tensor_ref.begin(); - for(auto g_ptr = gradient.begin(); g_ptr != gradient.end(); ++g_ptr, ++ptr) - *(ptr) = *(g_ptr); } void copy_blocks(Tensor & to, Tensor & from, ind_t block_start, ind_t block_end){ @@ -232,161 +170,74 @@ namespace btas{ /// return if \c matlab was successful /// \param[in, out] converge_test Test to see if ALS is converged, holds the value of fit. test to see if the ALS is converged - void BCD(size_t n, ind_t block_start, ind_t block_end, bool &fast_pI, bool &matlab, ConvClass &converge_test, double lambda = 0.0) { - // Determine if n is the last mode, if it is first contract with first mode - // and transpose the product - bool last_dim = n == ndim - 1; - // product of all dimensions - ord_t LH_size = size; - size_t contract_dim = last_dim ? 0 : ndim - 1; - ind_t offset_dim = tensor_ref.extent(n); - ind_t brank = block_end - block_start; - ind_t pseudo_rank = brank; - - // Store the dimensions which are available to hadamard contract - std::vector dimensions; - for (size_t i = last_dim ? 1 : 0; i < (last_dim ? ndim : ndim - 1); i++) { - dimensions.push_back(tensor_ref.extent(i)); + void BCD(ind_t block_start, ind_t block_end, size_t max_als, + bool &fast_pI, bool &matlab, ConvClass &converge_test, double lambda = 0.0){ + // Take the b-th block of the factor matrix also take the + // find the partial grammian for the blocked factors + auto cur_block_size = block_end - block_start; + for (size_t i = 0; i < ndim; ++i){ + auto & a_mat = A[i]; + auto lower = {z, block_start}, upper = {long(a_mat.extent(0)), block_end}; + a_mat = make_view(blockfactors[i].range().slice(lower, upper), blockfactors[i].storage()); + contract(this->one, a_mat, {1, 2}, a_mat.conj(), {1, 3}, this->zero, this->AtA[i], {2, 3}); } - - // Modifying the dimension of tensor_ref so store the range here to resize - Range R = tensor_ref.range(); - - // Resize the tensor which will store the product of tensor_ref and the first factor matrix - Tensor temp = Tensor(size / tensor_ref.extent(contract_dim), brank); - gradient.resize( - Range{Range1{last_dim ? tensor_ref.extent(contract_dim) : size / tensor_ref.extent(contract_dim)}, - Range1{last_dim ? size / tensor_ref.extent(contract_dim) : tensor_ref.extent(contract_dim)}}); - - // contract tensor ref and the first factor matrix - gemm((last_dim ? blas::Op::Trans : blas::Op::NoTrans), blas::Op::NoTrans, this->one , (last_dim? gradient.conj():gradient), blockfactors[contract_dim].conj(), this->zero, - temp); - - // Resize tensor_ref - gradient.resize(R); - // Remove the dimension which was just contracted out - LH_size /= tensor_ref.extent(contract_dim); - - // n tells which dimension not to contract, and contract_dim says which dimension I am trying to contract. 
- // If n == contract_dim then that mode is skipped. - // if n == ndim - 1, my contract_dim = 0. The gemm transposes to make rank = ndim - 1, so I - // move the pointer that preserves the last dimension to n = ndim -2. - // In all cases I want to walk through the orders in tensor_ref backward so contract_dim = ndim - 2 - n = last_dim ? ndim - 2 : n; - contract_dim = ndim - 2; - - while (contract_dim > 0) { - // Now temp is three index object where temp has size - // (size of tensor_ref/product of dimension contracted, dimension to be - // contracted, rank) - ord_t idx2 = dimensions[contract_dim], - idx1 = LH_size / idx2; - temp.resize( - Range{Range1{idx1}, Range1{idx2}, Range1{pseudo_rank}}); - Tensor contract_tensor; - //Tensor contract_tensor(Range{Range1{idx1}, Range1{pseudo_rank}}); - //contract_tensor.fill(0.0); - const auto &a = blockfactors[(last_dim ? contract_dim + 1 : contract_dim)]; - // If the middle dimension is the mode not being contracted, I will move - // it to the right hand side temp((size of tensor_ref/product of - // dimension contracted, rank * mode n dimension) - if (n == contract_dim) { - pseudo_rank *= offset_dim; + A[ndim] = Tensor(cur_block_size); + A[ndim].fill(0.0); + // Do the ALS loop + //CP_ALS::ALS(cur_block_size, converge_test, dir, max_als, calculate_epsilon, epsilon, fast_pI); + // First do it manually, so we know its right + // Compute ALS of the bth block against the gradient + // computed by subtracting the previous blocks from the reference + size_t count = 0; + bool is_converged = false; + auto one_over_tref = 1.0 / norm(tensor_ref); + auto fit = 1.0, change = 0.0; + detail::set_norm(converge_test, norm(gradient)); + do { + ++count; + this->num_ALS++; + + for (size_t i = 0; i < ndim; i++) { + this->direct(i, cur_block_size, fast_pI, matlab, converge_test, gradient); + auto &ai = A[i]; + contract(this->one, ai, {1, 2}, ai.conj(), {1, 3}, this->zero, this->AtA[i], {2, 3}); + // Copy the block computed in A to the correct portion in blockfactors + // (this will replace A at the end) + copy_blocks(blockfactors[i], A[i], block_start, block_end); } - // If the code hasn't hit the mode of interest yet, it will contract - // over the middle dimension and sum over the rank. - - else if (contract_dim > n) { - middle_contract(this->one, temp, a.conj(), this->zero, contract_tensor); - temp = contract_tensor; + // Copy the lambda values into the correct location in blockfactors + size_t c = 0; + for (auto b = block_start; b < block_end; ++b, ++c) { + blockfactors[ndim](b) = A[ndim](c); } + // Test the system to see if converged. Doing the hard way first + // To compute the accuracy fully compute CP approximation using blocks 0 through b. 
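// Aside: scoring the model this way costs a full reconstruction of a tensor
// the size of T, so the cumulative-fit branch below is best read as a
// diagnostic; patch 08 later flips compute_full_fit to false and relies on
// the cheaper Gram-matrix based converge_test instead.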
+ bool compute_full_fit = true; + Tensor temp; + if(compute_full_fit) { + std::vector current_grads(ndim); + for (size_t i = 0; i < ndim; ++i) { + auto &a_mat = blockfactors[i]; + auto lower = {z, z}, upper = {long(a_mat.extent(0)), block_end}; + current_grads[i] = make_view(a_mat.range().slice(lower, upper), a_mat.storage()); + } - // If the code has passed the mode of interest, it will contract over - // the middle dimension and sum over rank * mode n dimension - else { - middle_contract_with_pseudorank(this->one, temp, a.conj(), this->zero, contract_tensor); - temp = contract_tensor; + temp = reconstruct(current_grads, order, blockfactors[ndim]); + auto newfit = 1.0 - norm(temp - tensor_ref) * one_over_tref; + change = abs(fit - newfit); + fit = newfit; + std::cout << block_end << "\t"; + std::cout << fit << "\t" << change << std::endl; } - LH_size /= idx2; - contract_dim--; - } - n = last_dim ? n+1 : n; - - // If the mode of interest is the 0th mode, then the while loop above - // contracts over all other dimensions and resulting temp is of the - // correct dimension If the mode of interest isn't 0th mode, must contract - // out the 0th mode here, the above algorithm can't perform this - // contraction because the mode of interest is coupled with the rank - if (n != 0) { - ind_t idx1 = dimensions[0]; - temp.resize(Range{Range1{idx1}, Range1{offset_dim}, Range1{brank}}); - Tensor contract_tensor(Range{Range1{offset_dim}, Range1{brank}}); - contract_tensor.fill(0.0); - - const auto &a = blockfactors[(last_dim ? 1 : 0)]; - front_contract(this->one, temp, a.conj(), this->zero, contract_tensor); - - temp = contract_tensor; - } - - // Add lambda to factor matrices if RALS - if(lambda !=0){ - auto LamA = blockfactors[n]; - scal(lambda, LamA); - temp += LamA; - } - - // multiply resulting matrix temp by pseudoinverse to calculate optimized - // factor matrix - pseudoinverse_block(n, brank, fast_pI, matlab, temp, lambda); - - // Normalize the columns of the new factor matrix and update - this->normCol(temp, block_start); - auto ptr = temp.data(); - blockfactors[n] = temp; - { - auto tref_dim = tensor_ref.extent(n); - auto A_ptr = A[n].data(); - auto rank = A[n].extent(1); - auto temp_pos = 0; - for (ind_t i = 0, skip = 0; i < tref_dim; ++i, skip += rank){ - for (auto b = block_start; b < block_end; ++b, ++temp_pos){ - *(A_ptr + skip + b) = *(ptr + temp_pos); - } + is_converged = converge_test(A, this->AtA); + if(1.0 - fit < 1e-8) is_converged = true; + if(is_converged) { + gradient -= reconstruct(A, order, A[ndim]); + std::cout << norm(gradient - (tensor_ref - temp)) << std::endl; } - } - } - - /// Calculates the column norms of a matrix and saves the norm values into - /// lambda tensor (last matrix in the A) - - /// \param[in, out] Mat The matrix whose column will be normalized, return - /// \c Mat with all columns normalized - - void normCol(Tensor &Mat, ord_t block_start) { - if (Mat.rank() > 2) BTAS_EXCEPTION("normCol with rank > 2 not yet supported"); - ind_t rank = Mat.extent(1), - Nsize = Mat.extent(0); - ord_t size = Mat.size(); - - auto Mat_ptr = Mat.data(); - std::vector lambda; - for(auto i = 0; i < rank; ++i) lambda.emplace_back(T(0.0)); - - auto lam_ptr = lambda.data(); - for (ord_t i = 0; i < size; ++i) { - *(lam_ptr + i % rank) += *(Mat_ptr + i) * btas::impl::conj(*(Mat_ptr + i)); - } - - auto A_ptr = A[ndim].data() + block_start; - for (ind_t i = 0; i < rank; ++i) { - auto val = sqrt(*(lam_ptr + i)); - *(A_ptr + i) = val; - val = (abs(val) < 1e-12 ? 
0.0 : 1.0 / val); - btas::scal(Nsize, val, (Mat_ptr + i), rank); - } + }while(count < max_als && !is_converged); } }; From df4938ba8653961aa3cae65a8a83aa03303eda22 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Mon, 8 May 2023 14:41:34 -0400 Subject: [PATCH 07/18] BCD now completely working! --- btas/generic/cp_bcd.h | 64 +++++++++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 23 deletions(-) diff --git a/btas/generic/cp_bcd.h b/btas/generic/cp_bcd.h index d502a5ca..f6141439 100644 --- a/btas/generic/cp_bcd.h +++ b/btas/generic/cp_bcd.h @@ -140,9 +140,50 @@ namespace btas{ // this stores the factors from rank 0 to #blocks * blocksize // Compute all the BCD of the full blocks auto matlab = true; + auto one_over_tref = 1.0 / norm(tensor_ref); + auto fit = 1.0, change = 0.0; + bool compute_full_fit = true; for (long b = 0; b < n_full_blocks; ++b, block_step += blocksize) { BCD(block_step, block_step + blocksize, max_als, fast_pI, matlab, converge_test); + // Test the system to see if converged. Doing the hard way first + // To compute the accuracy fully compute CP approximation using blocks 0 through b. + if(compute_full_fit) { + std::vector current_grads(ndim); + for (size_t i = 0; i < ndim; ++i) { + auto &a_mat = blockfactors[i]; + auto lower = {z, z}, upper = {long(a_mat.extent(0)), block_step + blocksize}; + current_grads[i] = make_view(a_mat.range().slice(lower, upper), a_mat.storage()); + } + + auto temp = reconstruct(current_grads, order, blockfactors[ndim]); + auto newfit = 1.0 - norm(temp - tensor_ref) * one_over_tref; + change = abs(fit - newfit); + fit = newfit; + std::cout << block_step + blocksize << "\t"; + std::cout << fit << "\t" << change << std::endl; + } } + if(last_blocksize != 0) { + this->AtA = std::vector(ndim); + block_step = n_full_blocks * blocksize; + BCD(block_step, block_step + last_blocksize, max_als, fast_pI, matlab, converge_test); + if(compute_full_fit) { + std::vector current_grads(ndim); + for (size_t i = 0; i < ndim; ++i) { + auto &a_mat = blockfactors[i]; + auto lower = {z, z}, upper = {long(a_mat.extent(0)), block_step + last_blocksize}; + current_grads[i] = make_view(a_mat.range().slice(lower, upper), a_mat.storage()); + } + + auto temp = reconstruct(current_grads, order, blockfactors[ndim]); + auto newfit = 1.0 - norm(temp - tensor_ref) * one_over_tref; + change = abs(fit - newfit); + fit = newfit; + std::cout << block_step + blocksize << "\t"; + std::cout << fit << "\t" << change << std::endl; + } + } + A = blockfactors; } void copy_blocks(Tensor & to, Tensor & from, ind_t block_start, ind_t block_end){ @@ -190,8 +231,6 @@ namespace btas{ // computed by subtracting the previous blocks from the reference size_t count = 0; bool is_converged = false; - auto one_over_tref = 1.0 / norm(tensor_ref); - auto fit = 1.0, change = 0.0; detail::set_norm(converge_test, norm(gradient)); do { ++count; @@ -211,31 +250,10 @@ namespace btas{ for (auto b = block_start; b < block_end; ++b, ++c) { blockfactors[ndim](b) = A[ndim](c); } - // Test the system to see if converged. Doing the hard way first - // To compute the accuracy fully compute CP approximation using blocks 0 through b. 
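// Aside: from this patch on, the per-block loop in BCD() is gated only by
// converge_test(A, AtA), where each Gram matrix is formed as
//   AtA[i](r, s) = sum_k A[i](k, r) * conj(A[i](k, s));
// the reconstruct()-based cumulative fit below stays as an optional check.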
- bool compute_full_fit = true; - Tensor temp; - if(compute_full_fit) { - std::vector current_grads(ndim); - for (size_t i = 0; i < ndim; ++i) { - auto &a_mat = blockfactors[i]; - auto lower = {z, z}, upper = {long(a_mat.extent(0)), block_end}; - current_grads[i] = make_view(a_mat.range().slice(lower, upper), a_mat.storage()); - } - - temp = reconstruct(current_grads, order, blockfactors[ndim]); - auto newfit = 1.0 - norm(temp - tensor_ref) * one_over_tref; - change = abs(fit - newfit); - fit = newfit; - std::cout << block_end << "\t"; - std::cout << fit << "\t" << change << std::endl; - } is_converged = converge_test(A, this->AtA); - if(1.0 - fit < 1e-8) is_converged = true; if(is_converged) { gradient -= reconstruct(A, order, A[ndim]); - std::cout << norm(gradient - (tensor_ref - temp)) << std::endl; } }while(count < max_als && !is_converged); } From 5bb3bc861470f7fa6c0b5701931e433e5149d60b Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Thu, 18 May 2023 15:06:10 -0400 Subject: [PATCH 08/18] BCD wasn't calculating gradient correctly --- btas/generic/cp_bcd.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/btas/generic/cp_bcd.h b/btas/generic/cp_bcd.h index f6141439..bff88034 100644 --- a/btas/generic/cp_bcd.h +++ b/btas/generic/cp_bcd.h @@ -140,9 +140,9 @@ namespace btas{ // this stores the factors from rank 0 to #blocks * blocksize // Compute all the BCD of the full blocks auto matlab = true; - auto one_over_tref = 1.0 / norm(tensor_ref); + auto one_over_tref = 1.0 / this->norm(tensor_ref); auto fit = 1.0, change = 0.0; - bool compute_full_fit = true; + bool compute_full_fit = false; for (long b = 0; b < n_full_blocks; ++b, block_step += blocksize) { BCD(block_step, block_step + blocksize, max_als, fast_pI, matlab, converge_test); // Test the system to see if converged. Doing the hard way first @@ -156,7 +156,7 @@ namespace btas{ } auto temp = reconstruct(current_grads, order, blockfactors[ndim]); - auto newfit = 1.0 - norm(temp - tensor_ref) * one_over_tref; + auto newfit = 1.0 - this->norm(temp - tensor_ref) * one_over_tref; change = abs(fit - newfit); fit = newfit; std::cout << block_step + blocksize << "\t"; @@ -176,13 +176,16 @@ namespace btas{ } auto temp = reconstruct(current_grads, order, blockfactors[ndim]); - auto newfit = 1.0 - norm(temp - tensor_ref) * one_over_tref; + auto newfit = 1.0 - this->norm(temp - tensor_ref) * one_over_tref; change = abs(fit - newfit); fit = newfit; - std::cout << block_step + blocksize << "\t"; + std::cout << block_step + last_blocksize << "\t"; std::cout << fit << "\t" << change << std::endl; } } + epsilon = (compute_full_fit == false ? 
+          this->norm(tensor_ref - reconstruct(blockfactors, order, blockfactors[ndim]))
+          : 1.0 - fit);
       A = blockfactors;
     }
@@ -231,7 +234,7 @@ namespace btas{
       // computed by subtracting the previous blocks from the reference
       size_t count = 0;
       bool is_converged = false;
-      detail::set_norm(converge_test, norm(gradient));
+      detail::set_norm(converge_test, this->norm(gradient));
       do {
         ++count;
         this->num_ALS++;

From d66b529ad6d538302a2a875409a061b5af9af952 Mon Sep 17 00:00:00 2001
From: kmp5VT
Date: Thu, 18 May 2023 15:06:38 -0400
Subject: [PATCH 09/18] Add BCD unit tests

---
 unittest/tensor_cp_test.cc | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/unittest/tensor_cp_test.cc b/unittest/tensor_cp_test.cc
index beb097f4..815c521e 100644
--- a/unittest/tensor_cp_test.cc
+++ b/unittest/tensor_cp_test.cc
@@ -20,6 +20,7 @@ TEST_CASE("CP")
   using conv_class = btas::FitCheck;
   using conv_class_coupled = btas::CoupledFitCheck;
   using btas::CP_ALS;
+  using btas::CP_BCD;
   using btas::CP_RALS;
   using btas::CP_DF_ALS;
   using btas::COUPLED_CP_ALS;
@@ -374,7 +375,28 @@ TEST_CASE("CP")
       double diff = 1.0 - A1.compute_error(conv_coupled, 1e-2, 1, 19);
       CHECK(std::abs(diff - results(38,0)) <= epsilon);
     }
-    */
+    */
+  }
+  // Block coordinate descent test
+  {
+    SECTION("BCD MODE = 3, Finite rank"){
+      CP_BCD A1(D3, 9);
+      conv.set_norm(norm3);
+      double diff = A1.compute_rank_random(13, conv, 100);
+      CHECK(std::abs(diff) <= epsilon);
+    }
+    SECTION("BCD MODE = 4, Finite rank"){
+      CP_BCD A1(D4, 37);
+      conv.set_norm(norm4);
+      double diff = A1.compute_rank_random(55, conv, 100);
+      CHECK(std::abs(diff) <= epsilon);
+    }
+    SECTION("BCD MODE = 5, Finite rank"){
+      CP_BCD A1(D5, 26);
+      conv.set_norm(norm5);
+      double diff = A1.compute_rank_random(60, conv, 100);
+      CHECK(std::abs(diff) <= epsilon);
+    }
+  }
 }
 #endif

From d56085e497838c9b2503980852fb07acc641c6e9 Mon Sep 17 00:00:00 2001
From: kmp5VT
Date: Thu, 25 May 2023 14:17:18 -0400
Subject: [PATCH 10/18] Allow more than one sweep of block coordinates; this
 allows each block to be updated with knowledge of the other blocks

---
 btas/generic/cp_bcd.h | 123 +++++++++++++++++++++++-------------------
 1 file changed, 69 insertions(+), 54 deletions(-)

diff --git a/btas/generic/cp_bcd.h b/btas/generic/cp_bcd.h
index bff88034..5819916a 100644
--- a/btas/generic/cp_bcd.h
+++ b/btas/generic/cp_bcd.h
@@ -74,7 +74,7 @@ namespace btas{
     /// that stores the reference tensor.
     /// Reference tensor has no symmetries.
     /// \param[in] tensor the reference tensor to be decomposed.
-    CP_BCD(Tensor &tensor, ind_t block_size = 1) : CP_ALS(tensor), blocksize(block_size){
+    CP_BCD(Tensor &tensor, ind_t block_size = 1, size_t sweeps=1) : CP_ALS(tensor), blocksize(block_size), nsweeps(sweeps){
     }

     /// Create a CP BCD object, child class of the CP object
     /// that stores the reference tensor.
     /// Reference tensor has symmetries.
     /// Symmetries should be set such that the higher mode indices
     /// are set equal to lower mode indices (a 4th-order tensor
     /// where the second & third modes are equal would have
     /// symmetries of {0,1,1,3})
     /// \param[in] tensor the reference tensor to be decomposed.
     /// \param[in] symms the symmetries of the reference tensor.
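// Aside: the `sweeps` parameter threaded through both constructors controls
// how many times the block sequence is revisited. On sweep s > 0 a block's
// previous contribution is first added back into the residual `gradient`,
// then the block is refit, so each block is updated with knowledge of the
// blocks fitted after it.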
- CP_BCD(Tensor &tensor, std::vector &symms, ind_t block_size = 1) - : CP_ALS(tensor, symms), blocksize(block_size) { + CP_BCD(Tensor &tensor, std::vector &symms, ind_t block_size = 1, size_t sweeps=1) + : CP_ALS(tensor, symms), blocksize(block_size), nsweeps(sweeps){ } CP_BCD() = default; @@ -100,6 +100,7 @@ namespace btas{ std::vector blockfactors; std::vector order; // convenient to write order of tensors for reconstruct long z = 0; + size_t nsweeps; /// computed the CP decomposition using ALS to minimize the loss function for fixed rank \p rank /// \param[in] rank The rank of the CP decomposition. @@ -131,8 +132,6 @@ namespace btas{ int n_full_blocks = int( rank / blocksize), last_blocksize = rank % blocksize; - long block_step = 0; - this->AtA = std::vector(ndim); // copying all of A to block factors. Then we are going to take slices of A // that way we can leverage all of `CP_ALS` code without modification blockfactors = A; @@ -143,53 +142,57 @@ namespace btas{ auto one_over_tref = 1.0 / this->norm(tensor_ref); auto fit = 1.0, change = 0.0; bool compute_full_fit = false; - for (long b = 0; b < n_full_blocks; ++b, block_step += blocksize) { - BCD(block_step, block_step + blocksize, max_als, fast_pI, matlab, converge_test); - // Test the system to see if converged. Doing the hard way first - // To compute the accuracy fully compute CP approximation using blocks 0 through b. - if(compute_full_fit) { - std::vector current_grads(ndim); - for (size_t i = 0; i < ndim; ++i) { - auto &a_mat = blockfactors[i]; - auto lower = {z, z}, upper = {long(a_mat.extent(0)), block_step + blocksize}; - current_grads[i] = make_view(a_mat.range().slice(lower, upper), a_mat.storage()); + for(auto s = 0; s < nsweeps; ++s){ + long block_step = 0; + this->AtA = std::vector(ndim); + for (long b = 0; b < n_full_blocks; ++b, block_step += blocksize) { + BCD(block_step, block_step + blocksize, max_als, fast_pI, matlab, converge_test, s); + // Test the system to see if converged. Doing the hard way first + // To compute the accuracy fully compute CP approximation using blocks 0 through b. 
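// Aside: block bounds come from integer division of the target rank. The
// "BCD MODE = 3" unit test from patch 09, for example, uses blocksize = 9
// with rank = 13, giving n_full_blocks = 13 / 9 = 1 and
// last_blocksize = 13 % 9 = 4, i.e. column blocks [0, 9) and [9, 13).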
+ if(compute_full_fit) { + std::vector current_grads(ndim); + for (size_t i = 0; i < ndim; ++i) { + auto &a_mat = blockfactors[i]; + auto lower = {z, z}, upper = {long(a_mat.extent(0)), block_step + blocksize}; + current_grads[i] = make_view(a_mat.range().slice(lower, upper), a_mat.storage()); + } + + auto temp = reconstruct(current_grads, order, blockfactors[ndim]); + auto newfit = 1.0 - this->norm(temp - tensor_ref) * one_over_tref; + change = abs(fit - newfit); + fit = newfit; + std::cout << block_step + blocksize << "\t"; + std::cout << fit << "\t" << change << std::endl; } - - auto temp = reconstruct(current_grads, order, blockfactors[ndim]); - auto newfit = 1.0 - this->norm(temp - tensor_ref) * one_over_tref; - change = abs(fit - newfit); - fit = newfit; - std::cout << block_step + blocksize << "\t"; - std::cout << fit << "\t" << change << std::endl; } - } - if(last_blocksize != 0) { - this->AtA = std::vector(ndim); - block_step = n_full_blocks * blocksize; - BCD(block_step, block_step + last_blocksize, max_als, fast_pI, matlab, converge_test); - if(compute_full_fit) { - std::vector current_grads(ndim); - for (size_t i = 0; i < ndim; ++i) { - auto &a_mat = blockfactors[i]; - auto lower = {z, z}, upper = {long(a_mat.extent(0)), block_step + last_blocksize}; - current_grads[i] = make_view(a_mat.range().slice(lower, upper), a_mat.storage()); + if(last_blocksize != 0) { + this->AtA = std::vector(ndim); + block_step = n_full_blocks * blocksize; + BCD(block_step, block_step + last_blocksize, max_als, fast_pI, matlab, converge_test, s); + if(compute_full_fit) { + std::vector current_grads(ndim); + for (size_t i = 0; i < ndim; ++i) { + auto &a_mat = blockfactors[i]; + auto lower = {z, z}, upper = {long(a_mat.extent(0)), block_step + last_blocksize}; + current_grads[i] = make_view(a_mat.range().slice(lower, upper), a_mat.storage()); + } + + auto temp = reconstruct(current_grads, order, blockfactors[ndim]); + auto newfit = 1.0 - this->norm(temp - tensor_ref) * one_over_tref; + change = abs(fit - newfit); + fit = newfit; + std::cout << block_step + last_blocksize << "\t"; + std::cout << fit << "\t" << change << std::endl; } - - auto temp = reconstruct(current_grads, order, blockfactors[ndim]); - auto newfit = 1.0 - this->norm(temp - tensor_ref) * one_over_tref; - change = abs(fit - newfit); - fit = newfit; - std::cout << block_step + last_blocksize << "\t"; - std::cout << fit << "\t" << change << std::endl; } + epsilon = (compute_full_fit == false ? + this->norm(tensor_ref - reconstruct(blockfactors, order, blockfactors[ndim])) + : 1.0 - fit); } - epsilon = (compute_full_fit == false ? - this->norm(tensor_ref - reconstruct(blockfactors, order, blockfactors[ndim])) - : 1.0 - fit); A = blockfactors; } - void copy_blocks(Tensor & to, Tensor & from, ind_t block_start, ind_t block_end){ + void copy_blocks(Tensor & to, const Tensor & from, ind_t block_start, ind_t block_end){ auto tref_dim = to.extent(0); auto to_rank = to.extent(1), from_rank = from.extent(1); @@ -215,7 +218,7 @@ namespace btas{ /// \param[in, out] converge_test Test to see if ALS is converged, holds the value of fit. 
test to see if the ALS is converged void BCD(ind_t block_start, ind_t block_end, size_t max_als, - bool &fast_pI, bool &matlab, ConvClass &converge_test, double lambda = 0.0){ + bool &fast_pI, bool &matlab, ConvClass &converge_test, size_t sweep, double lambda = 0.0){ // Take the b-th block of the factor matrix also take the // find the partial grammian for the blocked factors auto cur_block_size = block_end - block_start; @@ -227,6 +230,14 @@ namespace btas{ } A[ndim] = Tensor(cur_block_size); A[ndim].fill(0.0); + if(sweep != 0){ + size_t c = 0; + auto lam_full_ptr = blockfactors[ndim].data(), new_lam_ptr = A[ndim].data(); + for(auto b = block_start; b < block_end; ++b, ++c) + *(new_lam_ptr + c) = *(lam_full_ptr + b); + + gradient += reconstruct(A, order, A[ndim]); + } // Do the ALS loop //CP_ALS::ALS(cur_block_size, converge_test, dir, max_als, calculate_epsilon, epsilon, fast_pI); // First do it manually, so we know its right @@ -243,20 +254,24 @@ namespace btas{ this->direct(i, cur_block_size, fast_pI, matlab, converge_test, gradient); auto &ai = A[i]; contract(this->one, ai, {1, 2}, ai.conj(), {1, 3}, this->zero, this->AtA[i], {2, 3}); - // Copy the block computed in A to the correct portion in blockfactors - // (this will replace A at the end) - copy_blocks(blockfactors[i], A[i], block_start, block_end); - } - - // Copy the lambda values into the correct location in blockfactors - size_t c = 0; - for (auto b = block_start; b < block_end; ++b, ++c) { - blockfactors[ndim](b) = A[ndim](c); } is_converged = converge_test(A, this->AtA); }while(count < max_als && !is_converged); + // Compute new difference gradient -= reconstruct(A, order, A[ndim]); + + // Copy the block computed in A to the correct portion in blockfactors + // (this will replace A at the end) + for(size_t i = 0; i < ndim; ++i){ + copy_blocks(blockfactors[i], A[i], block_start, block_end); + } + // Copy the lambda values into the correct location in blockfactors + size_t c = 0; + auto lam_full_ptr = blockfactors[ndim].data(), new_lam_ptr = A[ndim].data(); + for (auto b = block_start; b < block_end; ++b, ++c) { + *(lam_full_ptr + b) = *(new_lam_ptr + c); + } } }; From c79a7cc503caf39ef8b3e65a894c636cd6bc29ab Mon Sep 17 00:00:00 2001 From: kmp5VT Date: Thu, 25 May 2023 14:17:38 -0400 Subject: [PATCH 11/18] Fix for complex tensors --- btas/generic/cp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/btas/generic/cp.h b/btas/generic/cp.h index 384b6e08..e59be5f8 100644 --- a/btas/generic/cp.h +++ b/btas/generic/cp.h @@ -571,7 +571,7 @@ namespace btas { /// \param[in] Mat Calculates the 2-norm of the matrix mat /// \return the 2-norm. 
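// Aside: for complex element types dot(Mat, Mat) sums x * x rather than
// |x|^2 -- e.g. for x = 3 + 4i, x * x = -7 + 24i while x * conj(x) = 25 --
// so the pre-patch norm() was not a true 2-norm for complex tensors;
// conjugating one argument of the dot product, as done below, fixes this.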
- auto norm(const Tensor &Mat) { return sqrt(abs(dot(Mat, Mat))); } + auto norm(const Tensor &Mat) { return sqrt(abs(dot(Mat, btas::impl::conj(Mat)))); } /// SVD referencing code from /// http://www.netlib.org/lapack/explore-html/de/ddd/lapacke_8h_af31b3cb47f7cc3b9f6541303a2968c9f.html From 132eecbf3e3051f0592b0b4a0ef5b17e7d879f41 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Fri, 26 May 2023 09:56:24 -0400 Subject: [PATCH 12/18] one is const, one is not --- btas/generic/cp_bcd.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/btas/generic/cp_bcd.h b/btas/generic/cp_bcd.h index 5819916a..6aaf634b 100644 --- a/btas/generic/cp_bcd.h +++ b/btas/generic/cp_bcd.h @@ -196,7 +196,8 @@ namespace btas{ auto tref_dim = to.extent(0); auto to_rank = to.extent(1), from_rank = from.extent(1); - auto to_ptr = to.data(), from_ptr = from.data(); + auto to_ptr = to.data(); + auto from_ptr = from.data(); ind_t from_pos = 0; for (ind_t i = 0, skip = 0; i < tref_dim; ++i, skip += to_rank){ for (auto b = block_start; b < block_end; ++b, ++from_pos){ From 2b3a4a0077753e7b76b565cca6a16737b6484849 Mon Sep 17 00:00:00 2001 From: Karl Pierce Date: Tue, 18 Jul 2023 11:53:23 -0400 Subject: [PATCH 13/18] create `compute_full` fit function --- btas/generic/cp_bcd.h | 47 ++++++++++++++++++------------------------- 1 file changed, 20 insertions(+), 27 deletions(-) diff --git a/btas/generic/cp_bcd.h b/btas/generic/cp_bcd.h index 6aaf634b..3cd16841 100644 --- a/btas/generic/cp_bcd.h +++ b/btas/generic/cp_bcd.h @@ -150,19 +150,7 @@ namespace btas{ // Test the system to see if converged. Doing the hard way first // To compute the accuracy fully compute CP approximation using blocks 0 through b. if(compute_full_fit) { - std::vector current_grads(ndim); - for (size_t i = 0; i < ndim; ++i) { - auto &a_mat = blockfactors[i]; - auto lower = {z, z}, upper = {long(a_mat.extent(0)), block_step + blocksize}; - current_grads[i] = make_view(a_mat.range().slice(lower, upper), a_mat.storage()); - } - - auto temp = reconstruct(current_grads, order, blockfactors[ndim]); - auto newfit = 1.0 - this->norm(temp - tensor_ref) * one_over_tref; - change = abs(fit - newfit); - fit = newfit; - std::cout << block_step + blocksize << "\t"; - std::cout << fit << "\t" << change << std::endl; + compute_full(long(blockfactors.extent(0)), block_step + blocksize, one_over_tref, change, fit); } } if(last_blocksize != 0) { @@ -170,24 +158,13 @@ namespace btas{ block_step = n_full_blocks * blocksize; BCD(block_step, block_step + last_blocksize, max_als, fast_pI, matlab, converge_test, s); if(compute_full_fit) { - std::vector current_grads(ndim); - for (size_t i = 0; i < ndim; ++i) { - auto &a_mat = blockfactors[i]; - auto lower = {z, z}, upper = {long(a_mat.extent(0)), block_step + last_blocksize}; - current_grads[i] = make_view(a_mat.range().slice(lower, upper), a_mat.storage()); - } - - auto temp = reconstruct(current_grads, order, blockfactors[ndim]); - auto newfit = 1.0 - this->norm(temp - tensor_ref) * one_over_tref; - change = abs(fit - newfit); - fit = newfit; - std::cout << block_step + last_blocksize << "\t"; - std::cout << fit << "\t" << change << std::endl; + compute_full(long(blockfactors.extent(0)), block_step + last_blocksize, one_over_tref, change, fit); } } epsilon = (compute_full_fit == false ? 
From 132eecbf3e3051f0592b0b4a0ef5b17e7d879f41 Mon Sep 17 00:00:00 2001
From: Karl Pierce
Date: Fri, 26 May 2023 09:56:24 -0400
Subject: [PATCH 12/18] one is const, one is not

---
 btas/generic/cp_bcd.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/btas/generic/cp_bcd.h b/btas/generic/cp_bcd.h
index 5819916a..6aaf634b 100644
--- a/btas/generic/cp_bcd.h
+++ b/btas/generic/cp_bcd.h
@@ -196,7 +196,8 @@ namespace btas{
       auto tref_dim = to.extent(0);
       auto to_rank = to.extent(1), from_rank = from.extent(1);

-      auto to_ptr = to.data(), from_ptr = from.data();
+      auto to_ptr = to.data();
+      auto from_ptr = from.data();
       ind_t from_pos = 0;
       for (ind_t i = 0, skip = 0; i < tref_dim; ++i, skip += to_rank){
         for (auto b = block_start; b < block_end; ++b, ++from_pos){

From 2b3a4a0077753e7b76b565cca6a16737b6484849 Mon Sep 17 00:00:00 2001
From: Karl Pierce
Date: Tue, 18 Jul 2023 11:53:23 -0400
Subject: [PATCH 13/18] create `compute_full` fit function

---
 btas/generic/cp_bcd.h | 47 ++++++++++++++++++-------------------------
 1 file changed, 20 insertions(+), 27 deletions(-)

diff --git a/btas/generic/cp_bcd.h b/btas/generic/cp_bcd.h
index 6aaf634b..3cd16841 100644
--- a/btas/generic/cp_bcd.h
+++ b/btas/generic/cp_bcd.h
@@ -150,19 +150,7 @@ namespace btas{
           // Test the system to see if converged. Doing it the hard way first:
           // to compute the accuracy, fully compute the CP approximation using blocks 0 through b.
           if(compute_full_fit) {
-            std::vector<Tensor> current_grads(ndim);
-            for (size_t i = 0; i < ndim; ++i) {
-              auto &a_mat = blockfactors[i];
-              auto lower = {z, z}, upper = {long(a_mat.extent(0)), block_step + blocksize};
-              current_grads[i] = make_view(a_mat.range().slice(lower, upper), a_mat.storage());
-            }
-
-            auto temp = reconstruct(current_grads, order, blockfactors[ndim]);
-            auto newfit = 1.0 - this->norm(temp - tensor_ref) * one_over_tref;
-            change = abs(fit - newfit);
-            fit = newfit;
-            std::cout << block_step + blocksize << "\t";
-            std::cout << fit << "\t" << change << std::endl;
+            compute_full(long(blockfactors.extent(0)), block_step + blocksize, one_over_tref, change, fit);
           }
         }
         if(last_blocksize != 0) {
@@ -170,24 +158,13 @@ namespace btas{
           this->AtA = std::vector<Tensor>(ndim);
           block_step = n_full_blocks * blocksize;
           BCD(block_step, block_step + last_blocksize, max_als, fast_pI, matlab, converge_test, s);
           if(compute_full_fit) {
-            std::vector<Tensor> current_grads(ndim);
-            for (size_t i = 0; i < ndim; ++i) {
-              auto &a_mat = blockfactors[i];
-              auto lower = {z, z}, upper = {long(a_mat.extent(0)), block_step + last_blocksize};
-              current_grads[i] = make_view(a_mat.range().slice(lower, upper), a_mat.storage());
-            }
-
-            auto temp = reconstruct(current_grads, order, blockfactors[ndim]);
-            auto newfit = 1.0 - this->norm(temp - tensor_ref) * one_over_tref;
-            change = abs(fit - newfit);
-            fit = newfit;
-            std::cout << block_step + last_blocksize << "\t";
-            std::cout << fit << "\t" << change << std::endl;
+            compute_full(long(blockfactors.extent(0)), block_step + last_blocksize, one_over_tref, change, fit);
          }
        }
        epsilon = (compute_full_fit == false ?
-                      this->norm(tensor_ref - reconstruct(blockfactors, order, blockfactors[ndim]))
+                      this->norm(gradient) * one_over_tref
                       : 1.0 - fit);
+       std::cout << epsilon << std::endl;
     }
     A = blockfactors;
   }
@@ -205,6 +182,22 @@ namespace btas{
         }
       }
     }
+
+    void compute_full(long extent, long block_end, T one_over_tref, T & change, T& fit){
+      std::vector<Tensor> current_grads(ndim);
+      for (size_t i = 0; i < ndim; ++i) {
+        auto &a_mat = blockfactors[i];
+        auto lower = {z, z}, upper = {extent, block_end};
+        current_grads[i] = make_view(a_mat.range().slice(lower, upper), a_mat.storage());
+      }
+
+      auto temp = reconstruct(current_grads, order, blockfactors[ndim]);
+      auto newfit = 1.0 - this->norm(temp - tensor_ref) * one_over_tref;
+      change = abs(fit - newfit);
+      fit = newfit;
+      std::cout << block_end << "\t";
+      std::cout << fit << "\t" << change << std::endl;
+    }

     /// Computes an optimized factor matrix holding all others constant.
     /// No Khatri-Rao product computed, immediate contraction
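Two notes on the pair of patches above. First, the one-line split in PATCH 12 is forced by C++ deduction rules: a single `auto` declarator list must deduce one common type, but `to.data()` on a non-const reference yields `T*` while `from.data()` on a `const` reference yields `const T*`. A minimal standalone reproduction, with `std::vector` standing in for `btas::Tensor` and hypothetical names:

```cpp
#include <vector>

void copy_example(std::vector<double> &to, const std::vector<double> &from) {
  // Ill-formed: one auto declarator list must deduce a single type,
  // but to.data() is double* while from.data() is const double*.
  // auto to_ptr = to.data(), from_ptr = from.data();

  auto to_ptr = to.data();      // deduces double*
  auto from_ptr = from.data();  // deduces const double*
  to_ptr[0] = from_ptr[0];      // writes only through the non-const side
}
```

Second, `compute_full` amounts to "reconstruct from the leading `block_end` columns of every factor and measure the fit". An order-2 sketch of that bookkeeping with plain arrays instead of btas views (hypothetical helper, row-major layout assumed):

```cpp
#include <cmath>
#include <cstddef>
#include <vector>

// Toy analogue of compute_full for a matrix T ~ A diag(lambda) B^T:
// only the first block_end of the R rank-one terms participate, exactly
// like slicing each factor matrix to columns [0, block_end).
double partial_fit(const std::vector<double> &T, std::size_t I, std::size_t J,
                   const std::vector<double> &A, const std::vector<double> &B,
                   const std::vector<double> &lambda, std::size_t R,
                   std::size_t block_end) {
  double err2 = 0.0, ref2 = 0.0;
  for (std::size_t i = 0; i < I; ++i)
    for (std::size_t j = 0; j < J; ++j) {
      double model = 0.0;
      for (std::size_t r = 0; r < block_end; ++r)
        model += lambda[r] * A[i * R + r] * B[j * R + r];
      double t = T[i * J + j];
      err2 += (t - model) * (t - model);  // ||T - T_approx||^2, accumulated
      ref2 += t * t;                      // ||T||^2
    }
  return 1.0 - std::sqrt(err2) / std::sqrt(ref2);  // the "fit" being printed
}
```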
From dbd76aa9a46ac734564ab1a951cf632d510320eb Mon Sep 17 00:00:00 2001
From: Karl Pierce
Date: Thu, 17 Aug 2023 12:31:42 -0400
Subject: [PATCH 14/18] Add ability to use different block sizes

---
 btas/generic/cp_bcd.h | 77 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 58 insertions(+), 19 deletions(-)

diff --git a/btas/generic/cp_bcd.h b/btas/generic/cp_bcd.h
index 3cd16841..b2826c4d 100644
--- a/btas/generic/cp_bcd.h
+++ b/btas/generic/cp_bcd.h
@@ -74,7 +74,10 @@ namespace btas{
     /// that stores the reference tensor.
     /// Reference tensor has no symmetries.
     /// \param[in] tensor the reference tensor to be decomposed.
-    CP_BCD(Tensor &tensor, ind_t block_size = 1, size_t sweeps=1) : CP_ALS(tensor), blocksize(block_size), nsweeps(sweeps){
+    CP_BCD(Tensor &tensor, ind_t block_size = 1, size_t sweeps=1, std::vector<size_t> bs = std::vector<size_t>()) : CP_ALS(tensor), blocksize(block_size), nsweeps(sweeps){
+      if(!bs.empty()){
+        block_sizes = bs;
+      }
     }

     /// Create a CP BCD object, child class of the CP object
@@ -94,6 +97,11 @@ namespace btas{

     ~CP_BCD() = default;

+    void set_block_sizes(size_t rank, std::vector<size_t> bs){
+      BTAS_ASSERT(bs[bs.size() - 1] == rank);
+      block_sizes = bs;
+      return;
+    }
   protected:
     Tensor gradient, block_ref;        // Stores gradient of BCD for fitting.
     ind_t blocksize;                   // Size of block gradient
     std::vector<Tensor> blockfactors;
     std::vector<size_t> order;         // convenient to write order of tensors for reconstruct
     long z = 0;
     size_t nsweeps;
+    std::vector<size_t> block_sizes;
@@ -128,10 +137,19 @@ namespace btas{
       block_ref = tensor_ref;

       // Number of full blocks with rank blocksize
-      // plus one more block with rank rank % blocksize
+      // plus one more block with rank: rank % blocksize
       int n_full_blocks = int( rank / blocksize),
          last_blocksize = rank % blocksize;

+      if(block_sizes.empty()) {
+        size_t index = 0;
+        block_sizes.emplace_back(index);
+        for(size_t i = 0; i < n_full_blocks; ++i) {
+          index += blocksize;
+          block_sizes.emplace_back(index);
+        }
+        if(last_blocksize != 0) block_sizes.emplace_back(index + last_blocksize);
+      }
       // copying all of A to block factors. Then we are going to take slices of A
       // that way we can leverage all of `CP_ALS` code without modification
       blockfactors = A;
@@ -142,31 +160,49 @@ namespace btas{
       auto one_over_tref = 1.0 / this->norm(tensor_ref);
       auto fit = 1.0, change = 0.0;
       bool compute_full_fit = false;
+      size_t n_blocks = block_sizes.size();
       for(auto s = 0; s < nsweeps; ++s){
         long block_step = 0;
         this->AtA = std::vector<Tensor>(ndim);
-        for (long b = 0; b < n_full_blocks; ++b, block_step += blocksize) {
-          BCD(block_step, block_step + blocksize, max_als, fast_pI, matlab, converge_test, s);
+
+        // E - A1 - A2 - ... = gradient
+        // initial : gradient = E
+        // gradient = E - A1
+        for (long b = 1; b < n_blocks; ++b) {
+          std::cout << "\tnorm (remainder) / tref: " << norm(gradient) / norm(tensor_ref) << std::endl;
+          this->AtA = std::vector<Tensor>(ndim);
+          BCD(block_sizes[b - 1], block_sizes[b], max_als, fast_pI, matlab, converge_test, s);
           // Test the system to see if converged. Doing it the hard way first:
           // to compute the accuracy, fully compute the CP approximation using blocks 0 through b.
           if(compute_full_fit) {
-            compute_full(long(blockfactors.extent(0)), block_step + blocksize, one_over_tref, change, fit);
+            compute_full(block_step + blocksize, one_over_tref, change, fit);
           }
         }
-        if(last_blocksize != 0) {
-          this->AtA = std::vector<Tensor>(ndim);
-          block_step = n_full_blocks * blocksize;
-          BCD(block_step, block_step + last_blocksize, max_als, fast_pI, matlab, converge_test, s);
-          if(compute_full_fit) {
-            compute_full(block_step + last_blocksize, one_over_tref, change, fit);
-          }
-        }
+//        if(last_blocksize != 0) {
+//          std::cout << "\t\tnorm grad / tref: " << norm(gradient) / norm(tensor_ref) << std::endl;
+//          this->AtA = std::vector<Tensor>(ndim);
+//          block_step = n_full_blocks * blocksize;
+//          BCD(block_step, block_step + last_blocksize, max_als, fast_pI, matlab, converge_test, s);
+//          if(compute_full_fit) {
+//            compute_full(block_step + last_blocksize, one_over_tref, change, fit);
+//          }
+//        }
+        std::cout << "\t\tnorm grad / tref: " << norm(gradient) / norm(tensor_ref) << std::endl;
         epsilon = (compute_full_fit == false ?
-                       this->norm(gradient) * one_over_tref
-                       : 1.0 - fit);
-        std::cout << epsilon << std::endl;
+                       1.0 - this->norm(gradient) * one_over_tref
+                       : fit);
+        std::cout << epsilon << "\n" << std::endl;
       }
+      detail::set_norm(converge_test, this->norm(tensor_ref));
+      converge_test.verbose(true);
       A = blockfactors;
+      this->AtA = std::vector<Tensor>(ndim);
+      for (size_t i = 0; i < ndim; ++i) {
+        auto &a_mat = A[i];
+        contract(this->one, a_mat, {1, 2}, a_mat.conj(), {1, 3}, this->zero, this->AtA[i], {2, 3});
+      }
+      gradient = reconstruct(A, order, A[ndim]);
+      std::cout << norm(tensor_ref - gradient) * one_over_tref << std::endl;
     }

     void copy_blocks(Tensor & to, const Tensor & from, ind_t block_start, ind_t block_end){
@@ -219,11 +219,11 @@ namespace btas{
       }
     }

-    void compute_full(long extent, long block_end, T one_over_tref, T & change, T& fit){
+    void compute_full(long block_end, T one_over_tref, T & change, T& fit){
       std::vector<Tensor> current_grads(ndim);
       for (size_t i = 0; i < ndim; ++i) {
         auto &a_mat = blockfactors[i];
-        auto lower = {z, z}, upper = {extent, block_end};
+        auto lower = {z, z}, upper = {long(a_mat.extent(0)), block_end};
         current_grads[i] = make_view(a_mat.range().slice(lower, upper), a_mat.storage());
       }
@@ -229,9 +265,11 @@ namespace btas{
           auto lam_full_ptr = blockfactors[ndim].data(), new_lam_ptr = A[ndim].data();
           for(auto b = block_start; b < block_end; ++b, ++c)
             *(new_lam_ptr + c) = *(lam_full_ptr + b);
-
+
           gradient += reconstruct(A, order, A[ndim]);
         }
+        std::cout << "\t\tnorm(remainder + hat{T}_block) / tref: " << norm(gradient) / norm(tensor_ref) << std::endl;
         // Do the ALS loop
         //CP_ALS::ALS(cur_block_size, converge_test, dir, max_als, calculate_epsilon, epsilon, fast_pI);
         // First do it manually, so we know it's right
@@ -278,6 +280,7 @@ namespace btas{
         size_t count = 0;
         bool is_converged = false;
         detail::set_norm(converge_test, this->norm(gradient));
+        converge_test.verbose(true);
         do {
           ++count;
           this->num_ALS++;
From 4a1304cea6677255dc6f70705469b8c387713ccc Mon Sep 17 00:00:00 2001
From: Karl Pierce
Date: Tue, 22 Aug 2023 12:10:50 -0400
Subject: [PATCH 15/18] update BCD printing

---
 btas/generic/cp_bcd.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/btas/generic/cp_bcd.h b/btas/generic/cp_bcd.h
index b2826c4d..afa69ed0 100644
--- a/btas/generic/cp_bcd.h
+++ b/btas/generic/cp_bcd.h
@@ -169,9 +169,9 @@ namespace btas{
         // initial : gradient = E
         // gradient = E - A1
         for (long b = 1; b < n_blocks; ++b) {
-          std::cout << "\tnorm (remainder) / tref: " << norm(gradient) / norm(tensor_ref) << std::endl;
           this->AtA = std::vector<Tensor>(ndim);
           BCD(block_sizes[b - 1], block_sizes[b], max_als, fast_pI, matlab, converge_test, s);
+          std::cout << "\tnorm (remainder) / tref: " << norm(gradient) / norm(tensor_ref) << std::endl;
           // Test the system to see if converged. Doing it the hard way first:
           // to compute the accuracy, fully compute the CP approximation using blocks 0 through b.
           if(compute_full_fit) {
@@ -187,7 +187,7 @@ namespace btas{
 //          compute_full(block_step + last_blocksize, one_over_tref, change, fit);
 //        }
 //      }
-        std::cout << "\t\tnorm grad / tref: " << norm(gradient) / norm(tensor_ref) << std::endl;
+        //std::cout << "\t\tnorm grad / tref: " << norm(gradient) / norm(tensor_ref) << std::endl;
         epsilon = (compute_full_fit == false ?
                        1.0 - this->norm(gradient) * one_over_tref
                        : fit);
@@ -269,7 +269,7 @@ namespace btas{
           gradient += reconstruct(A, order, A[ndim]);
         }
-        std::cout << "\t\tnorm(remainder + hat{T}_block) / tref: " << norm(gradient) / norm(tensor_ref) << std::endl;
+        //std::cout << "\t\tnorm(remainder + hat{T}_block) / tref: " << norm(gradient) / norm(tensor_ref) << std::endl;
         // Do the ALS loop
         //CP_ALS::ALS(cur_block_size, converge_test, dir, max_als, calculate_epsilon, epsilon, fast_pI);
         // First do it manually, so we know it's right
@@ -278,7 +278,7 @@ namespace btas{
         size_t count = 0;
         bool is_converged = false;
         detail::set_norm(converge_test, this->norm(gradient));
-        converge_test.verbose(true);
+        converge_test.verbose(false);
         do {
           ++count;
           this->num_ALS++;
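The sweep loop above is driven by `block_sizes`, the cumulative boundary list introduced in PATCH 14: entries `b-1` and `b` delimit the factor-matrix columns handed to `BCD`. A sketch of the default schedule the constructor builds when none is supplied, written as a hypothetical free function that mirrors the patch's own loop:

```cpp
#include <cstddef>
#include <vector>

// block_sizes stores cumulative column boundaries, not widths:
// {0, 4, 8, 10} means block 1 spans columns [0,4), block 2 [4,8),
// and block 3 [8,10) of every factor matrix, for a CP rank of 10.
std::vector<std::size_t> default_block_bounds(std::size_t rank, std::size_t blocksize) {
  std::vector<std::size_t> bounds{0};
  std::size_t index = 0;
  for (std::size_t i = 0; i < rank / blocksize; ++i) {
    index += blocksize;
    bounds.push_back(index);  // one boundary per full block
  }
  if (rank % blocksize != 0)
    bounds.push_back(index + rank % blocksize);  // trailing partial block
  return bounds;  // last entry equals rank, matching the set_block_sizes assert
}
```

With `rank = 10` and `blocksize = 4` this yields `{0, 4, 8, 10}`. A hand-built schedule such as `{0, 1, 2, 4, 8, 10}` grows the block width geometrically instead; `set_block_sizes` only checks that the final boundary equals the CP rank.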
From 8bcee70b4cf1bf3d41c3d62122ddc9188014f879 Mon Sep 17 00:00:00 2001
From: Karl Pierce
Date: Tue, 9 Jan 2024 20:04:32 -0500
Subject: [PATCH 16/18] Goal here is to try to make a new CP using
 interpolative decomposition

---
 btas/btas.h                      |   1 +
 btas/generic/cp_als.h            |  82 ++++++++++++++++++++++++++++++
 btas/generic/lapack_extensions.h |  26 ++++++++++
 3 files changed, 109 insertions(+)

diff --git a/btas/btas.h b/btas/btas.h
index 095fd517..9c4fac5f 100644
--- a/btas/btas.h
+++ b/btas/btas.h
@@ -9,6 +9,7 @@
 #include
 #include
+//#include
 #include
 #include
 #include

diff --git a/btas/generic/cp_als.h b/btas/generic/cp_als.h
index 12bba3f9..4bc0fbc7 100644
--- a/btas/generic/cp_als.h
+++ b/btas/generic/cp_als.h
@@ -339,6 +339,7 @@ namespace btas {
     Tensor &tensor_ref;        // Tensor to be decomposed
     ord_t size;                // Total number of elements
     bool factors_set = false;  // Are the factors preset (not implemented yet).
+    std::vector<std::vector<int64_t>> pivs;

     /// Creates an initial guess by computing the SVD of each mode
     /// If the rank of the mode is smaller than the CP rank requested
@@ -659,6 +660,87 @@ namespace btas {
       Tensor temp(A[n].extent(0), rank);
       Tensor an(A[n].range());

+      // Testing the code to see if pivoted QR can help
+      if(false){
+        // First create a Pivot matrix from the flattened tensor_ref
+        auto f = flatten(tensor_ref, n);
+        auto square_dim = f.extent(0), full = f.extent(1);
+        auto scale_factor = double(full) / double(square_dim);
+        auto extended = (scale_factor > 2.0 ? square_dim * (scale_factor - 1.0) : full);
+        std::vector<int64_t> piv;
+        if(pivs.size() < (n + 1)) {
+          piv = std::vector<int64_t>(f.extent(1));
+          std::vector<T> tau(f.extent(1));
+          btas::geqp3_pivot(blas::Layout::RowMajor, f.extent(0), f.extent(1), f.data(), f.extent(1), piv.data(),
+                            tau.data());
+          Tensor R(full, square_dim);
+          R.fill(0.0);
+          for(auto i = 0; i < square_dim; ++i) {
+            for (auto j = i; j < square_dim; ++j) {
+
+            }
+          }
+          f = flatten(tensor_ref, n);
+          pivs.emplace_back(piv);
+        } else {
+          piv = pivs[n];
+        }
+
+        auto K = this->generate_KRP(n, rank, true);
+        // For reference, compute the matricized tensor times the Khatri-Rao product
+        gemm(blas::Op::NoTrans, blas::Op::NoTrans, this->one, f, K, this->zero, temp);
+        detail::set_MtKRP(converge_test, temp);
+
+        Tensor Fp(square_dim, square_dim);
+        {
+          Tensor t(square_dim, full);
+          for (auto i = 0; i < square_dim; ++i) {
+            for (auto j = 0; j < full; ++j) {
+              int v = pivs[n][j];
+              t(i, j) = f(i, v);
+            }
+          }
+
+          for (auto i = 0; i < square_dim; ++i) {
+            for (auto j = 0; j < square_dim; ++j) {
+              Fp(i, j) = t(i, j);
+            }
+          }
+        }
+
+        Tensor Kp(square_dim, rank);
+        {
+          Tensor t(full, rank);
+          for(auto j = 0; j < full; ++j) {
+            int v = pivs[n][j];
+            for (auto r = 0; r < rank; ++r) {
+              t(j, r) = K(v, r);
+            }
+          }
+
+          for(auto j = 0; j < square_dim; ++j) {
+            for (auto r = 0; r < rank; ++r) {
+              Kp(j, r) = t(j, r);
+            }
+          }
+        }
+
+        // contract the product from above with the pseudoinverse of the sampled
+        // KRP to produce an optimized factor matrix
+        fast_pI = false;
+        auto pInv = pseudoInverse(Kp, fast_pI);
+
+        Tensor t;
+        gemm(blas::Op::NoTrans, blas::Op::NoTrans, this->one, Fp, pInv, this->zero, temp);
+
+        // compute the difference between this new factor matrix and the previous
+        // iteration
+        this->normCol(temp);
+
+        // Replace the old factor matrix with the new optimized result
+        A[n] = temp;
+        return;
+      }
 #ifdef BTAS_HAS_INTEL_MKL

       // Computes the Khatri-Rao product intermediate

diff --git a/btas/generic/lapack_extensions.h b/btas/generic/lapack_extensions.h
index cb1560e6..bc4ecea0 100644
--- a/btas/generic/lapack_extensions.h
+++ b/btas/generic/lapack_extensions.h
@@ -40,6 +40,32 @@ int64_t getrf( blas::Layout order, int64_t M, int64_t N, T* A, int64_t LDA,

 }

+template <typename T, typename Alloc = std::allocator<T>>
+int64_t geqp3_pivot( blas::Layout order, int64_t M, int64_t N, T* A, int64_t LDA,
+                     int64_t* IPIV, T* tau, Alloc alloc = Alloc() ) {
+
+  //std::cout << "IN GEQP3 IMPL" << std::endl;
+  if( order == blas::Layout::ColMajor ) {
+    return lapack::geqp3( M, N, A, LDA, IPIV, tau);
+  } else {
+
+    // Transpose input
+    auto* A_transpose = alloc.allocate(M*N);
+    transpose( N, M, A, LDA, A_transpose, M );
+
+    // A -> QR (column-pivoted)
+    auto info = lapack::geqp3( M, N, A_transpose, M, IPIV, tau);
+
+    // Transpose output + cleanup
+    if(!info)
+      transpose( M, N, A_transpose, M, A, LDA );
+    alloc.deallocate( A_transpose, M*N );
+
+    return info;
+  }
+
+}
+
 template <typename T, typename Alloc = std::allocator<T>,
           typename IntAlloc = std::allocator<int64_t>>
 int64_t gesv( blas::Layout order, int64_t N, int64_t NRHS, T* A, int64_t LDA,
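The testing block in PATCH 16 uses column-pivoted QR (`lapack::geqp3`, reached through the new `geqp3_pivot` wrapper) to rank the columns of the flattened tensor by how much independent information they carry, which is the core of an interpolative decomposition. The LAPACK routine does the real work; the toy below sketches only the selection rule it embodies (greedily take the column with the largest remaining norm, then orthogonalize the rest against it). It is a conceptual stand-in with hypothetical names, not a replacement for `geqp3`:

```cpp
#include <cmath>
#include <cstddef>
#include <numeric>
#include <utility>
#include <vector>

// Greedy sketch of geqp3's column selection. M is m x n, row-major.
std::vector<std::size_t> pivot_order(std::vector<double> M, std::size_t m, std::size_t n) {
  std::vector<std::size_t> piv(n);
  std::iota(piv.begin(), piv.end(), 0);
  auto at = [&](std::size_t i, std::size_t j) -> double & { return M[i * n + j]; };
  for (std::size_t k = 0; k < n && k < m; ++k) {
    // choose the remaining column with the largest 2-norm
    std::size_t best = k;
    double best2 = -1.0;
    for (std::size_t j = k; j < n; ++j) {
      double s = 0.0;
      for (std::size_t i = 0; i < m; ++i) s += at(i, j) * at(i, j);
      if (s > best2) { best2 = s; best = j; }
    }
    std::swap(piv[k], piv[best]);
    for (std::size_t i = 0; i < m; ++i) std::swap(at(i, k), at(i, best));
    if (best2 <= 0.0) break;  // remaining columns are (numerically) zero
    // remove the chosen direction from the trailing columns (Gram-Schmidt step)
    for (std::size_t j = k + 1; j < n; ++j) {
      double proj = 0.0;
      for (std::size_t i = 0; i < m; ++i) proj += at(i, j) * at(i, k);
      proj /= best2;  // <a_j, a_k> / <a_k, a_k>
      for (std::size_t i = 0; i < m; ++i) at(i, j) -= proj * at(i, k);
    }
  }
  return piv;
}
```

`geqp3` computes A P = Q R with the diagonal of R non-increasing in magnitude, so the leading pivots index the columns that best span the matrix; the greedy rule above mirrors that ordering on small, well-conditioned inputs. In the patch those pivots select which columns of the flattened tensor (`Fp`) and which rows of the Khatri-Rao product (`Kp`) enter the reduced least-squares solve.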
From fdb1b8d8f694dd75a24100ecc50373fe15a61250 Mon Sep 17 00:00:00 2001
From: Karl Pierce
Date: Wed, 24 Jan 2024 12:21:19 -0500
Subject: [PATCH 17/18] Fix testing area

---
 btas/generic/cp_als.h | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/btas/generic/cp_als.h b/btas/generic/cp_als.h
index f66dd324..b10fd6af 100644
--- a/btas/generic/cp_als.h
+++ b/btas/generic/cp_als.h
@@ -661,25 +661,20 @@ namespace btas {
       Tensor an(A[n].range());

       // Testing the code to see if pivoted QR can help
-      if(false){
+      if (false) {
         // First create a Pivot matrix from the flattened tensor_ref
         auto f = flatten(tensor_ref, n);
         auto square_dim = f.extent(0), full = f.extent(1);
         auto scale_factor = double(full) / double(square_dim);
         auto extended = (scale_factor > 2.0 ? square_dim * (scale_factor - 1.0) : full);
         std::vector<int64_t> piv;
-        if(pivs.size() < (n + 1)) {
+        if (pivs.size() < (n + 1)) {
           piv = std::vector<int64_t>(f.extent(1));
           std::vector<T> tau(f.extent(1));
           btas::geqp3_pivot(blas::Layout::RowMajor, f.extent(0), f.extent(1), f.data(), f.extent(1), piv.data(),
                             tau.data());
           Tensor R(full, square_dim);
           R.fill(0.0);
-          for(auto i = 0; i < square_dim; ++i) {
-            for (auto j = i; j < square_dim; ++j) {
-
-            }
-          }
           f = flatten(tensor_ref, n);
           pivs.emplace_back(piv);
         } else {
@@ -711,14 +706,14 @@ namespace btas {
       Tensor Kp(square_dim, rank);
       {
         Tensor t(full, rank);
-        for(auto j = 0; j < full; ++j) {
+        for (auto j = 0; j < full; ++j) {
           int v = pivs[n][j];
           for (auto r = 0; r < rank; ++r) {
             t(j, r) = K(v, r);
           }
         }

-        for(auto j = 0; j < square_dim; ++j) {
+        for (auto j = 0; j < square_dim; ++j) {
           for (auto r = 0; r < rank; ++r) {
             Kp(j, r) = t(j, r);
           }

From 8007360c90eb28193a46cc03af64b88844172216 Mon Sep 17 00:00:00 2001
From: Karl Pierce
Date: Tue, 30 Jan 2024 14:42:53 -0500
Subject: [PATCH 18/18] Bump VG tag

---
 external/versions.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/external/versions.cmake b/external/versions.cmake
index e738e91b..caa3e428 100644
--- a/external/versions.cmake
+++ b/external/versions.cmake
@@ -1,4 +1,4 @@
-set(BTAS_TRACKED_VGCMAKEKIT_TAG e0d04e91a84b7e71d9b87682c46c518e9966bd78)
+set(BTAS_TRACKED_VGCMAKEKIT_TAG 9b541a5f3708a58dce59d00f3c47ac030ef4d8b4)

 # likely can use earlier, but
 # - as of oct 2023 tested with 1.71 and up only