diff --git a/src/ExtendedGridOrbitals.cc b/src/ExtendedGridOrbitals.cc index a6ff6be2..54784e04 100644 --- a/src/ExtendedGridOrbitals.cc +++ b/src/ExtendedGridOrbitals.cc @@ -36,10 +36,8 @@ #define ORBITAL_OCCUPATION 2. std::string getDatasetName(const std::string& name, const int color); -short ExtendedGridOrbitals::subdivx_ = 0; -int ExtendedGridOrbitals::lda_ = 0; -int ExtendedGridOrbitals::numpt_ = 0; -int ExtendedGridOrbitals::loc_numpt_ = 0; +int ExtendedGridOrbitals::lda_ = 0; +int ExtendedGridOrbitals::numpt_ = 0; ExtendedGridOrbitalsPtrFunc ExtendedGridOrbitals::dotProduct_ = &ExtendedGridOrbitals::dotProductDiagonal; int ExtendedGridOrbitals::data_wghosts_index_ = -1; @@ -65,7 +63,7 @@ ExtendedGridOrbitals::ExtendedGridOrbitals(std::string name, MasksSet* corrmasks, ClusterOrbitals* local_cluster, const bool setup_flag) : name_(std::move(name)), proj_matrices_(proj_matrices), - block_vector_(my_grid, subdivx, bc), + block_vector_(my_grid, 1, bc), grid_(my_grid) { (void)lrs; @@ -74,18 +72,16 @@ ExtendedGridOrbitals::ExtendedGridOrbitals(std::string name, (void)local_cluster; // preconditions - assert(subdivx > 0); + assert(subdivx == 1); assert(proj_matrices != nullptr); for (short i = 0; i < 3; i++) assert(bc[i] == 0 || bc[i] == 1); assert(grid_.size() > 0); - subdivx_ = subdivx; - numst_ = numst; - numpt_ = grid_.size(); - lda_ = block_vector_.getld(); - loc_numpt_ = numpt_ / subdivx_; + numst_ = numst; + numpt_ = grid_.size(); + lda_ = block_vector_.getld(); assert(numst_ >= 0); @@ -216,7 +212,6 @@ void ExtendedGridOrbitals::initGauss( const double rc, const std::shared_ptr lrs) { assert(numst_ >= 0); - assert(subdivx_ > 0); MGmol_MPI& mmpi = *(MGmol_MPI::instance()); Control& ct = *(Control::instance()); @@ -230,7 +225,7 @@ void ExtendedGridOrbitals::initGauss( const double start1 = grid_.start(1); const double start2 = grid_.start(2); - const int dim0 = grid_.dim(0) / subdivx_; + const int dim0 = grid_.dim(0); const int dim1 = grid_.dim(1); const int dim2 = grid_.dim(2); @@ -255,34 +250,31 @@ void ExtendedGridOrbitals::initGauss( MemorySpace::Memory::set( ipsi_host_view, ipsi_size, 0); - for (short iloc = 0; iloc < subdivx_; iloc++) + const Vector3D& center(lrs->getCenter(icolor)); + Vector3D xc; + + xc[0] = start0; + for (int ix = 0; ix < dim0; ix++) { - const Vector3D& center(lrs->getCenter(icolor)); - Vector3D xc; + xc[1] = start1; - xc[0] = start0 + iloc * dim0 * hgrid[0]; - for (int ix = iloc * dim0; ix < (iloc + 1) * dim0; ix++) + for (int iy = 0; iy < dim1; iy++) { - xc[1] = start1; - - for (int iy = 0; iy < dim1; iy++) + xc[2] = start2; + for (int iz = 0; iz < dim2; iz++) { - xc[2] = start2; - for (int iz = 0; iz < dim2; iz++) - { - const double r = xc.minimage(center, ll, ct.bcWF); - if (r < rmax) - ipsi_host_view[ix * incx + iy * incy + iz] - = static_cast(exp(-r * r * invrc2)); - else - ipsi_host_view[ix * incx + iy * incy + iz] = 0.; - - xc[2] += hgrid[2]; - } - xc[1] += hgrid[1]; + const double r = xc.minimage(center, ll, ct.bcWF); + if (r < rmax) + ipsi_host_view[ix * incx + iy * incy + iz] + = static_cast(exp(-r * r * invrc2)); + else + ipsi_host_view[ix * incx + iy * incy + iz] = 0.; + + xc[2] += hgrid[2]; } - xc[0] += hgrid[0]; + xc[1] += hgrid[1]; } + xc[0] += hgrid[0]; } MemorySpace::Memory::copy_view_to_dev( @@ -303,7 +295,7 @@ void ExtendedGridOrbitals::initFourier() const double start1 = grid_.start(1) - grid_.origin(1); const double start2 = grid_.start(2) - grid_.origin(2); - const int dim0 = grid_.dim(0) / subdivx_; + const int dim0 = grid_.dim(0); const int dim1 = grid_.dim(1); const int dim2 = grid_.dim(2); @@ -340,30 +332,27 @@ void ExtendedGridOrbitals::initFourier() ipsi_host_view, numpt_, 0); // TODO this can be done on the GPU with OpenMP - for (short iloc = 0; iloc < subdivx_; iloc++) + double x = start0; + for (int ix = 0; ix < dim0; ix++) { - double x = start0 + iloc * dim0 * hgrid[0]; - for (int ix = iloc * dim0; ix < (iloc + 1) * dim0; ix++) - { - double y = start1; + double y = start1; - for (int iy = 0; iy < dim1; iy++) + for (int iy = 0; iy < dim1; iy++) + { + double z = start2; + for (int iz = 0; iz < dim2; iz++) { - double z = start2; - for (int iz = 0; iz < dim2; iz++) - { - ipsi_host_view[ix * incx + iy * incy + iz] - = 1. - - static_cast(std::cos(kk[0] * x) - * std::cos(kk[1] * y) - * std::cos(kk[2] * z)); - - z += hgrid[2]; - } - y += hgrid[1]; + ipsi_host_view[ix * incx + iy * incy + iz] + = 1. + - static_cast(std::cos(kk[0] * x) + * std::cos(kk[1] * y) + * std::cos(kk[2] * z)); + + z += hgrid[2]; } - x += hgrid[0]; + y += hgrid[1]; } + x += hgrid[0]; } MemorySpace::Memory::copy_view_to_dev( @@ -397,8 +386,6 @@ void ExtendedGridOrbitals::multiply_by_matrix( { prod_matrix_tm_.start(); - assert(subdivx_ > 0); - unsigned int const product_size = numst_ * ldp; ORBDTYPE* product_host_view = MemorySpace::Memory::allocate_host_view( @@ -407,24 +394,20 @@ void ExtendedGridOrbitals::multiply_by_matrix( product, product_size, product_host_view); memset(product_host_view, 0, ldp * numst_ * sizeof(ORBDTYPE)); - // loop over subdomains - for (short iloc = 0; iloc < subdivx_; iloc++) - { - unsigned int const phi_size = loc_numpt_ * numst_; - ORBDTYPE* phi_host_view = MemorySpace::Memory::allocate_host_view(phi_size); - MemorySpace::Memory::copy_view_to_host( - getPsi(0, iloc), phi_size, phi_host_view); + unsigned int const phi_size = numpt_ * numst_; + ORBDTYPE* phi_host_view + = MemorySpace::Memory::allocate_host_view( + phi_size); + MemorySpace::Memory::copy_view_to_host( + getPsi(0), phi_size, phi_host_view); - // TODO this can be done on the GPU - // Compute product for subdomain iloc - LinearAlgebraUtils::MPgemmNN(loc_numpt_, numst_, - numst_, 1., phi_host_view, lda_, matrix, numst_, 0., - product_host_view + iloc * loc_numpt_, ldp); + // TODO this can be done on the GPU + LinearAlgebraUtils::MPgemmNN(numpt_, numst_, numst_, 1., + phi_host_view, lda_, matrix, numst_, 0., product_host_view, ldp); + + MemorySpace::Memory::free_host_view( + phi_host_view); - MemorySpace::Memory::free_host_view( - phi_host_view); - } MemorySpace::Memory::copy_view_to_dev( product_host_view, product_size, product); MemorySpace::Memory::free_host_view( @@ -510,8 +493,8 @@ void ExtendedGridOrbitals::multiply_by_matrix( { prod_matrix_tm_.start(); - ORBDTYPE* product = new ORBDTYPE[loc_numpt_ * numst_]; - memset(product, 0, loc_numpt_ * numst_ * sizeof(ORBDTYPE)); + ORBDTYPE* product = new ORBDTYPE[numpt_ * numst_]; + memset(product, 0, numpt_ * numst_ * sizeof(ORBDTYPE)); ReplicatedWorkSpace& wspace( ReplicatedWorkSpace::instance()); @@ -519,32 +502,26 @@ void ExtendedGridOrbitals::multiply_by_matrix( matrix.allgather(work_matrix, numst_); - const size_t slnumpt = loc_numpt_ * sizeof(ORBDTYPE); + const size_t slnumpt = numpt_ * sizeof(ORBDTYPE); - // loop over subdomains - for (short iloc = 0; iloc < subdivx_; iloc++) - { - unsigned int const phi_size = loc_numpt_ * numst_; - ORBDTYPE* phi_host_view = MemorySpace::Memory::allocate_host_view(phi_size); - MemorySpace::Memory::copy_view_to_host( - getPsi(0, iloc), phi_size, phi_host_view); + unsigned int const phi_size = numpt_ * numst_; + ORBDTYPE* phi_host_view + = MemorySpace::Memory::allocate_host_view( + phi_size); + MemorySpace::Memory::copy_view_to_host( + getPsi(0), phi_size, phi_host_view); - // TODO this can be done on the GPU - // Compute loc_numpt_ rows (for subdomain iloc) - LinearAlgebraUtils::MPgemmNN(loc_numpt_, numst_, - numst_, 1., phi_host_view, lda_, work_matrix, numst_, 0., product, - loc_numpt_); + // TODO this can be done on the GPU + LinearAlgebraUtils::MPgemmNN(numpt_, numst_, numst_, 1., + phi_host_view, lda_, work_matrix, numst_, 0., product, numpt_); - for (int color = 0; color < numst_; color++) - memcpy(phi_host_view + color * lda_, product + color * loc_numpt_, - slnumpt); + for (int color = 0; color < numst_; color++) + memcpy(phi_host_view + color * lda_, product + color * numpt_, slnumpt); - MemorySpace::Memory::copy_view_to_dev( - phi_host_view, phi_size, getPsi(0, iloc)); - MemorySpace::Memory::free_host_view( - phi_host_view); - } + MemorySpace::Memory::copy_view_to_dev( + phi_host_view, phi_size, getPsi(0)); + MemorySpace::Memory::free_host_view( + phi_host_view); delete[] product; @@ -858,11 +835,7 @@ int ExtendedGridOrbitals::read_func_hdf5( #else ORBDTYPE* buffer_dev = buffer; #endif - for (short iloc = 0; iloc < subdivx_; iloc++) - { - const int shift = iloc * loc_numpt_; - block_vector_.assignLocal(icolor, iloc, buffer_dev + shift); - } + block_vector_.assignLocal(icolor, 0, buffer_dev); #ifdef HAVE_MAGMA MemorySpace::Memory::free(buffer_dev); #endif @@ -901,7 +874,7 @@ void ExtendedGridOrbitals::computeMatB( const short bcolor = 32; - SquareLocalMatrices ss(subdivx_, numst_); + SquareLocalMatrices ss(1, numst_); ORBDTYPE* work = new ORBDTYPE[lda_ * bcolor]; memset(work, 0, lda_ * bcolor * sizeof(ORBDTYPE)); @@ -930,25 +903,18 @@ void ExtendedGridOrbitals::computeMatB( LapOper.rhs(getFuncWithGhosts(icolor + i), work + i * lda_); } - for (short iloc = 0; iloc < subdivx_; iloc++) - { - - MATDTYPE* ssiloc = ss.getRawPtr(iloc); + MATDTYPE* ss0 = ss.getRawPtr(0); - // calculate nf columns of ssiloc - LinearAlgebraUtils::MPgemmTN(numst_, nf, - loc_numpt_, 1., orbitals_psi_host_view + iloc * loc_numpt_, - lda_, work + iloc * loc_numpt_, lda_, 0., - ssiloc + icolor * numst_, numst_); - } + // calculate nf columns of ss0 + LinearAlgebraUtils::MPgemmTN(numst_, nf, numpt_, + grid_.vel(), orbitals_psi_host_view, lda_, work, lda_, 0., + ss0 + icolor * numst_, numst_); } MemorySpace::Memory::free_host_view( orbitals_psi_host_view); delete[] work; - const double vel = grid_.vel(); - ss.scal(vel); proj_matrices_->initializeMatB(ss); matB_tm_.stop(); @@ -974,9 +940,8 @@ void ExtendedGridOrbitals::getLocalOverlap( SquareLocalMatrices& ss) { assert(numst_ >= 0); - assert(loc_numpt_ > 0); + assert(numpt_ > 0); assert(grid_.vel() > 1.e-8); - assert(subdivx_ > 0); if (numst_ != 0) { @@ -1025,12 +990,11 @@ void ExtendedGridOrbitals::computeLocalProduct(const ORBDTYPE* const array, const int ld, LocalMatrices& ss, const bool transpose) { - assert(loc_numpt_ > 0); - assert(loc_numpt_ <= ld); + assert(numpt_ > 0); + assert(numpt_ <= ld); assert(array != nullptr); assert(numst_ != 0); assert(grid_.vel() > 0.); - assert(subdivx_ > 0); const ORBDTYPE* const a = transpose ? array : block_vector_.vect(0); const ORBDTYPE* const b = transpose ? block_vector_.vect(0) : array; @@ -1038,12 +1002,8 @@ void ExtendedGridOrbitals::computeLocalProduct(const ORBDTYPE* const array, const int lda = transpose ? ld : lda_; const int ldb = transpose ? lda_ : ld; - for (short iloc = 0; iloc < subdivx_; iloc++) - { - LinearAlgebraUtils::MPgemmTN(numst_, numst_, - loc_numpt_, grid_.vel(), a + iloc * loc_numpt_, lda, - b + +iloc * loc_numpt_, ldb, 0., ss.getRawPtr(iloc), ss.m()); - } + LinearAlgebraUtils::MPgemmTN(numst_, numst_, numpt_, + grid_.vel(), a, lda, b, ldb, 0., ss.getRawPtr(0), ss.m()); } void ExtendedGridOrbitals::computeDiagonalElementsDotProduct( @@ -1054,15 +1014,11 @@ void ExtendedGridOrbitals::computeDiagonalElementsDotProduct( for (int icolor = 0; icolor < numst_; icolor++) { - ss[icolor] = 0.; - for (short iloc = 0; iloc < subdivx_; iloc++) - { - double alpha - = LinearAlgebraUtils::MPdot(loc_numpt_, - orbitals.getPsi(icolor, iloc), getPsi(icolor, iloc)); + ss[icolor] = 0.; + double alpha = LinearAlgebraUtils::MPdot( + numpt_, orbitals.getPsi(icolor), getPsi(icolor)); - ss[icolor] += (DISTMATDTYPE)(alpha * grid_.vel()); - } + ss[icolor] += (DISTMATDTYPE)(alpha * grid_.vel()); } std::vector tmp(ss); MGmol_MPI& mmpi = *(MGmol_MPI::instance()); @@ -1072,7 +1028,7 @@ void ExtendedGridOrbitals::computeDiagonalElementsDotProduct( void ExtendedGridOrbitals::computeGram( dist_matrix::DistMatrix& gram_mat) { - SquareLocalMatrices ss(subdivx_, numst_); + SquareLocalMatrices ss(1, numst_); getLocalOverlap(ss); @@ -1086,7 +1042,7 @@ void ExtendedGridOrbitals::computeGram( void ExtendedGridOrbitals::computeGram(const ExtendedGridOrbitals& orbitals, dist_matrix::DistMatrix& gram_mat) { - SquareLocalMatrices ss(subdivx_, numst_); + SquareLocalMatrices ss(1, numst_); getLocalOverlap(orbitals, ss); @@ -1110,11 +1066,11 @@ void ExtendedGridOrbitals::computeGram(const int verbosity) (*MPIdata::sout) << "ExtendedGridOrbitals::computeGram()" << std::endl; #endif - assert(subdivx_ > 0); - assert(subdivx_ < 1000); + assert(1 > 0); + assert(1 < 1000); assert(numst_ >= 0); - SquareLocalMatrices ss(subdivx_, numst_); + SquareLocalMatrices ss(1, numst_); getLocalOverlap(ss); @@ -1147,7 +1103,7 @@ double ExtendedGridOrbitals::dotProductWithDM( { assert(proj_matrices_ != nullptr); - SquareLocalMatrices ss(subdivx_, numst_); + SquareLocalMatrices ss(1, numst_); computeLocalProduct(orbitals, ss); @@ -1159,7 +1115,7 @@ double ExtendedGridOrbitals::dotProductWithInvS( { assert(proj_matrices_ != nullptr); - SquareLocalMatrices ss(subdivx_, numst_); + SquareLocalMatrices ss(1, numst_); computeLocalProduct(orbitals, ss); @@ -1181,7 +1137,7 @@ double ExtendedGridOrbitals::dotProductSimple( { assert(proj_matrices_ != nullptr); - SquareLocalMatrices ss(subdivx_, numst_); + SquareLocalMatrices ss(1, numst_); computeLocalProduct(orbitals, ss); @@ -1199,8 +1155,8 @@ double ExtendedGridOrbitals::dotProduct( dot_product_tm_.start(); assert(numst_ >= 0); - assert(subdivx_ > 0); - assert(subdivx_ < 1000); + assert(1 > 0); + assert(1 < 1000); double dot = 0.; if (dot_type == 0) @@ -1246,8 +1202,8 @@ void ExtendedGridOrbitals::orthonormalizeLoewdin(const bool overlap_uptodate, SquareLocalMatrices* localP = matrixTransform; if (matrixTransform == nullptr) - localP = new SquareLocalMatrices( - subdivx_, numst_); + localP + = new SquareLocalMatrices(1, numst_); incrementIterativeIndex(); @@ -1303,12 +1259,10 @@ double ExtendedGridOrbitals::normState(const int gid) const assert(gid >= 0); double tmp = 0.; - for (short iloc = 0; iloc < subdivx_; iloc++) - { - // diagonal element - tmp += block_vector_.dot(gid, gid, iloc); - // cout<<"gid="< 1.e-15); diagS[color] = 1. / sqrt(diagS[color]); - for (short iloc = 0; iloc < subdivx_; iloc++) - { - block_vector_.scal(diagS[color], color, iloc); - } + block_vector_.scal(diagS[color], color, 0); } incrementIterativeIndex(); @@ -1498,11 +1432,11 @@ void ExtendedGridOrbitals::projectOut(ExtendedGridOrbitals& orbitals) void ExtendedGridOrbitals::projectOut(ORBDTYPE* const array, const int lda) { assert(lda > 1); - assert(loc_numpt_ > 0); + assert(numpt_ > 0); assert(numst_ >= 0); - assert(lda_ >= loc_numpt_); + assert(lda_ >= numpt_); - SquareLocalMatrices lmatrix(subdivx_, numst_); + SquareLocalMatrices lmatrix(1, numst_); if (numst_ != 0) computeLocalProduct(array, lda, lmatrix, false); @@ -1513,47 +1447,44 @@ void ExtendedGridOrbitals::projectOut(ORBDTYPE* const array, const int lda) #endif proj_matrices_->applyInvS(lmatrix); - ORBDTYPE* tproduct = new ORBDTYPE[loc_numpt_ * numst_]; - memset(tproduct, 0, loc_numpt_ * numst_ * sizeof(ORBDTYPE)); + ORBDTYPE* tproduct = new ORBDTYPE[numpt_ * numst_]; + memset(tproduct, 0, numpt_ * numst_ * sizeof(ORBDTYPE)); - // loop over subdomains - for (short iloc = 0; iloc < subdivx_; iloc++) - { - unsigned int const phi_size = loc_numpt_ * numst_; - ORBDTYPE* phi_host_view = MemorySpace::Memory::allocate_host_view(phi_size); - MemorySpace::Memory::copy_view_to_host( - getPsi(0, iloc), phi_size, phi_host_view); + unsigned int const phi_size = numpt_ * numst_; + ORBDTYPE* phi_host_view + = MemorySpace::Memory::allocate_host_view( + phi_size); + MemorySpace::Memory::copy_view_to_host( + getPsi(0), phi_size, phi_host_view); - MATDTYPE* localMat_iloc = lmatrix.getRawPtr(iloc); + MATDTYPE* localMat = lmatrix.getRawPtr(); - // TODO this can be done on the GPU - // Compute loc_numpt_ rows (for subdomain iloc) - LinearAlgebraUtils::MPgemmNN(loc_numpt_, numst_, - numst_, 1., phi_host_view, lda_, localMat_iloc, numst_, 0., - tproduct, loc_numpt_); + // TODO this can be done on the GPU + // Compute numpt_ rows (for subdomain 0) + LinearAlgebraUtils::MPgemmNN(numpt_, numst_, numst_, 1., + phi_host_view, lda_, localMat, numst_, 0., tproduct, numpt_); - MemorySpace::Memory::free_host_view( - phi_host_view); + MemorySpace::Memory::free_host_view( + phi_host_view); - ORBDTYPE* parray = array + iloc * loc_numpt_; - unsigned int const parray_size = numst_ * lda; - ORBDTYPE* parray_host_view = MemorySpace::Memory::allocate_host_view(parray_size); - MemorySpace::Memory::copy_view_to_host( - parray, parray_size, parray_host_view); + ORBDTYPE* parray = array + 0 * numpt_; + unsigned int const parray_size = numst_ * lda; + ORBDTYPE* parray_host_view + = MemorySpace::Memory::allocate_host_view( + parray_size); + MemorySpace::Memory::copy_view_to_host( + parray, parray_size, parray_host_view); - ORBDTYPE minus = -1.; - for (int j = 0; j < numst_; j++) - LinearAlgebraUtils::MPaxpy(loc_numpt_, minus, - tproduct + j * loc_numpt_, parray_host_view + j * lda); + ORBDTYPE minus = -1.; + for (int j = 0; j < numst_; j++) + LinearAlgebraUtils::MPaxpy( + numpt_, minus, tproduct + j * numpt_, parray_host_view + j * lda); - MemorySpace::Memory::copy_view_to_dev( - parray_host_view, parray_size, parray); + MemorySpace::Memory::copy_view_to_dev( + parray_host_view, parray_size, parray); - MemorySpace::Memory::free_host_view( - parray_host_view); - } + MemorySpace::Memory::free_host_view( + parray_host_view); delete[] tproduct; } @@ -1568,7 +1499,7 @@ void ExtendedGridOrbitals::initRand() std::vector yrand(grid_.gdim(1)); std::vector zrand(grid_.gdim(2)); - const int loc_length = dim[0] / subdivx_; + const int loc_length = dim[0] / 1; assert(loc_length > 0); assert(static_cast(loc_length) <= dim[0]); @@ -1597,28 +1528,25 @@ void ExtendedGridOrbitals::initRand() for (unsigned int idx = 0; idx < grid_.gdim(2); idx++) zrand[idx] = ran0() - 0.5; - unsigned int const size = loc_numpt_; + unsigned int const size = numpt_; ORBDTYPE* psi_state_view = MemorySpace::Memory::allocate_host_view(size); MemorySpace::Memory::copy_view_to_host( psi(istate), size, psi_state_view); - for (short iloc = 0; iloc < subdivx_; iloc++) - { - for (int ix = loc_length * iloc; ix < loc_length * (iloc + 1); ix++) - for (unsigned int iy = 0; iy < dim[1]; iy++) - for (unsigned int iz = 0; iz < dim[2]; iz++) - { - const double alpha = xrand[xoff + ix] * yrand[yoff + iy] - * zrand[zoff + iz]; - - psi_state_view[ix * incx + iy * incy + iz] - = alpha * alpha; - - assert((ix * incx + iy * incy + iz) - < static_cast(lda_)); - } - } + for (int ix = loc_length * 0; ix < loc_length; ix++) + for (unsigned int iy = 0; iy < dim[1]; iy++) + for (unsigned int iz = 0; iz < dim[2]; iz++) + { + const double alpha = xrand[xoff + ix] * yrand[yoff + iy] + * zrand[zoff + iz]; + + psi_state_view[ix * incx + iy * incy + iz] = alpha * alpha; + + assert((ix * incx + iy * incy + iz) + < static_cast(lda_)); + } + MemorySpace::Memory::copy_view_to_dev( psi_state_view, size, psi(istate)); MemorySpace::Memory::free_host_view( @@ -1650,22 +1578,21 @@ void ExtendedGridOrbitals::addDotWithNcol2Matrix( MemorySpace::Memory::copy_view_to_host( block_vector_.vect(0), block_vector_size, block_vector_host_view); - for (short iloc = 0; iloc < subdivx_; iloc++) - { - unsigned int const phi_size = loc_numpt_ * numst_; - ORBDTYPE* phi_host_view = MemorySpace::Memory::allocate_host_view(phi_size); - MemorySpace::Memory::copy_view_to_host( - Apsi.getPsi(0, iloc), phi_size, phi_host_view); + unsigned int const phi_size = numpt_ * numst_; + ORBDTYPE* phi_host_view + = MemorySpace::Memory::allocate_host_view( + phi_size); + MemorySpace::Memory::copy_view_to_host( + Apsi.getPsi(0), phi_size, phi_host_view); - // TODO this can be done on the GPU - LinearAlgebraUtils::MPgemmTN(numst_, numst_, - loc_numpt_, vel, block_vector_host_view + iloc * loc_numpt_, lda_, - phi_host_view, lda_, 1., work.data(), numst_); + // TODO this can be done on the GPU + LinearAlgebraUtils::MPgemmTN(numst_, numst_, numpt_, vel, + block_vector_host_view + 0 * numpt_, lda_, phi_host_view, lda_, 1., + work.data(), numst_); + + MemorySpace::Memory::free_host_view( + phi_host_view); - MemorySpace::Memory::free_host_view( - phi_host_view); - } MemorySpace::Memory::free_host_view( block_vector_host_view); @@ -1713,14 +1640,11 @@ void ExtendedGridOrbitals::addDotWithNcol2Matrix( void ExtendedGridOrbitals::computeGlobalIndexes() { overlapping_gids_.clear(); - overlapping_gids_.resize(subdivx_); - for (short iloc = 0; iloc < subdivx_; iloc++) + overlapping_gids_.resize(1); + overlapping_gids_[0].resize(numst_, -1); + for (int gid = 0; gid < numst_; gid++) { - overlapping_gids_[iloc].resize(numst_, -1); - for (int gid = 0; gid < numst_; gid++) - { - overlapping_gids_[iloc][gid] = gid; - } + overlapping_gids_[0][gid] = gid; } } diff --git a/src/ExtendedGridOrbitals.h b/src/ExtendedGridOrbitals.h index 244a150f..92809507 100644 --- a/src/ExtendedGridOrbitals.h +++ b/src/ExtendedGridOrbitals.h @@ -60,7 +60,6 @@ class ExtendedGridOrbitals : public Orbitals static int lda_; // leading dimension for storage static int numpt_; - static int loc_numpt_; // static double (ExtendedGridOrbitals::*dotProduct_)(const // ExtendedGridOrbitals&); @@ -140,8 +139,6 @@ class ExtendedGridOrbitals : public Orbitals protected: const pb::Grid& grid_; - static short subdivx_; - // indexes corresponding to valid function in each subdomain static std::vector> overlapping_gids_; @@ -195,7 +192,7 @@ class ExtendedGridOrbitals : public Orbitals int numst(void) const { return numst_; } int getLda() const { return lda_; } - int getLocNumpt() const { return loc_numpt_; } + int getLocNumpt() const { return numpt_; } int getNumpt() const { return numpt_; } bool isCompatibleWith(const ExtendedGridOrbitals&) const { return true; } @@ -262,10 +259,10 @@ class ExtendedGridOrbitals : public Orbitals assert(new_storage != 0); block_vector_.setStorage(new_storage); } - ORBDTYPE* getPsi(const int i, const short iloc = 0) const + ORBDTYPE* getPsi(const int i, const int iloc = 0) const { - assert(iloc < subdivx_); - return block_vector_.vect(i) + iloc * loc_numpt_; + assert(iloc == 0); + return block_vector_.vect(i); } template void setPsi(const pb::GridFunc& gf_work, const int ist) @@ -283,7 +280,7 @@ class ExtendedGridOrbitals : public Orbitals assert(numst_ < 10000); return numst_; } - short subdivx(void) const { return subdivx_; } + short subdivx(void) const { return 1; } void printChromaticNumber(std::ostream& os) const { if (onpe0) os << " Max. chromatic_number: " << numst_ << std::endl; diff --git a/src/SinCosOps.cc b/src/SinCosOps.cc index 85f4ea79..e1c3fdd6 100644 --- a/src/SinCosOps.cc +++ b/src/SinCosOps.cc @@ -39,7 +39,7 @@ void SinCosOps::compute(const T& orbitals, vector>& a) int n2 = numst * numst; - int loc_length = dim0 / orbitals.subdivx_; + int loc_length = dim0 / orbitals.subdivx(); assert(loc_length > 0); assert(loc_length <= dim0); @@ -64,7 +64,7 @@ void SinCosOps::compute(const T& orbitals, vector>& a) MemorySpace::Memory::copy_view_to_host( orbitals.psi(0), size_psi, psi_view); - for (short iloc = 0; iloc < orbitals.subdivx_; iloc++) + for (short iloc = 0; iloc < orbitals.subdivx(); iloc++) { for (int icolor = 0; icolor < size; icolor++) @@ -153,7 +153,7 @@ void SinCosOps::computeSquare(const T& orbitals, vector>& a) const int dim1 = grid.dim(1); const int dim2 = grid.dim(2); - int loc_length = dim0 / orbitals.subdivx_; + int loc_length = dim0 / orbitals.subdivx(); assert(loc_length > 0); assert(loc_length <= dim0); @@ -200,7 +200,7 @@ void SinCosOps::computeSquare(const T& orbitals, vector>& a) } const int size = orbitals.chromatic_number(); - for (short iloc = 0; iloc < orbitals.subdivx_; iloc++) + for (short iloc = 0; iloc < orbitals.subdivx(); iloc++) { for (int icolor = 0; icolor < size; icolor++) @@ -274,7 +274,7 @@ void SinCosOps::computeSquare1D( int n2 = numst * numst; - int loc_length = dim0 / orbitals.subdivx_; + int loc_length = dim0 / orbitals.subdivx(); assert(loc_length > 0); assert(loc_length <= dim0); @@ -298,7 +298,7 @@ void SinCosOps::computeSquare1D( } const int size = orbitals.chromatic_number(); - for (short iloc = 0; iloc < orbitals.subdivx_; iloc++) + for (short iloc = 0; iloc < orbitals.subdivx(); iloc++) { for (int icolor = 0; icolor < size; icolor++) @@ -366,7 +366,7 @@ void SinCosOps::compute1D( int n2 = numst * numst; - int loc_length = dim0 / orbitals.subdivx_; + int loc_length = dim0 / orbitals.subdivx(); assert(loc_length > 0); assert(loc_length <= dim0); @@ -389,7 +389,7 @@ void SinCosOps::compute1D( const int size = orbitals.chromatic_number(); - for (short iloc = 0; iloc < orbitals.subdivx_; iloc++) + for (short iloc = 0; iloc < orbitals.subdivx(); iloc++) { for (int icolor = 0; icolor < size; icolor++) @@ -466,7 +466,7 @@ void SinCosOps::computeDiag2states( color_st[ic] = orbitals.getColor(st[ic]); } - int loc_length = dim0 / orbitals.subdivx_; + int loc_length = dim0 / orbitals.subdivx(); assert(loc_length > 0); assert(loc_length <= dim0); @@ -487,7 +487,7 @@ void SinCosOps::computeDiag2states( const short mycolor = color_st[ic]; if (mycolor >= 0) - for (short iloc = 0; iloc < orbitals.subdivx_; iloc++) + for (short iloc = 0; iloc < orbitals.subdivx(); iloc++) { if (orbitals.overlapping_gids_[iloc][mycolor] == st[ic]) @@ -558,7 +558,7 @@ void SinCosOps::compute2states( int n2 = 4; - int loc_length = dim0 / orbitals.subdivx_; + int loc_length = dim0 / orbitals.subdivx(); assert(loc_length > 0); assert(loc_length <= dim0); @@ -578,7 +578,7 @@ void SinCosOps::compute2states( const int mycolor = color_st[ic]; if (mycolor >= 0) - for (short iloc = 0; iloc < orbitals.subdivx_; iloc++) + for (short iloc = 0; iloc < orbitals.subdivx(); iloc++) { if (orbitals.overlapping_gids_[iloc][mycolor] == st[ic]) @@ -656,7 +656,7 @@ void SinCosOps::compute( const int dim1 = grid.dim(1); const int dim2 = grid.dim(2); - int loc_length = dim0 / orbitals1.subdivx_; + int loc_length = dim0 / orbitals1.subdivx(); assert(loc_length > 0); assert(loc_length <= dim0); @@ -671,9 +671,8 @@ void SinCosOps::compute( vector cosz; grid.getSinCosFunctions(sinx, siny, sinz, cosx, cosy, cosz); - for (short iloc = 0; iloc < orbitals1.subdivx_; iloc++) + for (short iloc = 0; iloc < orbitals1.subdivx(); iloc++) { - for (int color = 0; color < orbitals1.chromatic_number(); color++) { int i = orbitals1.overlapping_gids_[iloc][color]; @@ -741,7 +740,7 @@ void SinCosOps::computeDiag(const T& orbitals, const int dim1 = grid.dim(1); const int dim2 = grid.dim(2); - int loc_length = dim0 / orbitals.subdivx_; + int loc_length = dim0 / orbitals.subdivx(); assert(loc_length > 0); assert(loc_length <= dim0); @@ -768,7 +767,7 @@ void SinCosOps::computeDiag(const T& orbitals, const int size = orbitals.chromatic_number(); - for (short iloc = 0; iloc < orbitals.subdivx_; iloc++) + for (short iloc = 0; iloc < orbitals.subdivx(); iloc++) { for (short icolor = 0; icolor < size; icolor++) {