made exchange more accurate, switched to iterative instead of increme…

…ntal solving of the residual equations
m-a-d-n-e-s-s · Mar 8, 2012 · 30eab97 · 30eab97
1 parent fec9b02
commit 30eab97
Show file tree

Hide file tree

Showing 4 changed files with 118 additions and 104 deletions.
diff --git a/src/apps/examples/mp2.cc b/src/apps/examples/mp2.cc
@@ -197,12 +197,11 @@ namespace madness {
 
             // include the purely local potential that (partially) cancels 1/r12
             if (_gamma>0.0) {
-                real_function_6d pair=hartree_product(phi_i,phi_j);
                 real_function_6d fg3=real_factory_6d(world).functor2(fg_(_gamma)).is_on_demand();
                 real_function_6d mul=CompositeFactory<double,6,3>(world)
-                            .g12(fg3).particle1(copy(phi_i)).particle2(copy(phi_j));
-                mul.fill_tree().truncate();
-                mul.print_size("mul truncated");
+                                    .g12(fg3).particle1(copy(phi_i)).particle2(copy(phi_j));
+                mul.fill_tree(op_mod).truncate();
+                mul.print_size("mul");
 
                 result=result+mul;
             }
@@ -752,8 +751,9 @@ namespace madness {
         void solve_residual_equations(const int i, const int j) {
 
             ElectronPair result=guess_mp1_2(i,j);
-            compute_V(result);
-            if (world.rank()==0) printf("finished iteration %2d at time %.1fs\n\n", 0, wall_time());
+            double energy=compute_V(result);
+            if (world.rank()==0) printf("finished with prep step at time %6.1fs with energy %12.8f\n\n",
+            		wall_time(),energy);
 
             // the Green's function depends on the zeroth order energy, which is the sum
             // of the orbital energies of orbitals i and j
@@ -763,12 +763,11 @@ namespace madness {
             real_convolution_6d green = BSHOperator<6>(world, sqrt(-2*eps), 0.00001,
             		FunctionDefaults<6>::get_thresh()*0.1);
 
-
-            if (0) {
+            if (1) {
 				if (world.rank()==0) print("computing iteratively");
 				real_function_6d constant_term;
 				load_function(world,constant_term,"GVpair");
-				for (int ii=0; ii<20; ++ii) {
+				for (int ii=1; ii<20; ++ii) {
 
 					real_function_6d vphi=multiply_with_0th_order_Hamiltonian(result.function,i,j);
 
@@ -780,11 +779,14 @@ namespace madness {
 					result.function=(constant_term+tmp).truncate();
 					result.function=Q12(result.function);
 
-					compute_V(result);
+					double old_energy=energy;
+					energy=compute_V(result);
 
 					const std::string name="psi1_it"+stringify(ii);
 					save_function(world,result.function,name);
-					if (world.rank()==0) printf("finished iteration %2d at time %.1fs\n\n", ii, wall_time());
+					if (world.rank()==0) printf("finished iteration %2d at time %6.1fs with energy %12.8f\n\n",
+							ii, wall_time(),energy);
+					if (std::abs(old_energy-energy)<result.function.thresh()*0.01) break;
 
 				}
             } else {
@@ -894,55 +896,37 @@ namespace madness {
 
         void test(const int i, const int j) {
 
-            // some functions repeatedly used
         	ElectronPair pair;
         	pair.i=i; pair.j=j;
-            pair.phi0=this->zeroth_order_function(i,j);
-
-            const double gamma=corrfac.gamma();
-
-            real_convolution_3d p=CoulombOperator(world,0.00001,hf.get_calc().param.econv);
-            real_convolution_3d b=BSHOperator<3>(world,gamma,0.00001,hf.get_calc().param.econv);
-
-            real_function_3d ij=hf.orbital(i)*hf.orbital(j);
-            real_function_3d f=p(ij)-b(ij)*4*constants::pi;
-            double a2=inner(ij,f)/(2.0*gamma);
-            if (world.rank()==0) printf("< phi0 | fg      | phi0 > w/ convolution %12.8f\n",a2);
-
-
-            real_function_6d fg=FGFactory<double,6>(world,gamma).dcut(dcut);
-            real_function_6d eri=ERIFactory<double,6>(world).dcut(dcut);
-            real_function_6d tmp1=CompositeFactory<double,6,3>(world)
-                                    .g12(fg).ket(copy(pair.phi0));
-
-            double a1=inner(pair.phi0,tmp1)/(2.0*gamma);
-            if (world.rank()==0) printf("< phi0 | fg      | phi0 >  %12.8f\n",a1);
-
+        	const int particle=1;
 
+            const double eps=zeroth_order_energy(i,j);
+            real_convolution_6d green = BSHOperator<6>(world, sqrt(-2.0*eps), 0.00001,
+            		FunctionDefaults<6>::get_thresh()*0.1);
+            real_function_3d phi=hf.orbital(i);
+
+        	green.doleaves=false;
+        	real_function_6d gvmod, gvns;
+        	{
+            	green.modified()=true;
+        		gvmod=green(phi,phi);
+        		gvmod.print_size("gvmod");
+        		print("modified norm",gvmod.norm2());
+                if (world.rank()==0) printf("finished at time %.1fs\n\n", wall_time());
+        	}
+        	{
+            	green.modified()=false;
+        		gvns=green(phi,phi);
+//        		save_function(world,gvns,"gvphi_non_mod");
+//        		load_function(world,gvns,"gvphi_non_mod");
+        		print("non-modified norm",gvns.norm2());
+                if (world.rank()==0) printf("finished at time %.1fs\n\n", wall_time());
+        	}
+        	real_function_6d diff=gvmod-gvns;
+        	double d=diff.norm2();
+    		print("diff norm",d);
 
 
-//			real_function_6d Kphi0=K(pair.phi0);
-//			double a1=inner(pair.phi0,Kphi0);
-//            if (world.rank()==0) printf("< phi0 | K     | phi0 >  %12.8f\n",a1);
-//
-//            double a2=inner(hf.orbital(i),K(hf.orbital(j)));
-//            if (world.rank()==0) printf("< i    | K     | j >  %12.8f\n",a2);
-//
-//            pair.r12phi=CompositeFactory<double,6,3>(world).g12(corrfac.f()).ket(copy(pair.phi0));
-//            pair.r12phi.fill_tree().truncate().reduce_rank();
-//
-//        	load_balance(pair.r12phi,true);
-//
-//        	const real_function_6d Kfphi0=K(pair.r12phi);
-//        	const real_function_6d Kphi0=make_Kphi0(i,j);
-//			real_function_6d fKphi0=CompositeFactory<double,6,3>(world).g12(corrfac.f()).ket(Kphi0);
-//
-//			const double a=inner(pair.phi0,Kfphi0) - inner(pair.phi0,fKphi0);
-//        	if (world.rank()==0) printf("<ij | Kf - fK | ij>      %12.8f\n",a);
-//
-//        	const real_function_6d KffKphi0=this->make_KffKphi0(pair);
-//			const double b=inner(pair.phi0,KffKphi0);
-//        	if (world.rank()==0) printf("<ij | [K,f]   | ij>      %12.8f\n",b);
 
         }
 
@@ -1039,7 +1023,7 @@ namespace madness {
 
             LoadBalanceDeux<6> lb(world);
             if (leaf) lb.add_tree(f,LBCost(1.0,0.1));
-            else lb.add_tree(f,LBCost(0.1,1.0));
+            else lb.add_tree(f,LBCost(0.001,1.0));
             FunctionDefaults<6>::redistribute(world, lb.load_balance(2.0,false));
             if(world.rank() == 0) printf("redistributed at time   %.1fs\n", wall_time());
 
@@ -1418,7 +1402,7 @@ namespace madness {
 			real_function_6d Vpair1=(pair.Uphi0-pair.KffKphi0).truncate().reduce_rank();
 			Vpair1=Q12(Vpair1).truncate().reduce_rank();
 			plot_plane(Vpair1,"Vpair1");
-			load_balance(Vpair1,true);
+			load_balance(Vpair1,false);
 			real_function_6d GVpair=green(-2.0*Vpair1).truncate().reduce_rank();
 			Vpair1.clear(true);
 
@@ -1942,12 +1926,6 @@ namespace madness {
             vphi.fill_tree(op_mod).truncate();
             vphi.print_size("(V_nuc + J1 + J2) |ket>:  made V tree");
 
-            // add exchange
-//            for (int k=0; k<hf.nocc(); ++k) {
-//                vphi=vphi-apply_exchange(f,hf.orbital(k),1);
-//                vphi=vphi-apply_exchange(f,hf.orbital(k),2);
-//                vphi.truncate().reduce_rank();
-//            }
             vphi=(vphi-K(f)).truncate().reduce_rank();
             vphi.print_size("(V_nuc + J - K) |ket>: the tree");
 

diff --git a/src/lib/mra/funcimpl.h b/src/lib/mra/funcimpl.h
@@ -261,6 +261,7 @@ namespace madness {
         /// @param[in]  gcoeff coefficients of g of its appropriate key in NS form
         bool operator()(const Key<NDIM>& key, const Tensor<T>& fcoeff, const Tensor<T>& gcoeff) const {
 
+        	if (key.level()<2) return false;
             Slice s = Slice(0,k-1);
             std::vector<Slice> s0(NDIM/2,s);
 
@@ -412,6 +413,18 @@ namespace madness {
     };
 
 
+    template<size_t NDIM>
+    struct true_op {
+
+    	template<typename T>
+        bool operator()(const Key<NDIM>& key, const T& t) const {return true;}
+
+    	template<typename T, typename R>
+        bool operator()(const Key<NDIM>& key, const T& t, const R& r) const {return true;}
+        template <typename Archive> void serialize (Archive& ar) {}
+
+    };
+
     /// FunctionNode holds the coefficients, etc., at each node of the 2^NDIM-tree
     template<typename T, std::size_t NDIM>
     class FunctionNode {
@@ -3500,8 +3513,8 @@ namespace madness {
 								iaket.datum.second.coeff()).full_tensor_copy();
 					} else {
 						MADNESS_ASSERT(iap1.impl and iap2.impl);
-						hartree_leaf_op<T,NDIM> hlop(result.impl,result.impl->get_k());
-						hartree_op<LDIM,hartree_leaf_op<T,NDIM> > op(result.impl,iap1,iap2,hlop);
+						true_op<NDIM> hlop;
+						hartree_op<LDIM,true_op<NDIM> > op(result.impl,iap1,iap2,hlop);
 						const coeffT coeff_ket=op(key).second;
 						val_ket=result.impl->coeffs2values(key,coeff_ket).full_tensor_copy();
 					}
@@ -3982,7 +3995,7 @@ namespace madness {
 
         /// sum all the contributions from all scales after applying an operator in mod-NS form
         void trickle_down(bool fence) {
-            MADNESS_ASSERT(is_redundant());
+//            MADNESS_ASSERT(is_redundant());
             nonstandard = compressed = redundant = false;
 //            this->print_size("in trickle_down");
             if (world.rank() == coeffs.owner(cdata.key0))
@@ -5019,17 +5032,22 @@ namespace madness {
 
                 if (not is_leaf) {
 					// new coeffs are simply the hartree/kronecker/outer product --
-                	const coeffT coeff=outer_low_rank(fcoeff,gcoeff);
+                	const std::vector<Slice>& s0=iaf.impl->cdata.s0;
+                	const coeffT coeff = (apply_op->modified())
+                			? outer_low_rank(copy(fcoeff(s0)),copy(gcoeff(s0)))
+                			: outer_low_rank(fcoeff,gcoeff);
 
                 	// now send off the application
                     tensorT coeff_full;
                     ProcessID p = FunctionDefaults<NDIM>::get_apply_randomize()
                     		? result->world.random_proc() : result->coeffs.owner(key);
-                    Future<double> norm0=result->task(p,&implT:: template do_apply_shell<opT,T>,
-                    		apply_op, key, coeff, coeff_full, 0);
+//                    Future<double> norm0=result->task(p,&implT:: template do_apply_shell<opT,T>,
+//                    		apply_op, key, coeff, coeff_full, 0);
+                    double norm0=result->do_apply_shell<opT,T>(apply_op, key, coeff, coeff_full, 0);
+
                     result->task(p,&implT:: template apply_shells<opT,T>, apply_op, key, coeff, norm0);
 
-                    return finalize(norm0.get(),key,coeff);
+                    return finalize(norm0,key,coeff);
 
                 } else {
                 	return std::pair<bool,coeffT> (is_leaf,coeffT());
@@ -5134,6 +5152,7 @@ namespace madness {
                     result->task(p,&implT:: template forward_apply_shells<opT,T>,
                     		apply_op, key, coeff, norm0);
 
+                    if (node.is_leaf()) return std::pair<bool,coeffT> (true,coeff);
                     return finalize(norm0,key,coeff);
 
                 } else {

diff --git a/src/lib/mra/mra.h b/src/lib/mra/mra.h
@@ -1837,8 +1837,9 @@ namespace madness {
     	bool same=(ff1.get_impl()==ff2.get_impl());
 
     	// keep the leaves! They are assumed to be there later
-		if (not same) ff1.nonstandard(true,false);
-		ff2.nonstandard(true,true);
+    	// even for modified op we need NS form for the hartree_leaf_op
+    	if (not same) ff1.nonstandard(true,false);
+    	ff2.nonstandard(true,true);
 
 
         FunctionFactory<T,LDIM+LDIM> factory=FunctionFactory<resultT,LDIM+LDIM>(f1.world())
@@ -1866,10 +1867,16 @@ namespace madness {
             op.timer_low_accumulate.print("op low rank addition ");
         }
 
-		if (not same) ff1.standard(false);
-		ff2.standard(false);
 		result.get_impl()->finalize_apply(true);	// need fence before reconstruct
-		result.reconstruct();
+
+        if (op.modified()) {
+            result.get_impl()->trickle_down(true);
+        } else {
+    		result.reconstruct();
+        }
+    	if (not same) ff1.standard(false);
+    	ff2.standard(false);
+
 		return result;
     }
 

diff --git a/src/lib/mra/operator.h b/src/lib/mra/operator.h
@@ -413,6 +413,8 @@ namespace madness {
             for (std::size_t d=0; d<NDIM; ++d) Tnorm *= ops_1d[d]->Tnorm;
 
             if (at.t_term and (Tnorm>0.0)) {
+                tol = tol/(Tnorm*NDIM);  // Errors are relative within here
+
                 long break_even;
                 if (NDIM==1) break_even = long(0.5*k);
                 else if (NDIM==2) break_even = long(0.6*k);
@@ -467,37 +469,45 @@ namespace madness {
             for (std::size_t d=0; d<NDIM; ++d) Rnorm *= ops_1d[d]->Rnorm;
             if (Rnorm == 0.0) return;
 
-            tol = tol/(Rnorm*NDIM);  // Errors are relative within here
-
-            // Determine rank of SVD to use or if to use the full matrix
-            long twok = 2*k;
-            if (modified()) twok=k;
-            long break_even;
-            if (NDIM==1) break_even = long(0.5*twok);
-            else if (NDIM==2) break_even = long(0.6*twok);
-            else if (NDIM==3) break_even=long(0.65*twok);
-            else break_even=long(0.7*twok);
-            for (std::size_t d=0; d<NDIM; ++d) {
-                long r;
-                for (r=0; r<twok; ++r) {
-                    if (ops_1d[d]->Rs[r] < tol) break;
-                }
-                if (r >= break_even) {
-                    trans[d].r = twok;
-                    trans[d].U = ops_1d[d]->R.ptr();
-                    trans[d].VT = 0;
-                }
-                else {
-                    r += (r&1L);
-                    trans[d].r = std::max(2L,r);
-                    trans[d].U = ops_1d[d]->RU.ptr();
-                    trans[d].VT = ops_1d[d]->RVT.ptr();
-                }
-                trans2[d]=ops_1d[d]->R;
+            if (Rnorm > 1.e-20) {
+
+				tol = tol/(Rnorm*NDIM);  // Errors are relative within here
+
+				// Determine rank of SVD to use or if to use the full matrix
+				long twok = 2*k;
+				if (modified()) twok=k;
+				long break_even;
+				if (NDIM==1) break_even = long(0.5*twok);
+				else if (NDIM==2) break_even = long(0.6*twok);
+				else if (NDIM==3) break_even=long(0.65*twok);
+				else break_even=long(0.7*twok);
+				for (std::size_t d=0; d<NDIM; ++d) {
+					long r;
+					for (r=0; r<twok; ++r) {
+						if (ops_1d[d]->Rs[r] < tol) break;
+					}
+					if (r >= break_even) {
+						trans[d].r = twok;
+						trans[d].U = ops_1d[d]->R.ptr();
+						trans[d].VT = 0;
+					}
+					else {
+						r += (r&1L);
+						trans[d].r = std::max(2L,r);
+						trans[d].U = ops_1d[d]->RU.ptr();
+						trans[d].VT = ops_1d[d]->RVT.ptr();
+					}
+					trans2[d]=ops_1d[d]->R;
+				}
+				apply_transformation2(n, twok, tol, trans2, f, work1, work2, work5, mufac, result);
             }
-            apply_transformation2(n, twok, tol, trans2, f, work1, work2, work5, mufac, result);
 
-            if (n > 0) {
+            double Tnorm = 1.0;
+            for (std::size_t d=0; d<NDIM; ++d) Tnorm *= ops_1d[d]->Tnorm;
+
+            if (n > 0 and (Tnorm>1.e-20)) {
+				long break_even;
+
                 if (NDIM==1) break_even = long(0.5*k);
                 else if (NDIM==2) break_even = long(0.6*k);
                 else if (NDIM==3) break_even=long(0.65*k);