diff --git a/geometric/engine.py b/geometric/engine.py index bfb25e0e..17925861 100644 --- a/geometric/engine.py +++ b/geometric/engine.py @@ -650,7 +650,7 @@ def calc_wq_new(self, coords, dirname): out_scr += ['grad.xyz', 'mullpop'] for f in out_scr: out_files.append((os.path.join(dirname, self.scr, f), os.path.join(self.scr, f))) - queue_up_src_dest(wq, "terachem run.in &> run.out", in_files, out_files, verbose=False, print_time=600) + queue_up_src_dest(wq, "terachem run.in > run.out 2>&1", in_files, out_files, verbose=False, print_time=600) def number_output(self, dirname, calcNum): if not os.path.exists(os.path.join(dirname, 'run.out')): @@ -1455,7 +1455,7 @@ def calc_wq_new(self, coords, dirname): out_files = [('%s/output.dat' % dirname, 'output.dat'), ('%s/run.log' % dirname, 'run.log')] # We will assume that the number of threads on the worker is 1, as this maximizes efficiency # in the limit of large numbers of jobs, although it may be controlled via environment variables. - queue_up_src_dest(wq, 'psi4 input.dat &> run.log', in_files, out_files, verbose=False) + queue_up_src_dest(wq, 'psi4 input.dat > run.log 2>&1', in_files, out_files, verbose=False) def read_result(self, dirname, check_coord=None): """ Read Psi4 calculation output. """ @@ -1604,13 +1604,15 @@ def calc_wq_new(self, coords, dirname): self.M.edit_qcrems({'jobtype':'force'}) in_files = [('%s/run.in' % dirname, 'run.in')] out_files = [('%s/run.out' % dirname, 'run.out'), ('%s/run.log' % dirname, 'run.log')] + out_files += [('%s/run.d/131.0' % dirname, 'run.d/131.0')] if self.qcdir: self.M.edit_qcrems({'scf_guess':'read'}) in_files += [('%s/run.d/53.0' % dirname, '53.0')] - out_files += [('%s/run.d/131.0' % dirname, 'run.d/131.0')] - cmdstr = "mkdir -p run.d ; mv 53.0 run.d ; qchem run.in run.out run.d &> run.log" + cmdstr = "mkdir -p run.d ; mv 53.0 run.d ; qchem run.in run.out run.d > run.log 2>&1" else: - cmdstr = "qchem%s run.in run.out &> run.log" % self.nt() + if not os.path.exists('%s/run.d' % dirname): + os.makedirs('%s/run.d' % dirname) + cmdstr = "qchem%s run.in run.out run.d > run.log 2>&1" % self.nt() self.M[0].write(os.path.join(dirname, 'run.in')) queue_up_src_dest(wq, cmdstr, in_files, out_files, verbose=False) @@ -1638,7 +1640,7 @@ def read_result(self, dirname, check_coord=None): gradient = np.fromfile('%s/run.d/131.0' % dirname) except FileNotFoundError: logger.info("Reading gradient from Q-Chem output instead of run.d/131.0 because the latter cannot be found. Please report this to the developer.\n") - gradient = M1.qm_grads[-1] + gradient = M1.qm_grads[-1].flatten() # Assume that the last occurence of "S^2" is what we want. s2 = 0.0 # The 'iso-8859-1' prevents some strange errors that show up when reading the Archival summary line diff --git a/geometric/internal.py b/geometric/internal.py index 58ceaa28..a7cf2bed 100644 --- a/geometric/internal.py +++ b/geometric/internal.py @@ -1,7 +1,7 @@ """ internal.py: Internal coordinate systems -Copyright 2016-2020 Regents of the University of California and the Authors +Copyright 2016-2023 Regents of the University of California and the Authors Authors: Lee-Ping Wang, Chenchen Song @@ -1741,7 +1741,7 @@ def diagnostic(self, xyz): # result = 3 or above: failure of the IC system is imminent # result = 2: IC may fail for small perturbations from the provided geometry # result = 1: IC is unsuitable for the provided geometry, should pay attention - LinThres = [0.95, 0.98, 0.995] + LinThres = [np.abs(np.cos(theta*np.pi/180)) for theta in [155, 165, 175]] descrips = ["close to linear", "very close to linear", "extremely close to linear"] a = self.a b = self.b @@ -1758,7 +1758,7 @@ def diagnostic(self, xyz): thre = LinThres[i] desc = descrips[i] if cos1 > thre and cos2 > thre: - result = i+2 + result = i+1 message = "%s and %s angles are %s (|cos(theta)| %.4f, %.4f > %.4f threshold)" % (repr(Ang1), repr(Ang2), desc, cos1, cos2, thre) elif cos1 > thre: result = i+1 @@ -2034,8 +2034,9 @@ def convert_angstroms_degrees(prims, values): CacheWarning = False class InternalCoordinates(object): - def __init__(self): + def __init__(self, verbose=0): self.stored_wilsonB = OrderedDict() + self.verbose = verbose def addConstraint(self, cPrim, cVal): raise NotImplementedError("Constraints not supported with Cartesian coordinates") @@ -2434,8 +2435,8 @@ def rigid(self, val): self._rigid = val class PrimitiveInternalCoordinates(InternalCoordinates): - def __init__(self, molecule, connect=False, addcart=False, constraints=None, cvals=None, repulsions=[], transfers=[], connect_isolated=True, **kwargs): - super(PrimitiveInternalCoordinates, self).__init__() + def __init__(self, molecule, connect=False, addcart=False, constraints=None, cvals=None, repulsions=[], transfers=[], connect_isolated=True, verbose=0, **kwargs): + super(PrimitiveInternalCoordinates, self).__init__(verbose=verbose) # connect = True corresponds to "traditional" internal coordinates with minimum spanning bonds self.connect = connect # connect = False, addcart = True corresponds to HDLC @@ -2453,7 +2454,6 @@ def __init__(self, molecule, connect=False, addcart=False, constraints=None, cva self.transfers = transfers self.cPrims = [] self.cVals = [] - self.sync = [] self.Rotators = OrderedDict() self.elem = molecule.elem # List of fragments as determined by residue ID, distance criteria or bond order @@ -2803,7 +2803,7 @@ def makePrimitives(self, molecule, connect, addcart): aline.append(az) if atom_lines == atom_lines0: break atom_lines_uniq = [] - for i in atom_lines: # + for i in atom_lines: if tuple(i) not in set(atom_lines_uniq): atom_lines_uniq.append(tuple(i)) lthree = [l for l in atom_lines_uniq if len(l) > 2] @@ -3122,50 +3122,119 @@ def second_derivatives(self, xyz): # 5) 3 return np.array(answer) - def calcDiff(self, xyz1, xyz2, sync=0): + def calcDiff(self, xyz1, xyz2, sync=False): """ Calculate difference in internal coordinates (coord1-coord2), accounting for changes in 2*pi of angles. """ answer = [] for i, Internal in enumerate(self.Internals): diff = Internal.calcDiff(xyz1, xyz2) - if sync != 0 and i in self.sync: - # Sometimes, e.g. during interpolation, multiple dihedrals can change in opposite directions - # resulting in clashes. By turning on the sync flag we ensure the change is always in the same direction. - if sync == 1 and diff < -np.pi/2: - # print("dihedral-sync %i: %50s %.3f -> %.3f" % (sync, Internal, diff, diff+2*np.pi)) - diff += 2*np.pi - elif sync == -1 and diff > np.pi/2: - # print("dihedral-sync %i: %50s %.3f -> %.3f" % (sync, Internal, diff, diff-2*np.pi)) - diff -= 2*np.pi - elif sync not in [-1, 1]: - raise RuntimeError("Invalid value of sync") answer.append(diff) - return np.array(answer) + answer = np.array(answer) + if sync: + answer = self.syncDihedrals(xyz1, xyz2, answer) + return answer + + def syncDihedrals(self, xyz1, xyz2, primdiff, distthre=1.0, linthre=0.95, diffthre=np.pi/3): + rotors = {} + val1 = self.calculate(xyz1) - def syncDihedrals(self, xyz1, xyz2): - answer = self.calcDiff(xyz1, xyz2) - dih_by_trip = {} for i, Prim in enumerate(self.Internals): if type(Prim) is Dihedral: - dih_by_trip.setdefault((Prim.a, Prim.b, Prim.c), []).append(i) - dih_by_trip.setdefault((Prim.d, Prim.c, Prim.b), []).append(i) - for (a, b, c), dihIdx in dih_by_trip.items(): - # Being conservative here; only sync dihedrals where a-b-c are bonded and angles are not linear. - if np.abs(Distance(a, b).calcDiff(xyz2, xyz1)) > 0.5: continue - if np.abs(Distance(b, c).calcDiff(xyz2, xyz1)) > 0.5: continue - if np.abs(np.cos(Angle(a, b, c).value(xyz1))) > 0.95: continue - if np.abs(np.cos(Angle(a, b, c).value(xyz2))) > 0.95: continue - diffs = np.array([answer[i] for i in dihIdx]) - if all(np.abs(diffs) > np.pi/2) and len(np.unique(np.sign(diffs))) > 1: - for i in dihIdx: - self.sync.append(i) - # print("Turning on dihedral sync for", self.Internals[i]) - # self.Internals[i].sync = True - # if answer[i] < 0: - # answer[i] += 2*np.pi - # print("Dihedrals with center bond %s:" % str(bond)) - # for i in dihIdx: - # print("%50s %.3f" % (self.Internals[i], answer[i])) - + if Prim.b < Prim.c: + a, b, c, d = (Prim.a, Prim.b, Prim.c, Prim.d) + else: + a, b, c, d = (Prim.d, Prim.c, Prim.b, Prim.a) + if np.abs(Distance(a, b).calcDiff(xyz2, xyz1)) > distthre: continue + if np.abs(Distance(b, c).calcDiff(xyz2, xyz1)) > distthre: continue + if np.abs(Distance(c, d).calcDiff(xyz2, xyz1)) > distthre: continue + if np.abs(np.cos(Angle(a, b, c).value(xyz1))) > linthre: continue + if np.abs(np.cos(Angle(a, b, c).value(xyz2))) > linthre: continue + if np.abs(np.cos(Angle(b, c, d).value(xyz1))) > linthre: continue + if np.abs(np.cos(Angle(b, c, d).value(xyz2))) > linthre: continue + rotors.setdefault((b, c), []).append(i) + + newdiff = primdiff.copy() + newdiffAlt = primdiff.copy() + + def dihedral_clash(dih_by_abc, dih_by_bcd, diff_): + clashPrims = [] + for dih_by_trip in (dih_by_abc, dih_by_bcd): + for iEnd, tPrims in dih_by_trip.items(): + # print("Dihedral angles pairs with %i, %i central bond and %i end atom:" % (b+1, c+1, iEnd+1)) + for i in range(len(tPrims)): + for j in range(i): + iPrim = tPrims[i] + jPrim = tPrims[j] + iDih = self.Internals[iPrim] + jDih = self.Internals[jPrim] + A = iDih.value(xyz1) + B = A + diff_[iPrim] + C = jDih.value(xyz1) + D = C + diff_[jPrim] + xzero = (C-A)/(B+C-D-A) + xplus = (C-A+2*np.pi)/(B+C-D-A) + xminus = (C-A-2*np.pi)/(B+C-D-A) + xarray = np.array([xzero, xplus, xminus]) + if ((xarray<1)*(xarray>0)).any(): + # print("\x1b[1;91mWarning:\x1b[0m %20s - %-20s may clash" % (self.Internals[iPrim], self.Internals[jPrim]), end=' ') + if self.verbose >= 2: + print("%20s - %-20s may clash" % (self.Internals[iPrim], self.Internals[jPrim]), end=' ') + print("% 10.5f -> % 10.5f ; % 10.5f -> % 10.5f" % (A*180/np.pi, B*180/np.pi, C*180/np.pi, D*180/np.pi)) + # print("Crossing points: % 10.5f % 10.5f % 10.5f" % (xzero, xplus, xminus)) + clashPrims.append((iDih, jDih)) + return clashPrims + + for (b, c), iPrims in rotors.items(): + if len(iPrims) < 2: continue + dih_by_abc = {} + dih_by_bcd = {} + for iPrim in iPrims: + Prim = self.Internals[iPrim] + if Prim.b < Prim.c: + a, b, c, d = (Prim.a, Prim.b, Prim.c, Prim.d) + else: + a, b, c, d = (Prim.d, Prim.c, Prim.b, Prim.a) + dih_by_abc.setdefault(a, []).append(iPrim) + dih_by_bcd.setdefault(d, []).append(iPrim) + # The "unchanged" delta(dihedral angles) that come from calcDiff + diffzero = primdiff[iPrims] + # Modified dihedral angle deltas where deltas larger than a threshold are forced to be all positive + diffplus = primdiff[iPrims] + 2*np.pi*(primdiff[iPrims]<0) * (np.abs(primdiff[iPrims])>diffthre) + # Modified dihedral angle deltas where deltas larger than a threshold are forced to be all negative + diffminus = primdiff[iPrims] - 2*np.pi*(primdiff[iPrims]>0) * (np.abs(primdiff[iPrims])>diffthre) + # Choose the set of deltas that has a smaller overall rotation. + # The "alternatives" correspond to rotating the other way. + print_changes = False + if np.allclose(diffzero, diffplus, atol=1e-6, rtol=0) or np.allclose(diffzero, diffminus, atol=1e-6, rtol=0): + print_changes = False + elif np.abs(np.mean(diffplus)) < np.abs(np.mean(diffminus)): + newdiff[iPrims] = diffplus + newdiffAlt[iPrims] = diffminus + print_changes = True + elif np.abs(np.mean(diffminus)) < np.abs(np.mean(diffplus)): + newdiff[iPrims] = diffminus + newdiffAlt[iPrims] = diffplus + print_changes = True + # Detect if any dihedral pairs that share 3 atoms will clash (i.e. have the same value) somewhere along the pathway + clash = dihedral_clash(dih_by_abc, dih_by_bcd, newdiff) + clash_alt = dihedral_clash(dih_by_abc, dih_by_bcd, newdiffAlt) + # If the clash is removed in the alternate direction, then go in the alternate direction. + # Otherwise the clash is unavoidable (think swapping two H's bonded to the same C in cyclopropane). + if clash: + if clash_alt: + if not hasattr(self, 'dihedral_warning_printed'): + print("Warning: Unavoidable clash in %i dihedral angles." % len(clash)) + self.dihedral_warning_printed = True + else: + if self.verbose: print("Warning: Dihedral clash in one direction, reversing.") + newdiff = newdiffAlt.copy() + clash = clash_alt + if print_changes: + if self.verbose: print("Dihedral sign change detected around bond %i-%i %s" % (b+1, c+1, "with %i clashes" % len(clash) if len(clash) > 0 else "")) + for iPrim in iPrims: + Prim = self.Internals[iPrim] + if self.verbose >= 2: print("%-20s init final diff -> newdiff = % 10.5f % 10.5f % 10.5f -> % 10.5f" % (Prim, Prim.value(xyz1)*180/np.pi, Prim.value(xyz2)*180/np.pi, primdiff[iPrim]*180/np.pi, newdiff[iPrim]*180/np.pi)) + return newdiff + def GInverse(self, xyz): return self.GInverse_SVD(xyz) @@ -3215,19 +3284,15 @@ def addConstraint(self, cPrim, cVal=None, xyz=None): def reorderPrimitives(self): # Reorder primitives to be in line with cc's code newPrims = [] - newSync = [] for cPrim in self.cPrims: newPrims.append(cPrim) for typ in [Distance, DistanceDifference, ReducedDistance, Angle, LinearAngle, MultiAngle, OutOfPlane, Dihedral, MultiDihedral, CartesianX, CartesianY, CartesianZ, TranslationX, TranslationY, TranslationZ, RotationA, RotationB, RotationC]: for i, p in enumerate(self.Internals): if type(p) is typ and p not in self.cPrims: newPrims.append(p) - if i in self.sync: - newSync.append(len(newPrims)-1) if len(newPrims) != len(self.Internals): raise RuntimeError("Not all internal coordinates have been accounted for. You may need to add something to reorderPrimitives()") self.Internals = newPrims - self.sync = newSync def getConstraints_from(self, other): if other.haveConstraints(): @@ -3418,8 +3483,8 @@ def covalent(a, b): class DelocalizedInternalCoordinates(InternalCoordinates): - def __init__(self, molecule, imagenr=0, build=False, connect=False, addcart=False, constraints=None, cvals=None, rigid=False, remove_tr=False, cart_only=False, conmethod=0, repulsions=[], transfers=[], connect_isolated=True): - super(DelocalizedInternalCoordinates, self).__init__() + def __init__(self, molecule, imagenr=0, build=False, connect=False, addcart=False, constraints=None, cvals=None, rigid=False, remove_tr=False, cart_only=False, conmethod=0, repulsions=[], transfers=[], connect_isolated=True, verbose=0): + super(DelocalizedInternalCoordinates, self).__init__(verbose=verbose) # cart_only is just because of how I set up the class structure. if cart_only: return # Set the algorithm for constraint satisfaction. @@ -4106,9 +4171,6 @@ def calcDiff(self, coord1, coord2, sync=False): Answer = np.dot(PMDiff, self.Vecs) return np.array(Answer).flatten() - def syncDihedrals(self, xyz1, xyz2): - self.Prims.syncDihedrals(xyz1, xyz2) - def calculate(self, coords): """ Calculate the DLCs given the Cartesian coordinates. """ PrimVals = self.Prims.calculate(coords) @@ -4172,8 +4234,8 @@ class CartesianCoordinates(PrimitiveInternalCoordinates): This one does not support constraints, because that requires adding some primitive internal coordinates. """ - def __init__(self, molecule, **kwargs): - super(CartesianCoordinates, self).__init__(molecule) + def __init__(self, molecule, verbose=0, **kwargs): + super(CartesianCoordinates, self).__init__(molecule, verbose=verbose) self.Internals = [] self.cPrims = [] self.cVals = [] diff --git a/geometric/interpolate.py b/geometric/interpolate.py index f0b36d41..0acc11d3 100755 --- a/geometric/interpolate.py +++ b/geometric/interpolate.py @@ -35,6 +35,11 @@ def __init__(self, stream = sys.stdout): handler = RawStreamHandler() logger.addHandler(handler) +def write_coord_segment(M, coord_segment, output_filename): + M_copy = deepcopy(M) + M_copy.xyzs = [x.reshape(-1, 3)*bohr2ang for x in coord_segment] + M_copy.write(output_filename) + # Need to refactor this so that it uses the minimum of the equilibrium bond length and the sum of covalent radii. def get_rab(M, i, j, bohr=True): if bohr: @@ -150,7 +155,7 @@ def find_path(mtx, thre=1): # If the starting point or end point is blocked, we can't escape. if not okay[n-1, n-1] or not okay[0, 0]: - raise RuntimeError("Spoo!") + raise RuntimeError("Failed to find a path through the diagnostic map!") okay2 = okay.copy() for niter in range(n**2): @@ -404,23 +409,20 @@ def dq_scale_prims(IC, dest_coords, curr_coords, scale_factors={}, sync=0): dq = np.dot(dqPrims, IC.Vecs) return dq -def interpolate_segment(IC, curr_coords, dest_coords, nDiv, backward=False, rebuild_dlc=False, err_thre=1e-2): +def interpolate_segment(IC, curr_coords, dest_coords, nDiv, backward=False, rebuild_dlc=False, err_thre=1e-2, verbose=0, sync=0): if backward: tmp = curr_coords.copy() curr_coords = dest_coords.copy() dest_coords = tmp.copy() - sync = -1 - else: - sync = 1 dq = IC.calcDiff(dest_coords, curr_coords, sync=sync) coord_segment = [curr_coords] for k in range(nDiv): if rebuild_dlc: IC.build_dlc(curr_coords) dq = IC.calcDiff(dest_coords, curr_coords, sync=sync) - new_coords = IC.newCartesian(curr_coords, dq/(nDiv-k), verbose=0) + new_coords = IC.newCartesian(curr_coords, dq/(nDiv-k), verbose=verbose) else: - new_coords = IC.newCartesian(curr_coords, dq/nDiv, verbose=0) + new_coords = IC.newCartesian(curr_coords, dq/nDiv, verbose=verbose) coord_segment.append(new_coords) curr_coords = new_coords.copy() _, endpt_err = calc_drms_dmax(curr_coords, dest_coords, align=True) @@ -494,7 +496,7 @@ def print_map(mtx, title, colorscheme=0): print() class Interpolator(object): - def __init__(self, M_in, n_frames = 50, use_midframes = False, align_system=False, do_prealign=False): + def __init__(self, M_in, n_frames = 50, use_midframes = False, align_system=False, align_frags=False, verbose=0): # The input molecule; it should not be modified. self.M_in = deepcopy(M_in) # Check the length of the input molecule @@ -502,7 +504,7 @@ def __init__(self, M_in, n_frames = 50, use_midframes = False, align_system=Fals if use_midframes: self.do_init = False assert len(self.M_in) >= 5, "When setting use_midframes, input molecule must have >= 5 structures" - assert do_prealign is False, "Do not pass do_prealign if using intermediate frames" + assert align_frags is False, "Do not pass align_frags if using intermediate frames" assert n_frames == 0, "Do not pass n_frames when setting use_midframes" self.n_frames = len(self.M_in) else: @@ -515,9 +517,9 @@ def __init__(self, M_in, n_frames = 50, use_midframes = False, align_system=Fals self.M = deepcopy(self.M_in) self.n_atoms = len(self.M.elem) # Whether to pre-align the fragments in the reactant and product structure - self.do_prealign = do_prealign + self.align_frags = align_frags # Whether to align the input frames to the reactant structure upon input and before writing output - self.align_system = True if self.do_prealign else align_system + self.align_system = True if self.align_frags else align_system if self.align_system: self.M.align() # Atom pairs that are not bonded (resp. bonded) in either the reactant or product @@ -549,6 +551,7 @@ def __init__(self, M_in, n_frames = 50, use_midframes = False, align_system=Fals self.atom_pairs = [list(pair) for pair in atom_pairs.copy()] # The smaller of the distances at either endpoint for each pair self.min_enddists = np.min(np.array(self.enddists), axis=0) + self.verbose = verbose def get_splice_length(self): # Determine the splice length @@ -721,8 +724,6 @@ def prealign_fragments(self): xyz1_stage = xyz1.copy() IC0 = DelocalizedInternalCoordinates(M0, build=True, connect=False, addcart=False, connect_isolated=False) IC1 = DelocalizedInternalCoordinates(M1, build=True, connect=False, addcart=False, connect_isolated=False) - IC0.syncDihedrals(xyz0, xyz1) - IC1.syncDihedrals(xyz0, xyz1) self.endICs = [IC0, IC1] M_reac = None M_prod = None @@ -845,7 +846,6 @@ def initial_guess_one_method(self, method = 0): # Finalize the IC system. IC.Prims.Internals = newPrims IC.Prims.reorderPrimitives() - IC.Prims.syncDihedrals(xyz1, xyz0) IC.build_dlc(xyz_IC) # print("=== Primitives for method %i ===" % method) # print(IC.Prims) @@ -870,18 +870,26 @@ def initial_guess_one_method(self, method = 0): clash_pairs = [] nDiv = n_frames - 1 for clash_round in range(5): - coord_segment, endpt_err, _ = interpolate_segment(IC, xyz0, xyz1, nDiv, backward=(method%2==1), rebuild_dlc=True) + coord_segment, endpt_err, _ = interpolate_segment(IC, xyz0, xyz1, nDiv, backward=(method%2==1), rebuild_dlc=True, sync=1) dq_segment = np.array([IC_Chk.calcDiff(coord_segment[i], coord_segment[i+1]) for i in range(n_frames-1)]) + # Don't evaluate dq_segment for primitives that have a diagnostic of 2 or higher because the result will be invalid. + prim_diag = np.max([[Prim.diagnostic(coord_segment[i])[0] for Prim in IC_Chk.Internals] for i in range(n_frames)], axis=0) # Setting to -1e-6 essentially ignores the checking primitives if they evaluate to +/- inf. + dq_segment[:, prim_diag >= 2] = -1e-6 dq_segment[dq_segment == -np.inf] = -1e-6 dq_segment[dq_segment == np.inf] = 1e-6 dq_max = np.max(np.abs(dq_segment), axis=0) dq_med = np.median(np.abs(dq_segment), axis=0) dq_med[dq_med<0.01] = 0.01 dq_ratio = max(dq_max/dq_med) - + M.xyzs = [x.reshape(-1, 3)*bohr2ang for x in coord_segment] M.comms = ['Interpolated frame %i' % k for k in range(len(coord_segment))] + + # Perform equal spacing of the pathway according to the largest change in any primitive coordinate + dq_relative = dq_segment / dq_med[np.newaxis, :] + M = EqualSpacing(M, RMSD=False, align=False, custom=np.max(dq_relative, axis=1)) + # Fail if the endpt_err is greater than the threshold if endpt_err > err_thre: M.align() @@ -997,7 +1005,6 @@ def build_IC_segments(self): for i in range(len(M)): M_i = M[i] IC = DelocalizedInternalCoordinates(M_i, build=True, connect=False, addcart=False, transfers=transfers, connect_isolated=False) - IC.Prims.syncDihedrals(xyzs[-1], xyzs[0]) IC.build_dlc(xyzs[i]) ICs.append(IC) @@ -1011,6 +1018,42 @@ def build_IC_segments(self): print_map(diag_matrix, "IC diagnostic map", colorscheme=0) segments = find_path(diag_matrix) + + # For each point k on the path, compute the maximum dihedral angle change for i ... k and k ... j + # And if the dihedral angle change i ... j is greater than the threshold, split at k + # Repeat these steps until there are no dihedral angle changes larger than the threshold + # in the intervals i ... k1 ... k2 ... j . + while True: + splitted = False + new_segments = [] + for isegment, (i, j) in enumerate(segments): + dih_ij = [] + for k in [i, j]: + for Prim in ICs[k].Prims.Internals: + if type(Prim) is Dihedral and Prim not in dih_ij: + dih_ij.append(Prim) + if len(dih_ij) == 0: + new_segments.append((i, j)) + continue + dihArcs = [[0.0 for Prim in dih_ij]] + for k in range(i, j-1): + dihArcs.append([np.abs(Prim.calcDiff(xyzs[k+1], xyzs[k]) * (Prim.diagnostic(xyzs[k+1])[0] < 2)) for Prim in dih_ij]) + dihArcs = np.cumsum(np.array(dihArcs), axis=0) + dMax = np.max(np.abs(dihArcs)) + dThre = 5*np.pi/6 + if dMax > dThre: + dSplits = [] + for k in range(j-i): + dSplits.append(max(np.max(dihArcs[k]), np.max(dihArcs[-1]-dihArcs[k]))) + split_k = i + np.argmin(dSplits) + print("maximum dihedral difference between %i-%i is %.3f; splitting at %i (reduce to %.3f)" % (i, j, dMax, split_k, np.min(dSplits))) + new_segments.append((i, split_k)) + new_segments.append((split_k+1, j)) + else: + new_segments.append((i, j)) + if segments == new_segments: break + segments = new_segments[:] + segments_filled, segments_spliced = fill_splice_segments(segments, splice_length) print("The frame numbers of the spliced segments are:") print(segments_spliced) @@ -1057,11 +1100,27 @@ def splice_iterations(self): nDiv = j-i success = False for c, (iic, IC) in enumerate(segment_to_ICs[a]): + if self.verbose: + print("In splice_iterations: c = %i, iic = %i, IC = %s" % (c, iic, IC.__repr__())) + if self.verbose >= 2: + vali = IC.Prims.calculate(xyzi) + valj = IC.Prims.calculate(xyzj) + primDiff = IC.Prims.calcDiff(xyzj, xyzi, sync=0) + for iPrim in range(len(IC.Prims.Internals)): + print("%3i %25s % 9.5f % 9.5f % 9.5f" % (iPrim, IC.Prims.Internals[iPrim], vali[iPrim], valj[iPrim], primDiff[iPrim])) attempt = 0 - coord_segment, endpt_err, success = interpolate_segment(IC, xyzi, xyzj, nDiv, backward=backwards[a][c], rebuild_dlc=False) + coord_segment, endpt_err, success = interpolate_segment(IC, xyzi, xyzj, nDiv, backward=backwards[a][c], rebuild_dlc=False, verbose=self.verbose, sync=0) + if self.verbose: + print("forward direction: endpt_err = %8.3f success = %i" % (endpt_err, success)) + if self.verbose >= 2: + write_coord_segment(M, coord_segment, "cycle%i_segment%i_IC%i_attempt%i.xyz" % (cycle, a, c, attempt)) if success: break attempt = 1 - coord_segment, endpt_err, success = interpolate_segment(IC, xyzi, xyzj, nDiv, backward=not backwards[a][c], rebuild_dlc=False) + coord_segment, endpt_err, success = interpolate_segment(IC, xyzi, xyzj, nDiv, backward=not backwards[a][c], rebuild_dlc=False, verbose=self.verbose, sync=0) + if self.verbose: + print("backward direction: endpt_err = %8.3f success = %i" % (endpt_err, success)) + if self.verbose >= 2: + write_coord_segment(M, coord_segment, "cycle%i_segment%i_IC%i_attempt%i.xyz" % (cycle, a, c, attempt)) if success: backwards[a][c] = not backwards[a][c] break @@ -1124,8 +1183,13 @@ def splice_iterations(self): else: increase_count += 1 decrease_count = 0 - print(">> Mismatch fails to decrease; increasing damping") - damping *= 0.8 + min_damping = 0.2 + if damping <= min_damping: + print(">> Mismatch fails to decrease but damping at maximum (% .3e) - continuing" % damping) + else: + damping = max(min_damping, damping*0.8) + print(">> Mismatch fails to decrease; increasing damping (currently % .3e)" % damping) + last_max_mismatch = max_mismatch segment_endpoints, _ = splice_segment_endpoints(coord_segments, segment_endpoints, segments_spliced, splice_length, damping, verbose=False) @@ -1144,7 +1208,6 @@ def splice_iterations(self): new_repulsions = sorted(list(set(IC.Prims.repulsions).union(set(clash_pairs)))) IC1 = DelocalizedInternalCoordinates(M_in[iic], build=True, connect=False, addcart=False, repulsions=new_repulsions, transfers=IC.Prims.transfers, connect_isolated=False) - IC1.syncDihedrals(xyz1, xyz0) IC1.build_dlc(M_in.xyzs[iic].flatten()*ang2bohr) rebuilt_segment_to_ICs.append((iic, IC1)) segment_to_ICs[a] = rebuilt_segment_to_ICs @@ -1167,7 +1230,7 @@ def split_IC_segments(self, fail_segment): self.segment_endpoints = get_segment_endpoints(self.M, self.segments_spliced) def run_workflow(self): - if self.do_prealign: + if self.align_frags: self.prealign_fragments() if self.do_init: self.initial_guess() @@ -1183,9 +1246,9 @@ def run_workflow(self): def main(): args = parse_interpolate_args(sys.argv[1:]) params = InterpParams(**args) - M0 = Molecule(sys.argv[1]) - #interpolator = Interpolator(M0, use_midframes=True, n_frames=0, align_system=True, do_prealign=False) - interpolator = Interpolator(M0, n_frames= params.nframes, use_midframes=params.optimize, align_system=params.align, do_prealign=params.prealign) + M0 = Molecule(args['input']) + #interpolator = Interpolator(M0, use_midframes=True, n_frames=0, align_system=True, align_frags=False) + interpolator = Interpolator(M0, n_frames= params.nframes, use_midframes=params.optimize, align_system=params.align_system, align_frags=params.align_frags, verbose=params.verbose) interpolator.run_workflow() if __name__ == "__main__": diff --git a/geometric/molecule.py b/geometric/molecule.py index 077cd295..609f24dd 100644 --- a/geometric/molecule.py +++ b/geometric/molecule.py @@ -916,7 +916,7 @@ def arc(Mol, begin=None, end=None, RMSD=True, align=True): Arc = np.array(Arc) return Arc -def EqualSpacing(Mol, frames=0, dx=0, RMSD=True, align=True, spline=False): +def EqualSpacing(Mol, frames=0, dx=0, RMSD=True, align=True, spline=False, custom=None): """ Equalize the spacing of frames in a trajectory with linear interpolation. This is done in a very simple way, first calculating the arc length @@ -935,6 +935,8 @@ def EqualSpacing(Mol, frames=0, dx=0, RMSD=True, align=True, spline=False): Return a Molecule object with this number of frames. RMSD : bool Use RMSD in the arc length calculation. + custom : np.array or None + Provide custom spacings between frames. Returns ------- @@ -943,7 +945,12 @@ def EqualSpacing(Mol, frames=0, dx=0, RMSD=True, align=True, spline=False): or with equally spaced frames. """ import scipy.interpolate - ArcMol = arc(Mol, RMSD=RMSD, align=align) + if type(custom) is np.ndarray: + ArcMol = custom.copy() + if ArcMol.shape != (len(Mol)-1, ): + raise RuntimeError("Custom spacing needs to match Molecule-length - 1") + else: + ArcMol = arc(Mol, RMSD=RMSD, align=align) ArcMolCumul = np.insert(np.cumsum(ArcMol), 0, 0.0) if frames != 0 and dx != 0: logger.error("Provide dx or frames or neither") @@ -2284,6 +2291,139 @@ def build_topology(self, force_bonds=True, bond_order=0.0, metal_bo_factor=0.5, # Deprecated in networkx 2.2 # self.molecules = list(nx.connected_component_subgraphs(G)) + def group_atoms_by_topology(self, bond_lim=10, verbose=False): + """ + Determine topologically equivalent atoms. + + bond_lim: Limit on the size of the fingerprint (default 10 bonds). + + Works like this: Suppose we have a molecular graph given as + H H H + \ \ / + H--C--C##C--H C==C==C + / / \ + H H H + + with numbers given as + + 4 9 13 + \ \ / + 5--3--2##0--1 8==7==11 + / / \ + 6 10 12 + + The goal is to return a list of chemically equivalent atom groups as: + [[0], [1], [2], [3], [4, 5, 6], [7], [8, 11], [9, 10, 12, 13]] + + (Note: This grouping is by connectivity only, and does not distinguish + between bonds of different orders, enantiomers or cis/trans isomerism.) + + If we assign a fingerprint to an atom, given by a list where element 'i' + is the concatenated symbols of the atoms separated by 'i' bonds from that atom, + the fingerprints for the above system would be: + + 0 C-CH-C-HHH + 1 H-C-C-C-HHH + 2 C-CC-HHHH + 3 C-CHHH-C-H + 4 H-C-CHH-C-H + 5 H-C-CHH-C-H + 6 H-C-CHH-C-H + 7 C-CC-HHHH + 8 C-CHH-C-HH + 9 H-C-CH-C-HH + 10 H-C-CH-C-HH + 11 C-CHH-C-HH + 12 H-C-CH-C-HH + 13 H-C-CH-C-HH + + and a corresponding grouping of: + [[0], [1], [2, 7], [3], [4, 5, 6], [8, 11], [9, 10, 12, 13]] + + This doesn't distinguish atoms uniquely because atoms 2 and 7 (the central carbons) + have the same fingerprint even though they are chemically different. + This is because the fingerprint does not take into account the different topology + around carbons 0, 3 (i.e. bonded to 1 and 3 Hs respectively) and carbons + 8, 11 (i.e. both bonded to two Hs). + + The solution is to do another iteration of the above but using the fingerprints + in place of the atomic symbols. This creates new, larger fingerprints as: + + 0 C-CH-C-HHH_C-CC-HHHH,H-C-C-C-HHH_C-CHHH-C-H_H-C-CHH-C-H,H-C-CHH-C-H,H-C-CHH-C-H + 1 H-C-C-C-HHH_C-CH-C-HHH_C-CC-HHHH_C-CHHH-C-H_H-C-CHH-C-H,H-C-CHH-C-H,H-C-CHH-C-H + 2 C-CC-HHHH_C-CH-C-HHH,C-CHHH-C-H_H-C-C-C-HHH,H-C-CHH-C-H,H-C-CHH-C-H,H-C-CHH-C-H + 3 C-CHHH-C-H_C-CC-HHHH,H-C-CHH-C-H,H-C-CHH-C-H,H-C-CHH-C-H_C-CH-C-HHH_H-C-C-C-HHH + 4 H-C-CHH-C-H_C-CHHH-C-H_C-CC-HHHH,H-C-CHH-C-H,H-C-CHH-C-H_C-CH-C-HHH_H-C-C-C-HHH + 5 H-C-CHH-C-H_C-CHHH-C-H_C-CC-HHHH,H-C-CHH-C-H,H-C-CHH-C-H_C-CH-C-HHH_H-C-C-C-HHH + 6 H-C-CHH-C-H_C-CHHH-C-H_C-CC-HHHH,H-C-CHH-C-H,H-C-CHH-C-H_C-CH-C-HHH_H-C-C-C-HHH + 7 C-CC-HHHH_C-CHH-C-HH,C-CHH-C-HH_H-C-CH-C-HH,H-C-CH-C-HH,H-C-CH-C-HH,H-C-CH-C-HH + 8 C-CHH-C-HH_C-CC-HHHH,H-C-CH-C-HH,H-C-CH-C-HH_C-CHH-C-HH_H-C-CH-C-HH,H-C-CH-C-HH + 9 H-C-CH-C-HH_C-CHH-C-HH_C-CC-HHHH,H-C-CH-C-HH_C-CHH-C-HH_H-C-CH-C-HH,H-C-CH-C-HH + 10 H-C-CH-C-HH_C-CHH-C-HH_C-CC-HHHH,H-C-CH-C-HH_C-CHH-C-HH_H-C-CH-C-HH,H-C-CH-C-HH + 11 C-CHH-C-HH_C-CC-HHHH,H-C-CH-C-HH,H-C-CH-C-HH_C-CHH-C-HH_H-C-CH-C-HH,H-C-CH-C-HH + 12 H-C-CH-C-HH_C-CHH-C-HH_C-CC-HHHH,H-C-CH-C-HH_C-CHH-C-HH_H-C-CH-C-HH,H-C-CH-C-HH + 13 H-C-CH-C-HH_C-CHH-C-HH_C-CC-HHHH,H-C-CH-C-HH_C-CHH-C-HH_H-C-CH-C-HH,H-C-CH-C-HH + + and a corresponding grouping of: + [[0], [1], [2], [3], [4, 5, 6], [7], [8, 11], [9, 10, 12, 13]] + + Now atoms 2 and 7 are considered to be different, and the differences in the topology + of the atoms bonded to atoms 2 and 7 are built into the representation. It encodes that + atom 2 is bonded to atoms C-CH-C-HHH,C-CHHH-C-H + atom 7 is bonded to atoms C-CHH-C-HH,C-CHH-C-HH. + Note the separator characters have changed from '', '-' to ',', '_' as well. + + This process can be repeated until a self-consistent set of groups is found. + An upper limit on the number of cycles is imposed because the label size can blow up + very quickly. + """ + + ids = [self.elem[i] for i in range(self.na)] + old_groups = None + cycle = 0 + + bond_seps = ['-', '_', '#', '@', '$'] + id_seps = ['', ',', '.', ';', ':'] + while True: + group_dict = {} + for i in range(self.na): + group_dict.setdefault(ids[i], []).append(i) + groups = sorted(list(group_dict.values())) + if groups == old_groups: break + if cycle == 5: + raise RuntimeError("Not designed to go for more than 5 cycles.") + if verbose: + logger.info("===Cycle %i===\n" % cycle) + logger.info("IDs of each atom:\n") + for i in range(self.na): + logger.info("%3i %s\n" % (i, ids[i])) + logger.info("Atom groups:\n") + logger.info(str(groups)+"\n") + + # Dictionary that looks like: + # { anum1 : {anum2 : path_12}} + spl = dict(nx.all_pairs_shortest_path_length(self.topology)) + + new_ids = [] + + for key in sorted(spl.keys()): + val = spl[key] + fp_dict = {} + for key2, val2 in val.items(): + if val2 > (bond_lim-cycle): continue + fp_dict.setdefault(val2, []).append(ids[key2]) + fp_dict[val2].sort() + fp_list = [] + for dist in sorted(fp_dict.keys()): + fp_list.append(id_seps[cycle].join(fp_dict[dist])) + + new_ids.append(bond_seps[cycle].join(fp_list)) + + old_groups = copy.deepcopy(groups) + ids = new_ids[:] + cycle += 1 + return groups + def distance_matrix(self, pbc=True): """ Obtain distance matrix between all pairs of atoms. """ AtomIterator = np.ascontiguousarray(np.vstack((np.fromiter(itertools.chain(*[[i]*(self.na-i-1) for i in range(self.na)]),dtype=int), diff --git a/geometric/params.py b/geometric/params.py index d46ad8cd..41b59162 100644 --- a/geometric/params.py +++ b/geometric/params.py @@ -262,14 +262,14 @@ def __init__(self, **kwargs): self.optimize = kwargs.get('optimize', False) if self.optimize: self.nframes = 0 - self.prealign = False + self.align_frags = False else: # Number of frames that will be used for the interpolation. - self.nframes = kwargs.get('nframes', False) + self.nframes = kwargs.get('nframes', 50) # Whether we want to align the molecules in reactant and product frames. - self.prealign = kwargs.get('prealign', False) + self.align_frags = kwargs.get('align_frags', False) # Whether we want to align the initial interpolate trajectory before optimization. - self.align = kwargs.get('align', False) + self.align_system = kwargs.get('align_system', True) # Verbose printout self.verbose = kwargs.get('verbose', 0) @@ -504,8 +504,9 @@ def parse_neb_args(*args): grp_nebparam.add_argument('--trust', type=float, help='Starting trust radius, defaults to 0.1 (energy minimization) or 0.01 (TS optimization).\n ') grp_nebparam.add_argument('--tmax', type=float, help='Maximum trust radius, defaults to 0.3 (energy minimization) or 0.03 (TS optimization).\n ') grp_nebparam.add_argument('--tmin', type=float, help='Minimum trust radius, do not reject steps trust radius is below this threshold (method-dependent).\n ') - grp_nebparam.add_argument('--skip', action='store_true', help='Skip Hessian updates that would introduce negative eigenvalues.') + grp_nebparam.add_argument('--skip', action='store_true', help='Skip Hessian updates that would introduce negative eigenvalues.\n ') grp_nebparam.add_argument('--epsilon', type=float, help='Small eigenvalue threshold for resetting Hessian, default 1e-5.\n ') + grp_nebparam.add_argument('--port', type=int, help='Work Queue port used to distribute singlepoint calculations. Workers must be started separately.\n') grp_output = parser.add_argument_group('output', 'Control the format and amount of the output') grp_output.add_argument('--prefix', type=str, help='Specify a prefix for log file and temporary directory.\n' @@ -513,7 +514,6 @@ def parse_neb_args(*args): grp_output.add_argument('--verbose', type=int, help='Set to positive for more verbose printout.\n' '0 = Default print level. 1 = Basic info about optimization step.\n' '2 = Include microiterations. 3 = Lots of printout from low-level functions.\n ') - grp_help = parser.add_argument_group('help', 'Get help') grp_help.add_argument('-h', '--help', action='help', help='Show this help message and exit') @@ -550,8 +550,8 @@ def parse_interpolate_args(*args): 'The followting two arguments, nframes and prealign, will be ignored.\n' 'The input xyz file must contain 5 or more structures.\n') grp_univ.add_argument('--nframes', type=int, help='Number of frames, needs to be bigger than 5, that will be used to interpolate, default 50\n') - grp_univ.add_argument('--prealign', type=str2bool, help='Provide "yes" to align the molecules in reactant and product before the interpolation.\n') - grp_univ.add_argument('--align', type=str2bool, help='Provide "yes" to align the initial interpolated trajectory.\n') + grp_univ.add_argument('--align_frags', type=str2bool, help='Provide "yes" to align fragments in reactant and product structure prior to interpolation.\n') + grp_univ.add_argument('--align_system', type=str2bool, help='Provide "yes" to align the entire system prior to interpolation.\n') grp_output = parser.add_argument_group('output', 'Control the format and amount of the output') grp_output.add_argument('--prefix', type=str, help='Specify a prefix for log file and temporary directory.\n' @@ -570,4 +570,6 @@ def parse_interpolate_args(*args): if v is not None: args_dict[k] = v + print("LPW debug:", args_dict) + return args_dict diff --git a/geometric/tests/test_neb.py b/geometric/tests/test_neb.py index dace8641..1dd89a3c 100644 --- a/geometric/tests/test_neb.py +++ b/geometric/tests/test_neb.py @@ -2,11 +2,13 @@ A set of tests for NEB calculations """ -import os, json, copy +import os, json, copy, shutil from . import addons +import pytest import geometric import tempfile import numpy as np +import subprocess localizer = addons.in_folder datad = addons.datad @@ -66,6 +68,46 @@ def test_hcn_neb_optimize(localizer): assert final_chain.maxg < params.maxg assert final_chain.avgg < params.avgg +class TestPsi4WorkQueueNEB: + + """ Tests are put into class so that the fixture can terminate the worker process. """ + + @pytest.fixture(autouse=True) + def work_queue_cleanup(self): + self.workers = None + yield + if self.workers is not None: + for worker in self.workers: + worker.terminate() + + @addons.using_psi4 + @addons.using_workqueue + def test_psi4_work_queue_neb(self, localizer): + M, engine = geometric.prepare.get_molecule_engine( + input=os.path.join(datad, "hcn_neb_input.psi4in"), + chain_coords=os.path.join(datad, "hcn_neb_input.xyz"), + images=11, + neb=True, + engine="psi4", + ) + params = geometric.params.NEBParams(**{"optep": True, "verbose": 1}) + chain = geometric.neb.ElasticBand( + M, engine=engine, tmpdir=tempfile.mkdtemp(), params=params, plain=0 + ) + + geometric.nifty.createWorkQueue(9191, debug=False) + wq = geometric.nifty.getWorkQueue() + worker_program = geometric.nifty.which('work_queue_worker') + # Assume 4 threads are available + self.workers = [subprocess.Popen([os.path.join(worker_program, "work_queue_worker"), "localhost", str(wq.port)], + stdout=subprocess.PIPE) for i in range(4)] + + final_chain, optCycle = geometric.neb.OptimizeChain(chain, engine, params) + + assert optCycle < 10 + assert final_chain.maxg < params.maxg + assert final_chain.avgg < params.avgg + geometric.nifty.destroyWorkQueue() @addons.using_qcelemental def test_hcn_neb_service_arrange(localizer): @@ -75,7 +117,7 @@ def test_hcn_neb_service_arrange(localizer): from qcelemental.models import Molecule as qcmol chain_M = geometric.molecule.Molecule(os.path.join(datad, "hcn_neb_service.xyz")) - coords = [M.xyzs for M in chain_M] + coords = [M.xyzs/geometric.nifty.bohr2ang for M in chain_M] qcel_mols = [ qcmol( symbols=chain_M[0].elem,