From a48adefa767acc2fd0e9b1db13db03f0f1c3adbf Mon Sep 17 00:00:00 2001 From: QuantumChemist Date: Thu, 4 Jul 2024 21:36:39 +0200 Subject: [PATCH 1/6] improved docstring and added type-hints --- autoplex/benchmark/phonons/utils.py | 15 ++-- autoplex/data/common/utils.py | 35 +++++----- autoplex/data/phonons/utils.py | 6 +- autoplex/fitting/common/jobs.py | 2 +- autoplex/fitting/common/regularization.py | 27 +++++--- autoplex/fitting/common/utils.py | 83 +++++++++++++---------- 6 files changed, 100 insertions(+), 68 deletions(-) diff --git a/autoplex/benchmark/phonons/utils.py b/autoplex/benchmark/phonons/utils.py index 7accb5b92..0a7014637 100644 --- a/autoplex/benchmark/phonons/utils.py +++ b/autoplex/benchmark/phonons/utils.py @@ -1,8 +1,15 @@ """Utility functions for benchmarking jobs.""" +from __future__ import annotations + +from typing import TYPE_CHECKING + import matplotlib.pyplot as plt import numpy as np -from pymatgen.phonon.bandstructure import PhononBandStructureSymmLine + +if TYPE_CHECKING: + from matplotlib.figure import Figure + from pymatgen.phonon.bandstructure import PhononBandStructureSymmLine from pymatgen.phonon.plotter import PhononBSPlotter @@ -10,7 +17,7 @@ def get_rmse( ml_bs: PhononBandStructureSymmLine, dft_bs: PhononBandStructureSymmLine, q_dependent_rmse: bool = False, -): +) -> float | list[float]: """ Compute root mean squared error (rmse) between DFT and ML phonon band-structure. @@ -44,7 +51,7 @@ def rmse_qdep_plot( which_q_path=1, file_name="rms.pdf", img_format="pdf", -): +) -> plt: """ Save q dependent root mean squared error plot between DFT and ML phonon band-structure. @@ -94,7 +101,7 @@ def compare_plot( ml_bs: PhononBandStructureSymmLine, dft_bs: PhononBandStructureSymmLine, file_name: str = "band_comparison.pdf", -): +) -> Figure: """ Save DFT and ML phonon band-structure overlay plot for visual comparison. 
diff --git a/autoplex/data/common/utils.py b/autoplex/data/common/utils.py index 16949df7e..acd966139 100644 --- a/autoplex/data/common/utils.py +++ b/autoplex/data/common/utils.py @@ -85,7 +85,7 @@ def scale_cell( volume_scale_factor_range: list[float] | None = None, n_structures: int = 10, volume_custom_scale_factors: list[float] | None = None, -): +) -> list[Structure]: """ Take in a pymatgen Structure object and generates stretched or compressed structures. @@ -154,9 +154,9 @@ def scale_cell( return distorted_cells -def check_distances(structure: Structure, min_distance: float = 1.5): +def check_distances(structure: Structure, min_distance: float = 1.5) -> bool: """ - Take in a pymatgen Structure object and checks distances between atoms using minimum image convention. + Take in a pymatgen Structure object and check minimum distances between atoms using minimum image convention. Useful after distorting cell angles and rattling to check atoms aren't too close. @@ -191,7 +191,7 @@ def random_vary_angle( w_angle: list[float] | None = None, n_structures: int = 8, angle_max_attempts: int = 1000, -): +) -> list[Structure]: """ Take in a pymatgen Structure object and generates angle-distorted structures. @@ -237,10 +237,10 @@ def random_vary_angle( volume_custom_scale_factors=[1.03], ) - distorted_cells = AseAtomsAdaptor.get_atoms(distorted_cells[0]) + distorted_supercells: Atoms = AseAtomsAdaptor.get_atoms(distorted_cells[0]) - # getting stretched cell out of array - newcell = distorted_cells.cell.cellpar() + # getting stretched supercell out of array + newcell = distorted_supercells.cell.cellpar() # current angles alpha = atoms_copy.cell.cellpar()[3] @@ -287,7 +287,7 @@ def std_rattle( n_structures: int = 5, rattle_std: float = 0.01, rattle_seed: int = 42, -): +) -> list[Structure]: """ Take in a pymatgen Structure object and generates rattled structures. 
@@ -331,7 +331,7 @@ def mc_rattle( min_distance: float = 1.5, rattle_seed: int = 42, rattle_mc_n_iter: int = 10, -): +) -> list[Structure]: """ Take in a pymatgen Structure object and generates rattled structures. @@ -375,7 +375,7 @@ def mc_rattle( return [AseAtomsAdaptor.get_structure(xtal) for xtal in mc_rattle] -def extract_base_name(filename, is_out=False): +def extract_base_name(filename, is_out=False) -> str: """ Extract the base of a file name to easier manipulate other file names. @@ -401,7 +401,7 @@ def extract_base_name(filename, is_out=False): return "A problem with the files occurred." -def filter_outlier_energy(in_file, out_file, criteria: float = 0.0005): +def filter_outlier_energy(in_file, out_file, criteria: float = 0.0005) -> None: """ Filter data outliers per energy criteria and write them into files. @@ -457,7 +457,9 @@ def filter_outlier_energy(in_file, out_file, criteria: float = 0.0005): ) -def filter_outlier_forces(in_file, out_file, symbol="Si", criteria: float = 0.1): +def filter_outlier_forces( + in_file, out_file, symbol="Si", criteria: float = 0.1 +) -> None: """ Filter data outliers per force criteria and write them into files. @@ -526,13 +528,14 @@ def filter_outlier_forces(in_file, out_file, symbol="Si", criteria: float = 0.1) ) -# copied from libatoms GAP tutorial page and adjusted def energy_plot( in_file, out_file, ax, title: str = "Plot of energy", label: str = "energy" -): +) -> None: """ Plot the distribution of energy per atom on the output vs the input. + Adapted and adjusted from libatoms GAP tutorial page https://libatoms.github.io/GAP/gap_fitting_tutorial.html. + Parameters ---------- in_file: @@ -610,7 +613,7 @@ def force_plot( symbol: str = "Si", title: str = "Plot of force", label: str = "force for ", -): +) -> float: """ Plot the distribution of force components per atom on the output vs the input. 
@@ -700,7 +703,7 @@ def plot_energy_forces( species_list: list | None = None, train_name: str = "train.extxyz", test_name: str = "test.extxyz", -): +) -> None: """ Plot energy and forces of the data. diff --git a/autoplex/data/phonons/utils.py b/autoplex/data/phonons/utils.py index 0ac58b380..0755c796f 100644 --- a/autoplex/data/phonons/utils.py +++ b/autoplex/data/phonons/utils.py @@ -18,7 +18,11 @@ def ml_phonon_maker_preparation( bulk_relax_maker: ForceFieldRelaxMaker, phonon_displacement_maker: ForceFieldStaticMaker, static_energy_maker: ForceFieldStaticMaker, -): +) -> tuple[ + ForceFieldRelaxMaker | None, + ForceFieldStaticMaker | None, + ForceFieldStaticMaker | None, +]: """ Prepare the MLPhononMaker for the respective MLIP model. diff --git a/autoplex/fitting/common/jobs.py b/autoplex/fitting/common/jobs.py index f82f95bec..de175620f 100644 --- a/autoplex/fitting/common/jobs.py +++ b/autoplex/fitting/common/jobs.py @@ -33,7 +33,7 @@ def machine_learning_fit( **kwargs, ): """ - Maker for fitting potential(s). + Job for fitting potential(s). Parameters ---------- diff --git a/autoplex/fitting/common/regularization.py b/autoplex/fitting/common/regularization.py index 9c4dd56e5..3b8ad7593 100644 --- a/autoplex/fitting/common/regularization.py +++ b/autoplex/fitting/common/regularization.py @@ -5,10 +5,14 @@ import traceback from contextlib import suppress +from typing import TYPE_CHECKING, Any import numpy as np from scipy.spatial import ConvexHull, Delaunay +if TYPE_CHECKING: + from ase import Atoms + def set_sigma( atoms, @@ -21,7 +25,7 @@ def set_sigma( element_order=None, max_energy=20.0, config_type_override=None, -): +) -> list[Atoms]: """ Handle automatic regularisation based on distance to convex hull, amongst other things. @@ -216,6 +220,7 @@ def set_sigma( def get_convex_hull(atoms, energy_name="energy", **kwargs): + # CE I don't get what the function returns """ Calculate simple linear (E,V) convex hull. 
@@ -276,7 +281,7 @@ def get_convex_hull(atoms, energy_name="energy", **kwargs): return lower_half_hull_points, p -def get_e_distance_to_hull(hull: np.array, at, energy_name="energy", **kwargs): +def get_e_distance_to_hull(hull: np.array, at, energy_name="energy", **kwargs) -> float: """ Calculate the distance of a structure to the linear convex hull in energy. @@ -315,7 +320,7 @@ def get_e_distance_to_hull(hull: np.array, at, energy_name="energy", **kwargs): ) -def get_intersect(a1, a2, b1, b2): +def get_intersect(a1, a2, b1, b2) -> tuple[float, float] | tuple: """ Return the point of intersection of the lines passing through a2,a1 and b2,b1. @@ -339,7 +344,7 @@ def get_intersect(a1, a2, b1, b2): return x / z, y / z -def get_x(at, element_order=None): +def get_x(at, element_order=None) -> float | int: """ Calculate the mole-fraction of a structure. @@ -379,7 +384,7 @@ def get_x(at, element_order=None): def label_stoichiometry_volume( ats, isolated_atoms_energies, e_name, element_order=None -): +): # CE I don't get what the function returns """ Calculate the stoichiometry, energy, and volume coordinates for forming the convex hull. @@ -412,7 +417,7 @@ def label_stoichiometry_volume( return p.T[:, np.argsort(p.T[0])].T -def point_in_triangle_2D(p1, p2, p3, pn): +def point_in_triangle_2D(p1, p2, p3, pn) -> bool: """ Check if a point is inside a triangle in 2D. @@ -449,7 +454,7 @@ def point_in_triangle_2D(p1, p2, p3, pn): ) -def point_in_triangle_ND(pn, *preg): +def point_in_triangle_ND(pn, *preg) -> bool: """ Check if a point is inside a region of hyperplanes in N dimensions. @@ -467,7 +472,7 @@ def point_in_triangle_ND(pn, *preg): return hull.find_simplex(pn) >= 0 -def calculate_hull_3D(p): +def calculate_hull_3D(p) -> ConvexHull: """ Calculate the convex hull in 3D. @@ -492,7 +497,7 @@ def calculate_hull_3D(p): return hull -def calculate_hull_ND(p): +def calculate_hull_ND(p) -> ConvexHull: """ Calculate the convex hull in ND (N>=3). 
@@ -531,7 +536,7 @@ def calculate_hull_ND(p): def get_e_distance_to_hull_3D( hull, at, isolated_atoms_energies=None, energy_name="energy", element_order=None -): +) -> float: """ Calculate the energy distance to the convex hull in 3D. @@ -579,7 +584,7 @@ def get_e_distance_to_hull_3D( return 1e6 -def piecewise_linear(x, vals): +def piecewise_linear(x, vals) -> Any: """ Piecewise linear. diff --git a/autoplex/fitting/common/utils.py b/autoplex/fitting/common/utils.py index d0c93df4b..aee17eb36 100644 --- a/autoplex/fitting/common/utils.py +++ b/autoplex/fitting/common/utils.py @@ -14,6 +14,11 @@ from functools import partial from itertools import combinations from pathlib import Path +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from ase.atoms import Atom + from pymatgen.core import Structure import ase import lightning as pl @@ -61,7 +66,7 @@ def gap_fitting( train_name: str = "train.extxyz", test_name: str = "test.extxyz", fit_kwargs: dict | None = None, # pylint: disable=E3701 -): +) -> dict: """ GAP fit and validation job. @@ -229,7 +234,7 @@ def ace_fitting( solver: str = "BLR", isolated_atoms_energies: dict | None = None, num_processes: int = 32, -): +) -> dict: """ Perform the ACE (Atomic Cluster Expansion) potential fitting. @@ -389,7 +394,7 @@ def nequip_fitting( default_dtype: str = "float32", isolated_atoms_energies: dict | None = None, device: str = "cuda", -): +) -> dict: """ Perform the NequIP potential fitting. @@ -629,7 +634,7 @@ def m3gnet_fitting( max_n: int = 4, device: str = "cuda", test_equal_to_val: bool = True, -): +) -> dict: """ Perform the M3GNet potential fitting. @@ -944,7 +949,7 @@ def mace_fitting( loss: str = None, default_dtype: str = None, device: str = "cuda", -): +) -> dict: """ Perform the MACE potential fitting. @@ -1026,7 +1031,7 @@ def mace_fitting( } -def check_convergence(test_error): +def check_convergence(test_error) -> bool: """ Check the convergence of the fit. 
@@ -1046,7 +1051,7 @@ def check_convergence(test_error): return convergence -def load_gap_hyperparameter_defaults(gap_fit_parameter_file_path: str | Path): +def load_gap_hyperparameter_defaults(gap_fit_parameter_file_path: str | Path) -> dict: """ Load gap fit default parameters from the json file. @@ -1069,7 +1074,7 @@ def gap_hyperparameter_constructor( include_two_body: bool = False, include_three_body: bool = False, include_soap: bool = False, -): +) -> list: """ Construct a list of arguments needed to execute gap potential from the parameters' dict. @@ -1129,7 +1134,7 @@ def gap_hyperparameter_constructor( return [*general, gap_hyperparameters] -def get_list_of_vasp_calc_dirs(flow_output): +def get_list_of_vasp_calc_dirs(flow_output) -> list[str]: """ Return a list of vasp_calc_dirs from PhononDFTMLDataGenerationFlow output. @@ -1143,7 +1148,7 @@ def get_list_of_vasp_calc_dirs(flow_output): list. A list of vasp_calc_dirs """ - list_of_vasp_calc_dirs = [] + list_of_vasp_calc_dirs: list[str] = [] for output in flow_output.values(): for output_type, dirs in output.items(): if output_type != "phonon_data" and isinstance(dirs, list): @@ -1163,7 +1168,7 @@ def vaspoutput_2_extended_xyz( regularization: float = 0.1, f_min: float = 0.01, # unit: eV Å-1 atom_wise_regularization: bool = True, -): +) -> None: """ Parse all VASP output files (vasprun.xml/OUTCAR) and generates a vasp_ref.extxyz. @@ -1227,7 +1232,7 @@ class Species: def __init__(self, atoms): self.atoms = atoms - def get_species(self): + def get_species(self) -> list[str]: """ Get species. @@ -1236,7 +1241,7 @@ def get_species(self): species_list: a list of species. """ - species_list = [] + species_list: list[str] = [] for atom in self.atoms: symbol_all = atom.get_chemical_symbols() @@ -1245,7 +1250,7 @@ def get_species(self): return species_list - def find_element_pairs(self, symbol_list=None): + def find_element_pairs(self, symbol_list=None) -> list: """ Find element pairs. 
@@ -1264,7 +1269,7 @@ def find_element_pairs(self, symbol_list=None): return list(combinations(species_list, 2)) - def get_number_of_species(self): + def get_number_of_species(self) -> int: """ Get number of species. @@ -1275,7 +1280,7 @@ def get_number_of_species(self): """ return int(len(self.get_species())) - def get_species_Z(self): + def get_species_Z(self) -> str: """ Get species Z. @@ -1297,7 +1302,7 @@ def get_species_Z(self): return species_Z -def flatten(atoms_object, recursive=False): +def flatten(atoms_object, recursive=False) -> list[str | bytes | Atoms] | list: """ Flatten an iterable fully, but excluding Atoms objects. @@ -1312,7 +1317,7 @@ def flatten(atoms_object, recursive=False): a flattened object, excluding the Atoms objects. """ - iteration_list = [] + iteration_list: list[str | bytes | Atoms] | list = [] if recursive: for element in atoms_object: @@ -1327,7 +1332,7 @@ def flatten(atoms_object, recursive=False): return [item for sublist in atoms_object for item in sublist] -def gcm3_to_Vm(gcm3, mr, n_atoms=1): +def gcm3_to_Vm(gcm3, mr, n_atoms=1) -> float: """ Convert gcm3 to Vm. @@ -1347,7 +1352,7 @@ def gcm3_to_Vm(gcm3, mr, n_atoms=1): return 1 / (n_atoms * (gcm3 / mr) * 6.022e23 / (1e8) ** 3) -def get_atomic_numbers(species): +def get_atomic_numbers(species) -> list[int]: """ Get atomic numbers. @@ -1370,7 +1375,13 @@ def get_atomic_numbers(species): return atom_numbers -def stratified_dataset_split(atoms, split_ratio): +def stratified_dataset_split( + atoms, split_ratio +) -> tuple[ + list[Atom | Atoms] + | list[Atom | Atoms | list[Atom | Atoms] | list[Atom | Atoms | list]], + list[Atom | Atoms | list[Atom | Atoms] | list[Atom | Atoms | list]], +]: """ Split the dataset. @@ -1420,7 +1431,7 @@ def stratified_dataset_split(atoms, split_ratio): return train_structures, test_structures -def data_distillation(vasp_ref_dir, f_max): +def data_distillation(vasp_ref_dir, f_max) -> list[Atom | Atoms]: """ For data distillation. 
@@ -1454,7 +1465,7 @@ def data_distillation(vasp_ref_dir, f_max): return atoms_distilled -def energy_remain(in_file): +def energy_remain(in_file) -> float: """ Plot the distribution of energy per atom on the output vs. the input. @@ -1494,7 +1505,7 @@ def energy_remain(in_file): return rms["rmse"] -def extract_gap_label(xml_file_path): +def extract_gap_label(xml_file_path) -> str: """ Extract GAP label. @@ -1514,7 +1525,7 @@ def extract_gap_label(xml_file_path): return root.tag -def plot_convex_hull(all_points, hull_points): +def plot_convex_hull(all_points, hull_points) -> None: """ Plot convex hull. @@ -1588,7 +1599,7 @@ def calculate_delta(atoms_db: list[Atoms], e_name: str) -> tuple[float, ndarray] return es_var / avg_neigh, num_triplet -def compute_pairs_triplets(atoms): +def compute_pairs_triplets(atoms) -> list[float]: """ Calculate the number of pairwise and triplet within a cutoff distance for a given list of atoms. @@ -1618,7 +1629,7 @@ def compute_pairs_triplets(atoms): return [num_pair, num_triplet] -def run_ace(num_processes: int, script_name: str): +def run_ace(num_processes: int, script_name: str) -> None: """ Julia-ACE script runner. @@ -1638,7 +1649,7 @@ def run_ace(num_processes: int, script_name: str): subprocess.call(["julia", script_name], stdout=file_out, stderr=file_err) -def run_gap(num_processes: int, parameters): +def run_gap(num_processes: int, parameters) -> None: """ GAP runner. @@ -1660,7 +1671,7 @@ def run_gap(num_processes: int, parameters): def run_quip( num_processes: int, data_path, xml_file: str, filename: str, glue_xml: bool = False -): +) -> None: """ QUIP runner. @@ -1685,7 +1696,7 @@ def run_quip( subprocess.call(command, stdout=file_std, stderr=file_err, shell=True) -def run_nequip(command: str, log_prefix: str): +def run_nequip(command: str, log_prefix: str) -> None: """ Nequip runner. 
@@ -1703,7 +1714,7 @@ def run_nequip(command: str, log_prefix: str): subprocess.call(command.split(), stdout=file_out, stderr=file_err) -def run_mace(hypers: list): +def run_mace(hypers: list) -> None: """ MACE runner. @@ -1721,11 +1732,11 @@ def run_mace(hypers: list): def prepare_fit_environment( database_dir, - mlip_path, + mlip_path: Path, glue_xml: bool, train_name: str = "train.extxyz", test_name: str = "test.extxyz", -): +) -> Path: """ Prepare the environment for the fit. @@ -1763,7 +1774,9 @@ def prepare_fit_environment( return mlip_path -def convert_xyz_to_structure(atoms_list, include_forces=True, include_stresses=True): +def convert_xyz_to_structure( + atoms_list, include_forces=True, include_stresses=True +) -> tuple[list[Structure], list, list[object], list[object]]: """ Convert extxyz to pymatgen Structure format. @@ -1812,7 +1825,7 @@ def write_after_distillation_data_split( vasp_ref_name: str = "vasp_ref.extxyz", train_name: str = "train.extxyz", test_name: str = "test.extxyz", -): +) -> None: """ Write train.extxyz and test.extxyz after data distillation and split. 
From f617cec6947b449350981d297fd32e4e74ade71d Mon Sep 17 00:00:00 2001 From: QuantumChemist Date: Fri, 5 Jul 2024 17:52:07 +0200 Subject: [PATCH 2/6] improved documentation, and docstrings, variable readability and unit tests for regularizations --- autoplex/fitting/common/regularization.py | 195 ++++++++++--------- docs/user/flows/flows.md | 2 + tests/fitting/test_fitting_regularization.py | 186 +++++++++++------- 3 files changed, 224 insertions(+), 159 deletions(-) diff --git a/autoplex/fitting/common/regularization.py b/autoplex/fitting/common/regularization.py index 3b8ad7593..a8cb5d4ee 100644 --- a/autoplex/fitting/common/regularization.py +++ b/autoplex/fitting/common/regularization.py @@ -5,7 +5,7 @@ import traceback from contextlib import suppress -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING import numpy as np from scipy.spatial import ConvexHull, Delaunay @@ -219,45 +219,50 @@ def set_sigma( return atoms_modi -def get_convex_hull(atoms, energy_name="energy", **kwargs): - # CE I don't get what the function returns +def get_convex_hull( + atoms, energy_name="energy", **kwargs +) -> tuple[np.ndarray, np.ndarray]: """ - Calculate simple linear (E,V) convex hull. + Calculate the simple linear (E,V) convex hull. Parameters ---------- - atoms: (list) - list of atoms objects - energy_name: (str) - name of energy key in atoms.info (typically a DFT energy) + atoms: list + List of atoms objects. + energy_name: str + Name of the energy key in atoms.info (typically a DFT energy). Returns ------- - the list of points in the convex hull (lower half only), - and additionally all the points for testing purposes + tuple + A tuple containing two elements: + - lower_half_hull_points: list of points (volume, energy) in the convex hull (lower half only). + - p: list of all points for testing purposes. 
""" - p = [] - ct = 0 - for at in atoms: - if (at.info["config_type"] == "IsolatedAtom") or ( - at.info["config_type"] == "dimer" - ): + points_list = [] + failed_count = 0 + + for atom in atoms: + if atom.info["config_type"] in ["IsolatedAtom", "dimer"]: continue try: - v = at.get_volume() / len(at) - e = at.info[energy_name] / len(at) - p.append((v, e)) - except Exception: - ct += 1 - if ct > 0: - raise ValueError(f"Convex hull failed to include {ct}/{len(atoms)} structures") + volume_per_atom = atom.get_volume() / len(atom) + energy_per_atom = atom.info[energy_name] / len(atom) + points_list.append((volume_per_atom, energy_per_atom)) + except KeyError: + failed_count += 1 + + if failed_count > 0: + raise ValueError( + f"Convex hull failed to include {failed_count}/{len(atoms)} structures" + ) - p = np.array(p) - p = p.T[:, np.argsort(p.T[0])].T # sort in volume axis + points = np.array(points_list) + points = points.T[:, np.argsort(points.T[0])].T # sort by volume axis - hull = ConvexHull(p) # generates full convex hull, we only want bottom half - hull_points = p[hull.vertices] + hull = ConvexHull(points) # generate full convex hull + hull_points = points[hull.vertices] min_x_index = np.argmin(hull_points[:, 0]) max_x_index = np.argmax(hull_points[:, 0]) @@ -272,16 +277,18 @@ def get_convex_hull(atoms, energy_name="energy", **kwargs): lower_half_hull.append(hull.vertices[i]) break - lower_half_hull_points = p[lower_half_hull] + lower_half_hull_points = points[lower_half_hull] lower_half_hull_points = lower_half_hull_points[ lower_half_hull_points[:, 1] <= np.max(lower_half_hull_points[:, 1]) ] - return lower_half_hull_points, p + return lower_half_hull_points, points -def get_e_distance_to_hull(hull: np.array, at, energy_name="energy", **kwargs) -> float: +def get_e_distance_to_hull( + hull: np.array, atoms, energy_name="energy", **kwargs +) -> float: """ Calculate the distance of a structure to the linear convex hull in energy. 
@@ -289,14 +296,14 @@ def get_e_distance_to_hull(hull: np.array, at, energy_name="energy", **kwargs) - ---------- hull: (np.array) points in the convex hull - at: (ase.Atoms) + atoms: (Atoms) structure to calculate distance to hull energy_name: (str) name of energy key in atoms.info (typically a DFT energy) """ - volume = at.get_volume() / len(at) - energy = at.info[energy_name] / len(at) + volume = atoms.get_volume() / len(atoms) + energy = atoms.info[energy_name] / len(atoms) tp = np.array([volume, energy]) hull_ps = hull.points if isinstance(hull, ConvexHull) else hull @@ -344,13 +351,13 @@ def get_intersect(a1, a2, b1, b2) -> tuple[float, float] | tuple: return x / z, y / z -def get_x(at, element_order=None) -> float | int: +def get_mole_frac(atoms, element_order=None) -> float | int: """ Calculate the mole-fraction of a structure. Parameters ---------- - at: (ase.Atoms) + atoms: (Atoms) structure to calculate mole-fraction of element_order: (list) list of atomic numbers in order of choice (e.g. 
[42, 16] for MoS2) @@ -361,21 +368,25 @@ def get_x(at, element_order=None) -> float | int: reduced mole-fraction of structure - first element n = 1-sum(others) """ - el, cts = np.unique(at.get_atomic_numbers(), return_counts=True) + element, cts = np.unique(atoms.get_atomic_numbers(), return_counts=True) - if element_order is None and len(el) < 3: # compatibility with old version - x = cts[1] / sum(cts) if len(el) == 2 else 1 + if element_order is None and len(element) < 3: # compatibility with old version + x = cts[1] / sum(cts) if len(element) == 2 else 1 else: # new version, requires element_order, recommended for all new calculations if element_order is None: - element_order = el # use default order - not_in = [i for i in element_order if i not in el] + element_order = element # use default order + not_in = [i for i in element_order if i not in element] for i in not_in: - el = np.insert(el, -1, i) + element = np.insert(element, -1, i) cts = np.insert(cts, -1, 0) - cts = np.array([cts[np.argwhere(el == i).squeeze()] for i in element_order]) - el = np.array([el[np.argwhere(el == i).squeeze()] for i in element_order]) + cts = np.array( + [cts[np.argwhere(element == i).squeeze()] for i in element_order] + ) + element = np.array( + [element[np.argwhere(element == i).squeeze()] for i in element_order] + ) x = cts[1:] / sum(cts) @@ -383,38 +394,38 @@ def get_x(at, element_order=None) -> float | int: def label_stoichiometry_volume( - ats, isolated_atoms_energies, e_name, element_order=None -): # CE I don't get what the function returns + atoms_list, isolated_atoms_energies, energy_name, element_order=None +) -> np.ndarray: """ Calculate the stoichiometry, energy, and volume coordinates for forming the convex hull. 
Parameters ---------- - ats: (list) + atoms_list: (Atoms) list of atoms objects isolated_atoms_energies: (dict) dictionary of isolated atom energies {atomic_number: energy} - e_name: (str) + energy_name: (str) name of energy key in atoms.info (typically a DFT energy) element_order: (list) list of atomic numbers in order of choice (e.g. [42, 16] for MoS2) """ - p = [] - for at in ats: + points_list = [] + for atom in atoms_list: try: - v = at.get_volume() / len(at) + volume = atom.get_volume() / len(atom) # make energy relative to isolated atoms - e = ( - at.info[e_name] - - sum([isolated_atoms_energies[j] for j in at.get_atomic_numbers()]) - ) / len(at) - x = get_x(at, element_order=element_order) - p.append(np.hstack((x, v, e))) - except Exception: + energy = ( + atom.info[energy_name] + - sum([isolated_atoms_energies[j] for j in atom.get_atomic_numbers()]) + ) / len(atom) + mole_frac = get_mole_frac(atom, element_order=element_order) + points_list.append(np.hstack((mole_frac, volume, energy))) + except KeyError: traceback.print_exc() - p = np.array(p) - return p.T[:, np.argsort(p.T[0])].T + points = np.array(points_list) + return points.T[:, np.argsort(points.T[0])].T def point_in_triangle_2D(p1, p2, p3, pn) -> bool: @@ -472,14 +483,14 @@ def point_in_triangle_ND(pn, *preg) -> bool: return hull.find_simplex(pn) >= 0 -def calculate_hull_3D(p) -> ConvexHull: +def calculate_hull_3D(points_3D) -> ConvexHull: """ Calculate the convex hull in 3D. 
Parameters ---------- - p: - point + points_3D: + point in 3D Returns ------- @@ -487,9 +498,13 @@ def calculate_hull_3D(p) -> ConvexHull: """ p0 = np.array( - [(p[:, i].max() - p[:, i].min()) / 2 + p[:, i].min() for i in range(2)] + [-1e6] + [ + (points_3D[:, i].max() - points_3D[:, i].min()) / 2 + points_3D[:, i].min() + for i in range(2) + ] + + [-1e6] ) # test point to get the visible facets from below - pn = np.vstack((p0, p)) + pn = np.vstack((p0, points_3D)) hull = ConvexHull(pn, qhull_options="QG0") hull.remove_dim = [] @@ -497,14 +512,14 @@ def calculate_hull_3D(p) -> ConvexHull: return hull -def calculate_hull_ND(p) -> ConvexHull: +def calculate_hull_ND(points_ND) -> ConvexHull: """ Calculate the convex hull in ND (N>=3). Parameters ---------- - p: - point + points_ND: + point in ND. Returns ------- @@ -513,16 +528,16 @@ def calculate_hull_ND(p) -> ConvexHull: """ p0 = np.array( [ - (p[:, i].max() - p[:, i].min()) / 2 + p[:, i].min() - for i in range(p.shape[1] - 1) + (points_ND[:, i].max() - points_ND[:, i].min()) / 2 + points_ND[:, i].min() + for i in range(points_ND.shape[1] - 1) ] + [-1e6] ) # test point to get the visible facets from below - pn = np.vstack((p0, p)) + pn = np.vstack((p0, points_ND)) remove_dim = [] - for i in range(p.shape[1]): - if np.all(p.T[i, 0] == p.T[i, :]): + for i in range(points_ND.shape[1]): + if np.all(points_ND.T[i, 0] == points_ND.T[i, :]): pn = np.delete(pn, i, axis=1) print(f"Convex hull lower dimensional - removing dimension {i}") remove_dim.append(i) @@ -535,7 +550,7 @@ def calculate_hull_ND(p) -> ConvexHull: def get_e_distance_to_hull_3D( - hull, at, isolated_atoms_energies=None, energy_name="energy", element_order=None + hull, atoms, isolated_atoms_energies=None, energy_name="energy", element_order=None ) -> float: """ Calculate the energy distance to the convex hull in 3D. @@ -544,7 +559,7 @@ def get_e_distance_to_hull_3D( ---------- hull: convex hull. 
- at: (ase.Atoms) + atoms: (ase.Atoms) structure to calculate mole-fraction of isolated_atoms_energies: (dict) dictionary of isolated atom energies @@ -554,37 +569,41 @@ def get_e_distance_to_hull_3D( list of atomic numbers in order of choice (e.g. [42, 16] for MoS2) """ - x = get_x(at, element_order=element_order) - e = ( - at.info[energy_name] - - sum([isolated_atoms_energies[j] for j in at.get_atomic_numbers()]) - ) / len(at) - v = at.get_volume() / len(at) - - sp = np.hstack([x, v, e]) + mole_frac = get_mole_frac(atoms, element_order=element_order) + energy = ( + atoms.info[energy_name] + - sum([isolated_atoms_energies[j] for j in atoms.get_atomic_numbers()]) + ) / len(atoms) + volume = atoms.get_volume() / len(atoms) + + sp = np.hstack([mole_frac, volume, energy]) for i in hull.remove_dim: sp = np.delete(sp, i) if len(sp[:-1]) == 1: # print('doing convexhull analysis in 1D') - return get_e_distance_to_hull(hull, at, energy_name=energy_name) + return get_e_distance_to_hull(hull, atoms, energy_name=energy_name) for _ct, visible_facet in enumerate(hull.simplices[hull.good]): if point_in_triangle_ND(sp[:-1], *hull.points[visible_facet][:, :-1]): n_3 = hull.points[visible_facet] - e = sp[-1] + energy = sp[-1] norm = np.cross(n_3[2] - n_3[0], n_3[1] - n_3[0]) - norm = norm / np.linalg.norm(norm) # plane normal - D = np.dot(norm, n_3[0]) # plane constant + plane_norm = norm / np.linalg.norm(norm) # plane normal + plane_constant = np.dot(plane_norm, n_3[0]) # plane constant - return e - (D - norm[0] * sp[0] - norm[1] * sp[1]) / norm[2] + return ( + energy + - (plane_constant - plane_norm[0] * sp[0] - plane_norm[1] * sp[1]) + / plane_norm[2] + ) print("Failed to find distance to hull") return 1e6 -def piecewise_linear(x, vals) -> Any: +def piecewise_linear(x, vals) -> np.ndarray: """ Piecewise linear. 
diff --git a/docs/user/flows/flows.md b/docs/user/flows/flows.md index f336d4352..bd4eeb50f 100644 --- a/docs/user/flows/flows.md +++ b/docs/user/flows/flows.md @@ -53,6 +53,8 @@ The `autoplex` workflow will then perform automated VASP and `phonopy` calculati Of course, you can change and adjust the settings to your own needs, e.g. by setting a smaller supercell for the `phonopy` calculations using `CompleteDFTvsMLBenchmarkWorkflow(min_length=15).make(...)`. You can find more details on the subsequent tutorial pages. +With additional flows or jobs in the `[complete_flow]` list, +you can combine the `autoplex` flow with other flows and jobs. The following workflow diagram will give you an overview of the flows and jobs in the default autoplex workflow: ```{mermaid} diff --git a/tests/fitting/test_fitting_regularization.py b/tests/fitting/test_fitting_regularization.py index 2730d972b..a65f220d3 100644 --- a/tests/fitting/test_fitting_regularization.py +++ b/tests/fitting/test_fitting_regularization.py @@ -7,7 +7,7 @@ get_convex_hull, get_e_distance_to_hull, get_intersect, - get_x, + get_mole_frac, label_stoichiometry_volume, point_in_triangle_ND, point_in_triangle_2D, @@ -63,79 +63,123 @@ def test_set_sigma(test_dir): def test_auxiliary_functions(test_dir, memory_jobstore, clean_dir): - from jobflow import run_locally from ase.io import read + from ase import Atoms import numpy as np - import scipy file = test_dir / "fitting" / "ref_files" / "quip_train.extxyz" - - atoms = read(file, ":") - - try: - get_convex = get_convex_hull(atoms) - - responses = run_locally( - get_convex, ensure_success=True, create_folders=True, store=memory_jobstore - ) - - except ValueError: - print("\nDOES NOT run as intended, error 'Convex hull failed to include 10/10 structures'") - assert True - - generic_array = np.array([1, 2, 3, 4, 5]) - - try: - get_e_dist_hull = get_e_distance_to_hull(generic_array, atoms) - except AttributeError: - print("\nTODO: implement proper unit test") - 
assert True - - point1, point2, point3, point4 = [1, 5], [2, 9], [8, 7], [9, 3] - point = np.array([[1, 2, 3], [4, 5, 6]]) - + atoms: Atoms = read(file, ":") + + # Define the arrays + array1 = np.array([ + [15.2266087, -3.80983557], + [15.2266087, -3.81106994], + [16.2004607, -3.81927384], + [8000.0, -0.28663766] + ]) + + array2 = np.array([ + [15.2266087, -3.80983557], + [15.2266087, -3.81106994], + [16.2004607, -3.81927384], + [16.2004607, -3.81927264], + [16.4281758, -3.81869979], + [17.6913485, -3.80636951], + [17.6913485, -3.80665250], + [19.0176670, -3.77969777], + [8000.0, -0.27567309], + [8000.0, -0.28663766] + ]) + + array3 = np.array([ + [0.5, 17.6913485, -3.53493109], + [0.5, 15.2266087, -3.53839715], + [0.5, 16.2004607, -3.54783542], + [0.5, 16.2004607, -3.54783422], + [0.5, 17.6913485, -3.53521408], + [0.5, 16.4281758, -3.54726137], + [0.5, 19.017667, -3.50825935], + [0.5, 15.2266087, -3.53963152], + [1.0, 8000.0, -0.01928852], + [1.0, 8000.0, -0.00014539] + ]) + + lower_half_hull_points, points = get_convex_hull(atoms, energy_name="REF_energy") + assert np.allclose(lower_half_hull_points, array1) + assert np.allclose(points, array2) + + label = label_stoichiometry_volume(atoms, {3: -0.28649227, 17: -0.25638457}, "REF_energy") + assert np.allclose(label, array3) + + calc_hull = calculate_hull_ND(points) + calc_hull_3D = calculate_hull_3D(label) + fraction_list = [[1.0]] + [[0.0]] + [[0.5]] * 8 + + for atom, fraction in zip(atoms, fraction_list): + get_e_dist_hull = get_e_distance_to_hull(calc_hull, atom, energy_name="REF_energy") + assert get_e_dist_hull == 0 + get_e_dist_hull_3D = get_e_distance_to_hull_3D(calc_hull_3D, atom, {3: -0.28649227, 17: -0.25638457}, + "REF_energy") + assert round(get_e_dist_hull_3D) == 0 + getmole_frac = get_mole_frac(atom, element_order=[3, 17]) + assert getmole_frac == fraction + + point1, point2, point3, point4 = (1, 5), (2, 9), (8, 7), (9, 3) get_inter = get_intersect(point1, point2, point3, point4) - - try: - getx = 
get_x(atoms) - except AttributeError: - print("\nTODO: implement proper unit test") - assert True - - try: - label = label_stoichiometry_volume(atoms, {3: -0.28649227, 17: -0.25638457}, "energy") - except IndexError: - print("\nTODO: implement proper unit test") - assert True - - try: - point_ND = point_in_triangle_ND(point) - except ValueError: - print("\nTODO: implement proper unit test") - assert True - + assert get_inter == (4.75, 20.0) point_2d = point_in_triangle_2D(point1, point2, point3, point4) - - try: - calc_hull = calculate_hull_ND(point) - except scipy.spatial._qhull.QhullError: - print("\nTODO: implement proper unit test") - assert True - - try: - calc_hull_3D = calculate_hull_3D(point) - except scipy.spatial._qhull.QhullError: - print("\nTODO: implement proper unit test") - assert True - - try: - get_e_dist_hull_3D = get_e_distance_to_hull_3D(generic_array, atoms, {3: -0.28649227, 17: -0.25638457}, "energy") - except AttributeError: - print("\nTODO: implement proper unit test") - assert True - - try: - piece_lin = piecewise_linear(point1, point) - except IndexError: - print("\nTODO: implement proper unit test") - assert True + assert point_2d is False + + # Define test values + vals = [ + (1.0, [1.0, 2.0, 3.0]), + (2.0, [2.0, 3.0, 4.0]), + (3.0, [3.0, 4.0, 5.0]), + (4.0, [4.0, 5.0, 6.0]) + ] + + # Define test values + x = 2.5 + expected_result = np.array([2.5, 3.5, 4.5]) + + piece_lin = piecewise_linear(x, vals) + assert np.allclose(piece_lin, expected_result) + + # Define a test case for 2D (Triangle) + point_2D_inside = np.array([0.5, 0.5]) + region_2D = [ + np.array([0.0, 0.0]), + np.array([1.0, 0.0]), + np.array([0.0, 1.0]) + ] + + point_2D_outside = np.array([1.5, 1.5]) + + # Test 2D case + inside_result_2D = point_in_triangle_ND(point_2D_inside, *region_2D) + outside_result_2D = point_in_triangle_ND(point_2D_outside, *region_2D) + + # Point point_2D_inside inside region: + assert inside_result_2D + # Point point_2D_outside outside region: + 
assert not outside_result_2D + + # Define a test case for 3D (Tetrahedron) + point_3D_inside = np.array([0.25, 0.25, 0.25]) + region_3D = [ + np.array([0.0, 0.0, 0.0]), + np.array([1.0, 0.0, 0.0]), + np.array([0.0, 1.0, 0.0]), + np.array([0.0, 0.0, 1.0]) + ] + + point_3D_outside = np.array([1.0, 1.0, 1.0]) + + # Test 3D case + inside_result_3D = point_in_triangle_ND(point_3D_inside, *region_3D) + outside_result_3D = point_in_triangle_ND(point_3D_outside, *region_3D) + + # Point point_3D_inside inside region: + assert inside_result_3D + # Point point_3D_outside outside region: + assert not outside_result_3D From 517c48adfd6de8e035cde8ef01c397c7adb7437d Mon Sep 17 00:00:00 2001 From: QuantumChemist Date: Fri, 5 Jul 2024 18:19:38 +0200 Subject: [PATCH 3/6] documentation improvements --- README.md | 31 ++++--------------------------- docs/dev/contributing.md | 26 ++++++++++++++++++++++++++ docs/index.md | 3 ++- docs/user/index.md | 2 +- 4 files changed, 33 insertions(+), 29 deletions(-) create mode 100644 docs/dev/contributing.md diff --git a/README.md b/README.md index 65055a9b7..278b406d8 100644 --- a/README.md +++ b/README.md @@ -7,33 +7,6 @@ `autoplex` is an evolving project and **contributions are very welcome**! To ensure that the code remains of high quality, please raise a pull request for any contributions, which will be reviewed before integration into the main branch of the code. In the beginning, Janine will take care of the reviews. 
-# General code structure -- We are currently aiming to follow the code structure below for each submodule (This is an initial idea; of course, this could change depending on the needs in the future) - - autoplex/submodule/job.py (any jobs defined will be inside this module) - - autoplex/submodule/flows.py (workflows defined will be hosted in this module) - - autoplex/submodule/utils.py (all functions that act as utilities for defining flow or job, for example, a small subtask to calculate some metric or plotting, will be hosted in this module) - -# Guidelines for contributions -- Please write unit tests; this is a requirement for any added code to be accepted. (Automated testing will be performed using `pytest`; you can look into the `tests` folder for examples). -- Please ensure high coverage of the code based on the tests (you can test this with `coverage`). -- Please use numpy docstrings (use an IDE and switch on this docstring type; you can check examples in our code base; the docstring should be useful for other people) -- Please ensure that type hints are added for each variable, function, class, and method (this helps code readability, especially if someone else wants to build on your code). -- Please write the code in a way that gives users the option to change parameters (this is mainly applicable, for example, fitting protocols/flows). In other words, please avoid hardcoding settings or physical properties. Reasonable default values should be set, but the user needs to have the opportunity to modify them if they wish. - -# Formatting requirements -- Variable names should be descriptive and should use snake case (`variable_name`, not `VariableName`). -- If you define a `Maker`, please use python class naming convention (e.g., `PhononMaker`, `RssMaker`). - -# Commit guidelines -1. `pip install pre-commit`. -2. Next, run `pre-commit install` (this will install all the hooks from pre-commit-config.yaml) -3. 
Step 1 and 2 needs to be done only once in the local repository -4. Proceed with modifying the code and adding commits as usual. This should automatically run the linters. -5. To manually run the pre-commit hooks on all files, just use `pre-commit run --all-files` -6. To run pre-commit on a specific file, use `pre-commit run --files path/to/your/modified/module/` - -Please check out atomate2 for example code (https://github.com/materialsproject/atomate2) - # Setup In order to setup the mandatory prerequisites to be able to use `autoplex`, please follow the [installation guide of atomate2](https://materialsproject.github.io/atomate2/user/install.html). @@ -71,6 +44,10 @@ Pkg.add("DataFrames") Pkg.add("CSV") ``` +# Contributing guidelines + +Please follow the [contributing guidelines](docs/dev/contributing.md)! + # Workflow overview The following [Mermaid](https://mermaid.live/) diagram will give you an overview of the flows and jobs in the default autoplex workflow: diff --git a/docs/dev/contributing.md b/docs/dev/contributing.md new file mode 100644 index 000000000..42d32d635 --- /dev/null +++ b/docs/dev/contributing.md @@ -0,0 +1,26 @@ +# General code structure +- We are currently aiming to follow the code structure below for each submodule (This is an initial idea; of course, this could change depending on the needs in the future) + - autoplex/submodule/job.py (any jobs defined will be inside this module) + - autoplex/submodule/flows.py (workflows defined will be hosted in this module) + - autoplex/submodule/utils.py (all functions that act as utilities for defining flow or job, for example, a small subtask to calculate some metric or plotting, will be hosted in this module) + +# Guidelines for contributions +- Please write unit tests; this is a requirement for any added code to be accepted. (Automated testing will be performed using `pytest`; you can look into the `tests` folder for examples). 
+- Please ensure high coverage of the code based on the tests (you can test this with `coverage`). +- Please use numpy docstrings (use an IDE and switch on this docstring type; you can check examples in our code base; the docstring should be useful for other people) +- Please ensure that type hints are added for each variable, function, class, and method (this helps code readability, especially if someone else wants to build on your code). +- Please write the code in a way that gives users the option to change parameters (this is mainly applicable, for example, fitting protocols/flows). In other words, please avoid hardcoding settings or physical properties. Reasonable default values should be set, but the user needs to have the opportunity to modify them if they wish. + +# Formatting requirements +- Variable names should be descriptive and should use snake case (`variable_name`, not `VariableName`). +- If you define a `Maker`, please use python class naming convention (e.g., `PhononMaker`, `RssMaker`). + +# Commit guidelines +1. `pip install pre-commit`. +2. Next, run `pre-commit install` (this will install all the hooks from pre-commit-config.yaml) +3. Step 1 and 2 needs to be done only once in the local repository +4. Proceed with modifying the code and adding commits as usual. This should automatically run the linters. +5. To manually run the pre-commit hooks on all files, just use `pre-commit run --all-files` +6. 
To run pre-commit on a specific file, use `pre-commit run --files path/to/your/modified/module/` + +Please check out atomate2 for example code (https://github.com/materialsproject/atomate2) \ No newline at end of file diff --git a/docs/index.md b/docs/index.md index 05a91ef15..5678f9e09 100644 --- a/docs/index.md +++ b/docs/index.md @@ -13,9 +13,10 @@ reference/index ``` ```{toctree} -:caption: Developer Guide +:caption: Contirbuting Guide :hidden: dev/dev_install +dev/contributing ``` ```{toctree} diff --git a/docs/user/index.md b/docs/user/index.md index 9cd930abd..c0f2b1eca 100644 --- a/docs/user/index.md +++ b/docs/user/index.md @@ -3,6 +3,6 @@ Getting started ```{include} ../../README.md --- start-line: 3 -end-line: 73 +end-line: 51 --- ``` From f5849e95435bd51b80ae311260256c694704bc34 Mon Sep 17 00:00:00 2001 From: QuantumChemist Date: Fri, 5 Jul 2024 18:42:37 +0200 Subject: [PATCH 4/6] improving docstrings and documentation --- autoplex/fitting/common/jobs.py | 2 +- docs/dev/contributing.md | 19 ++++++++++--------- docs/user/index.md | 5 ++++- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/autoplex/fitting/common/jobs.py b/autoplex/fitting/common/jobs.py index de175620f..020e2a987 100644 --- a/autoplex/fitting/common/jobs.py +++ b/autoplex/fitting/common/jobs.py @@ -1,4 +1,4 @@ -"""fitting using GAP.""" +"""General fitting jobs using several MLIPs available.""" from __future__ import annotations from pathlib import Path diff --git a/docs/dev/contributing.md b/docs/dev/contributing.md index 42d32d635..3d0896f70 100644 --- a/docs/dev/contributing.md +++ b/docs/dev/contributing.md @@ -1,21 +1,22 @@ -# General code structure -- We are currently aiming to follow the code structure below for each submodule (This is an initial idea; of course, this could change depending on the needs in the future) - - autoplex/submodule/job.py (any jobs defined will be inside this module) - - autoplex/submodule/flows.py (workflows defined will be hosted in this 
module) - - autoplex/submodule/utils.py (all functions that act as utilities for defining flow or job, for example, a small subtask to calculate some metric or plotting, will be hosted in this module) - # Guidelines for contributions - Please write unit tests; this is a requirement for any added code to be accepted. (Automated testing will be performed using `pytest`; you can look into the `tests` folder for examples). - Please ensure high coverage of the code based on the tests (you can test this with `coverage`). - Please use numpy docstrings (use an IDE and switch on this docstring type; you can check examples in our code base; the docstring should be useful for other people) - Please ensure that type hints are added for each variable, function, class, and method (this helps code readability, especially if someone else wants to build on your code). -- Please write the code in a way that gives users the option to change parameters (this is mainly applicable, for example, fitting protocols/flows). In other words, please avoid hardcoding settings or physical properties. Reasonable default values should be set, but the user needs to have the opportunity to modify them if they wish. +- Please write the code in a way that gives users the option to change parameters (this is mainly applicable, for example, fitting protocols/flows). In other words, please avoid hardcoding settings or physical properties. +Reasonable default values should be set, but the user needs to have the opportunity to modify them if they wish. 
+ +## General code structure +- We are currently aiming to follow the code structure below for each submodule (This is an initial idea; of course, this could change depending on the needs in the future) + - autoplex/submodule/job.py (any jobs defined will be inside this module) + - autoplex/submodule/flows.py (workflows defined will be hosted in this module) + - autoplex/submodule/utils.py (all functions that act as utilities for defining flow or job, for example, a small subtask to calculate some metric or plotting, will be hosted in this module) -# Formatting requirements +## Formatting requirements - Variable names should be descriptive and should use snake case (`variable_name`, not `VariableName`). - If you define a `Maker`, please use python class naming convention (e.g., `PhononMaker`, `RssMaker`). -# Commit guidelines +## Commit guidelines 1. `pip install pre-commit`. 2. Next, run `pre-commit install` (this will install all the hooks from pre-commit-config.yaml) 3. Step 1 and 2 needs to be done only once in the local repository diff --git a/docs/user/index.md b/docs/user/index.md index c0f2b1eca..338c03aea 100644 --- a/docs/user/index.md +++ b/docs/user/index.md @@ -3,6 +3,9 @@ Getting started ```{include} ../../README.md --- start-line: 3 -end-line: 51 +end-line: 46 --- ``` +# Contributing guidelines + +Please follow the [contributing guidelines](../dev/contributing.md)! 
From fc5acb10fbb85690c9c3475eafb629876ba3730b Mon Sep 17 00:00:00 2001 From: QuantumChemist Date: Fri, 5 Jul 2024 18:45:14 +0200 Subject: [PATCH 5/6] improving documentation --- docs/index.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/index.md b/docs/index.md index 5678f9e09..7dbd6cbc5 100644 --- a/docs/index.md +++ b/docs/index.md @@ -13,7 +13,7 @@ reference/index ``` ```{toctree} -:caption: Contirbuting Guide +:caption: Contributing Guide :hidden: dev/dev_install dev/contributing @@ -80,9 +80,9 @@ assumes that you have a broad understanding of the key concepts. :link: dev/dev_install :link-type: doc :class-header: bg-light -**Developer guide** +**Contributing Guide** ^^^ Do you want to develop your own workflows or improve existing functionalities? -Check out the developer guide. +Check out the contributing Guide. ::: :::: From 9c349d6426c7a30da826a707c849f9dc55d3a512 Mon Sep 17 00:00:00 2001 From: QuantumChemist Date: Fri, 5 Jul 2024 19:43:04 +0200 Subject: [PATCH 6/6] improving the documentation --- docs/user/flows/flows.md | 3 + docs/user/jobflowremote.md | 203 ++++++++++++++++++++++++++++++++++++ docs/user/setup.md | 9 +- docs/user/test_project.yaml | 77 ++++++++++++++ 4 files changed, 291 insertions(+), 1 deletion(-) create mode 100644 docs/user/jobflowremote.md create mode 100644 docs/user/test_project.yaml diff --git a/docs/user/flows/flows.md b/docs/user/flows/flows.md index bd4eeb50f..7296be0fe 100644 --- a/docs/user/flows/flows.md +++ b/docs/user/flows/flows.md @@ -127,6 +127,9 @@ You can manage your `autoplex` workflow using [`FireWorks`](https://materialspro Please follow the installation and setup instructions on the respective guide website. Both packages rely on the [MongoDB](https://www.mongodb.com/) database manager for data storage. +We recommend using `jobflow-remote` as it is more flexible to use, especially on clusters where users cannot store their +own MongoDB. 
You can find a more comprehensive `jobflow-remote` tutorial [here](../jobflowremote.md). + Submission using `FireWorks`: ```python from fireworks import LaunchPad diff --git a/docs/user/jobflowremote.md b/docs/user/jobflowremote.md new file mode 100644 index 000000000..a58c49e24 --- /dev/null +++ b/docs/user/jobflowremote.md @@ -0,0 +1,203 @@ +# Jobflow-remote setup + +This will result in a setup for automation where +1. We will add/submit job to db on your local machine. +2. Jobs will be executed on your remote custer. + +# Installation + +## on your local machine +1. Create a new env > `conda create -n autoplex python=3.10`. (You can choose any other env name.) +2. Activate your env > `conda activate autoplex` +3. Clone the jobflow remote repository using `git clone https://github.com/Matgenix/jobflow-remote.git` +4. Switch to interactive branch (use `git checkout remotes/origin/interactive`) and install it via `pip install .` in your env. +5. Install autoplex > In your local autoplex directory: `pip install -e .[strict]`. +6. Activate your env and run `jf project generate --full YOUR_PROJECT_NAME`. +This will generate an empty project config file in your home directory. +You can find this file inside `~/.jfremote` +(This is optional, a config file is provided here: [test_project.yaml](test_project.yaml), +you can simply copy this file to the `~/.jfremote` directory. You will need to create `~/.jfremote` directory in your home.) + + +## on your remote cluster +7. Repeat step 1,2,3,4 and 5 on your remote cluster. +8. Now setup atomate2 config as usual. +Just `atomate2/config/atomate2.yaml`. (We do not need to set up jobflow.yaml in atomate2/config) + +Below is an example `atomat2.yaml` config file +```yaml +VASP_CMD: your hpc vasp_std cmd +VASP_GAMMA_CMD: your hpc vasp_gam cmd +LOBSTER_CMD: your hpc lobster cmd +``` + +9. 
Add environment variable to your ~/.bashrc `export ATOMATE2_CONFIG_FILE="/path/to/atomate2/config/atomate2.yaml"` + +## Changes to be done in the config file - on your local machine +1. Set paths to base, tmp, log, daemon dir. Best would be, simply creating empty dirs in your `~/.jfremote` directory. +Use the paths as provided in sample config file for reference. +2. Under the `workers` section of the yaml, change worker name from `example_worker` to your liking, set `work_dir` +(directory where calcs will be run), set `pre_run` command (used to activate the environment before job execution), +set `user` (this is your username on your remote cluster) +3. In `queue` section, just change details as per your mongodb (admin username password, host, port, name) + + +# Check if your setup works correctly + +> Note: If you have any password protected key in your `~/.ssh` directory worker might fail to start. To overcome this, temporarily move your passphrase-protected keys from `~/.ssh` directory to some other directory before starting the runner. + +1. `jf project check -w example_worker` +(If everything is setup correctly, you will get asked for password and OTP and will exit with a green tick in few secs.) +2. `jf project check --errors` this will check everything, including whether your MongoDB connection works. +If anything fails, please check the config file. + + +# Getting started + +1. Run `jf admin reset` (Do not worry, this will reset your db, necessary to do only once. +You can skip this if you want to keep the data in your db.) +2. `jf runner start -s -i` + +You will be prompted with a question "Do you want to open the connection for the host of the XXX worker?" +Answer "y". And then you should be prompted for password and OTP. +After that you can quit the interactive mode with ctrl+c. The runner should now be working fine until the connection drops. + +During the starting of the runner, you will probably see a few error/warnings.
+First, a warning that the password may be echoed. Ignore it, it should not. + +3. `jf runner status` (this should return status of runner as `running`, if everything is set up correctly) + + +# Example job scripts to test (Add/Submit jobs to DB from your local machine) + +## Simple python job + +```python +from jobflow_remote.utils.examples import add +from jobflow_remote import submit_flow +from jobflow import Flow + +job1 = add(1, 2) +job2 = add(job1.output, 2) + +flow = Flow([job1, job2]) + +resources = {"nodes": N, "partition": "name", "time": "01:00:00", "ntasks": ntasks, "qverbatim": "#SBATCH --get-user-env", + "mail_user": "your_email@adress", "mail_type": "ALL"} + +print(submit_flow(flow, worker="example_worker", resources=resources, project="test_project")) +# Do not forget to change worker and project name to what you set up in the jobflow remote config file. +``` + +## VASP relax job using atomate2 workflow + +```python +from jobflow_remote.utils.examples import add +from jobflow_remote import submit_flow +from jobflow import Flow +from mp_api.client import MPRester +from atomate2.vasp.flows.core import DoubleRelaxMaker +from atomate2.vasp.powerups import update_user_incar_settings + + +mpid = "mp-22862" +mr = MPRester(api_key='YOUR_MP_API_KEY') +struct = mr.get_structure_by_material_id(mpid) + +# we use the same structure (mp-22862) here and instantiate the workflow +relax_job = DoubleRelaxMaker().make(structure=struct) + +relax_job = update_user_incar_settings(relax_job, {"NPAR": 4}) + +# You can also pass exec_config for the worker using exec_config in submit flow.
Below is an example +# exec_config={"pre_run": "source activate autoplex \n module load slurm_setup \n module load vasp/6.1.2"} + +resources = {"nodes": N, "partition": "name", "time": "01:00:00", "ntasks": ntasks, "qverbatim": "#SBATCH --get-user-env", + "mail_user": "your_email@adress", "mail_type": "ALL"} + +print(submit_flow(relax_job, worker="example_worker", resources=resources, project="test_project")) +``` +It is crucial to set `"qverbatim": "#SBATCH --get-user-env"` to make sure the same environment is used on your remote cluster. + +# Setting different workers for different job types + +This is very similar to how it is done in atomate2; jobflow-remote provides a specific utility for this. +```python +from jobflow_remote import set_run_config +``` +An example use case can be found [here](https://matgenix.github.io/jobflow-remote/user/tuning.html#jobconfig) + +# Querying completed jobs from DB using jobflow-remote Python API + +```python +from jobflow_remote import get_jobstore + +js = get_jobstore(project_name='YOUR_PROJECT_NAME') +js.connect() +result = js.query(criteria={"name": "generate_frequencies_eigenvectors"},load=True) +# example query for completed phonon workflow runs +# the query methods are the same as in atomate2 basically, +for i in result: + print(i['output']["phonon_bandstructure"]) + # get phonon bandstructure pymatgen object +``` + +# Updating failed jobs time limit or execution config +```python +from jobflow_remote.jobs.jobcontroller import JobController + +jc = JobController.from_project_name(project_name='YOUR_PROJECT_NAME') # initialize a job controller + +job_docs = jc.get_jobs_doc(db_ids='214') # query job docs based on different criteria +# (Check documentation to see all available options https://github.com/Matgenix/jobflow-remote/blob/967e7c512f230105b1a82c2227fb101d8d4acb3d/src/jobflow_remote/jobs/jobcontroller.py#L467) + +# get your existing resources +resources = job_docs[0].resources + +# update time limit in the retrieved
dict (you can update any other keys like partition/ nodes etc as well) +resources["time"] = '8:00:00' + +jc.rerun_job(db_id=job_docs[0].db_id, force=True) # important for jobs that are in failed state to reset them first +jc.set_job_run_properties(db_ids=[job_docs[0].db_id], resources=resources) # this will update the DB entry +``` + +> IMPORTANT: When you restart VASP calculations, make sure to move the old VASP files somewhere else, +> because jobflow-remote will restart your calculation in the same directory and that leads to some clash of old and new files. + +# Update pre-existing job input parameters in the db + +```python +# Note that this way is a bit involved and you need to find exact structure of your nested db entry based on type of maker used + +# Following is an example for failed vasp job where NPAR and ALGO tags in DB entry are updated +from jobflow_remote.jobs.jobcontroller import JobController + +jc = JobController.from_project_name(project_name='YOUR_PROJECT_NAME') + +job_collection = jc.db.jobs # get jobs collection from mongoDB + +for i in job_collection.find({'db_id': '214'}): + job_dict = i['_id'] # get object id in mongodb (this is used as a filter) + incar_settings = i['job']['function']['@bound']['input_set_generator']['user_incar_settings'] # get existing user incar settings + +incar_settings.update({'NPAR': 2, 'ALGO': 'FAST'}) # now update incar settings here as per requirement +job_collection.update_one({'_id': job_dict}, {'$set': {'job.function.@bound.input_set_generator.user_incar_settings' : incar_settings}}) + +print(jc.get_jobs_doc(db_ids='214')[0].job.maker.input_set_generator.user_incar_settings) # check if entries are updated +``` +> IMPORTANT: When you restart VASP calculations, make sure to move the old VASP files somewhere else, +> because jobflow-remote will restart your calculation in the same directory and that leads to some clash of old and new files. + +# Some useful commands + +1. `jf job list` (list jobs in the db) +2.
`jf flow list` (list of flows in the db) +3. `jf job info jobid` (provides some info of job like workdir, error info if it failed) +4. `jf flow delete -did db_id` (deletes flow from db) +5. `jf flow -h` or `jf job -h` for checking other options + +# Some useful links + +1. Check slurm.py for finding different available options you can set for resources dict [here](https://github.com/Matgenix/qtoolkit/tree/develop/src/qtoolkit/io) +2. More details on project config and settings can be found [here](https://matgenix.github.io/jobflow-remote/user/projectconf.html) +3. Details on different setup options [here](https://matgenix.github.io/jobflow-remote/user/install.html) diff --git a/docs/user/setup.md b/docs/user/setup.md index b3e5b3409..36af44236 100644 --- a/docs/user/setup.md +++ b/docs/user/setup.md @@ -5,4 +5,11 @@ We are referring the user to the [installation guide of atomate2](https://materi be able to use `autoplex`. After setting up `atomate2`, make sure to add `VASP_INCAR_UPDATES: {"NPAR": number}` in your ~/atomate2/config/atomate2.yaml file. -Set a number that is a divisor of the number of tasks you use for the VASP calculations. \ No newline at end of file +Set a number that is a divisor of the number of tasks you use for the VASP calculations. + +You can manage your `autoplex` workflow using [`FireWorks`](https://materialsproject.github.io/fireworks/) or [`jobflow-remote`](https://matgenix.github.io/jobflow-remote/). +Please follow the installation and setup instructions on the respective guide website. +Both packages rely on the [MongoDB](https://www.mongodb.com/) database manager for data storage. + +We recommend using `jobflow-remote` as it is more flexible to use, especially on clusters where users cannot store their +own MongoDB. You can find a more comprehensive `jobflow-remote` tutorial [here](jobflowremote.md). 
\ No newline at end of file diff --git a/docs/user/test_project.yaml b/docs/user/test_project.yaml new file mode 100644 index 000000000..c3ce3767f --- /dev/null +++ b/docs/user/test_project.yaml @@ -0,0 +1,77 @@ +name: test_project +base_dir: /home/username/.jfremote/test_project +tmp_dir: /home/username/.jfremote/test_project/tmp +log_dir: /home/username/.jfremote/test_project/log +daemon_dir: /home/username/.jfremote/test_project/daemon +log_level: debug +runner: + delay_checkout: 30 + delay_check_run_status: 30 + delay_advance_status: 30 + delay_refresh_limited: 600 + delay_update_batch: 60 + lock_timeout: 86400 + delete_tmp_folder: true + max_step_attempts: 3 + delta_retry: + - 30 + - 300 + - 1200 +workers: + example_worker: + type: remote + scheduler_type: slurm + work_dir: /path/to/your/scratch/dir + resources: + pre_run: | + source activate autoplex + post_run: + timeout_execute: 120 + max_jobs: 10 + batch: + host: remote cluster + user: username + port: + password: + key_filename: + passphrase: + gateway: + forward_agent: + connect_timeout: + connect_kwargs: + inline_ssh_env: + keepalive: 60 + shell_cmd: bash + login_shell: true + interactive_login: true +queue: + store: + type: MongoStore + host: local machine + database: db name + username: user name + password: password + collection_name: jobs + flows_collection: flows + auxiliary_collection: jf_auxiliary + db_id_prefix: +exec_config: {} +jobstore: + docs_store: + type: MongoStore + database: db name + host: local machine + port: 27017 + username: user name + password: password + collection_name: outputs + additional_stores: + data: + type: GridFSStore + database: db name + host: local machine + port: 27017 + username: user name + password: password + collection_name: outputs_blobs +metadata: \ No newline at end of file