From a48adefa767acc2fd0e9b1db13db03f0f1c3adbf Mon Sep 17 00:00:00 2001
From: QuantumChemist <c.zitlau@live.com>
Date: Thu, 4 Jul 2024 21:36:39 +0200
Subject: [PATCH 1/6] improved docstring and added type-hints

---
 autoplex/benchmark/phonons/utils.py       | 15 ++--
 autoplex/data/common/utils.py             | 35 +++++-----
 autoplex/data/phonons/utils.py            |  6 +-
 autoplex/fitting/common/jobs.py           |  2 +-
 autoplex/fitting/common/regularization.py | 27 +++++---
 autoplex/fitting/common/utils.py          | 83 +++++++++++++----------
 6 files changed, 100 insertions(+), 68 deletions(-)

diff --git a/autoplex/benchmark/phonons/utils.py b/autoplex/benchmark/phonons/utils.py
index 7accb5b92..0a7014637 100644
--- a/autoplex/benchmark/phonons/utils.py
+++ b/autoplex/benchmark/phonons/utils.py
@@ -1,8 +1,15 @@
 """Utility functions for benchmarking jobs."""
 
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
 import matplotlib.pyplot as plt
 import numpy as np
-from pymatgen.phonon.bandstructure import PhononBandStructureSymmLine
+
+if TYPE_CHECKING:
+    from matplotlib.figure import Figure
+    from pymatgen.phonon.bandstructure import PhononBandStructureSymmLine
 from pymatgen.phonon.plotter import PhononBSPlotter
 
 
@@ -10,7 +17,7 @@ def get_rmse(
     ml_bs: PhononBandStructureSymmLine,
     dft_bs: PhononBandStructureSymmLine,
     q_dependent_rmse: bool = False,
-):
+) -> float | list[float]:
     """
     Compute root mean squared error (rmse) between DFT and ML phonon band-structure.
 
@@ -44,7 +51,7 @@ def rmse_qdep_plot(
     which_q_path=1,
     file_name="rms.pdf",
     img_format="pdf",
-):
+) -> plt:
     """
     Save q dependent root mean squared error plot between DFT and ML phonon band-structure.
 
@@ -94,7 +101,7 @@ def compare_plot(
     ml_bs: PhononBandStructureSymmLine,
     dft_bs: PhononBandStructureSymmLine,
     file_name: str = "band_comparison.pdf",
-):
+) -> Figure:
     """
     Save DFT and ML phonon band-structure overlay plot for visual comparison.
 
diff --git a/autoplex/data/common/utils.py b/autoplex/data/common/utils.py
index 16949df7e..acd966139 100644
--- a/autoplex/data/common/utils.py
+++ b/autoplex/data/common/utils.py
@@ -85,7 +85,7 @@ def scale_cell(
     volume_scale_factor_range: list[float] | None = None,
     n_structures: int = 10,
     volume_custom_scale_factors: list[float] | None = None,
-):
+) -> list[Structure]:
     """
     Take in a pymatgen Structure object and generates stretched or compressed structures.
 
@@ -154,9 +154,9 @@ def scale_cell(
     return distorted_cells
 
 
-def check_distances(structure: Structure, min_distance: float = 1.5):
+def check_distances(structure: Structure, min_distance: float = 1.5) -> bool:
     """
-    Take in a pymatgen Structure object and checks distances between atoms using minimum image convention.
+    Take in a pymatgen Structure object and check minimum distances between atoms using minimum image convention.
 
     Useful after distorting cell angles and rattling to check atoms aren't too close.
 
@@ -191,7 +191,7 @@ def random_vary_angle(
     w_angle: list[float] | None = None,
     n_structures: int = 8,
     angle_max_attempts: int = 1000,
-):
+) -> list[Structure]:
     """
     Take in a pymatgen Structure object and generates angle-distorted structures.
 
@@ -237,10 +237,10 @@ def random_vary_angle(
             volume_custom_scale_factors=[1.03],
         )
 
-        distorted_cells = AseAtomsAdaptor.get_atoms(distorted_cells[0])
+        distorted_supercells: Atoms = AseAtomsAdaptor.get_atoms(distorted_cells[0])
 
-        # getting stretched cell out of array
-        newcell = distorted_cells.cell.cellpar()
+        # getting stretched supercell out of array
+        newcell = distorted_supercells.cell.cellpar()
 
         # current angles
         alpha = atoms_copy.cell.cellpar()[3]
@@ -287,7 +287,7 @@ def std_rattle(
     n_structures: int = 5,
     rattle_std: float = 0.01,
     rattle_seed: int = 42,
-):
+) -> list[Structure]:
     """
     Take in a pymatgen Structure object and generates rattled structures.
 
@@ -331,7 +331,7 @@ def mc_rattle(
     min_distance: float = 1.5,
     rattle_seed: int = 42,
     rattle_mc_n_iter: int = 10,
-):
+) -> list[Structure]:
     """
     Take in a pymatgen Structure object and generates rattled structures.
 
@@ -375,7 +375,7 @@ def mc_rattle(
     return [AseAtomsAdaptor.get_structure(xtal) for xtal in mc_rattle]
 
 
-def extract_base_name(filename, is_out=False):
+def extract_base_name(filename, is_out=False) -> str:
     """
     Extract the base of a file name to easier manipulate other file names.
 
@@ -401,7 +401,7 @@ def extract_base_name(filename, is_out=False):
     return "A problem with the files occurred."
 
 
-def filter_outlier_energy(in_file, out_file, criteria: float = 0.0005):
+def filter_outlier_energy(in_file, out_file, criteria: float = 0.0005) -> None:
     """
     Filter data outliers per energy criteria and write them into files.
 
@@ -457,7 +457,9 @@ def filter_outlier_energy(in_file, out_file, criteria: float = 0.0005):
     )
 
 
-def filter_outlier_forces(in_file, out_file, symbol="Si", criteria: float = 0.1):
+def filter_outlier_forces(
+    in_file, out_file, symbol="Si", criteria: float = 0.1
+) -> None:
     """
     Filter data outliers per force criteria and write them into files.
 
@@ -526,13 +528,14 @@ def filter_outlier_forces(in_file, out_file, symbol="Si", criteria: float = 0.1)
     )
 
 
-# copied from libatoms GAP tutorial page and adjusted
 def energy_plot(
     in_file, out_file, ax, title: str = "Plot of energy", label: str = "energy"
-):
+) -> None:
     """
     Plot the distribution of energy per atom on the output vs the input.
 
+    Adapted and adjusted from libatoms GAP tutorial page https://libatoms.github.io/GAP/gap_fitting_tutorial.html.
+
     Parameters
     ----------
     in_file:
@@ -610,7 +613,7 @@ def force_plot(
     symbol: str = "Si",
     title: str = "Plot of force",
     label: str = "force for ",
-):
+) -> float:
     """
     Plot the distribution of force components per atom on the output vs the input.
 
@@ -700,7 +703,7 @@ def plot_energy_forces(
     species_list: list | None = None,
     train_name: str = "train.extxyz",
     test_name: str = "test.extxyz",
-):
+) -> None:
     """
     Plot energy and forces of the data.
 
diff --git a/autoplex/data/phonons/utils.py b/autoplex/data/phonons/utils.py
index 0ac58b380..0755c796f 100644
--- a/autoplex/data/phonons/utils.py
+++ b/autoplex/data/phonons/utils.py
@@ -18,7 +18,11 @@ def ml_phonon_maker_preparation(
     bulk_relax_maker: ForceFieldRelaxMaker,
     phonon_displacement_maker: ForceFieldStaticMaker,
     static_energy_maker: ForceFieldStaticMaker,
-):
+) -> tuple[
+    ForceFieldRelaxMaker | None,
+    ForceFieldStaticMaker | None,
+    ForceFieldStaticMaker | None,
+]:
     """
     Prepare the MLPhononMaker for the respective MLIP model.
 
diff --git a/autoplex/fitting/common/jobs.py b/autoplex/fitting/common/jobs.py
index f82f95bec..de175620f 100644
--- a/autoplex/fitting/common/jobs.py
+++ b/autoplex/fitting/common/jobs.py
@@ -33,7 +33,7 @@ def machine_learning_fit(
     **kwargs,
 ):
     """
-    Maker for fitting potential(s).
+    Job for fitting potential(s).
 
     Parameters
     ----------
diff --git a/autoplex/fitting/common/regularization.py b/autoplex/fitting/common/regularization.py
index 9c4dd56e5..3b8ad7593 100644
--- a/autoplex/fitting/common/regularization.py
+++ b/autoplex/fitting/common/regularization.py
@@ -5,10 +5,14 @@
 
 import traceback
 from contextlib import suppress
+from typing import TYPE_CHECKING, Any
 
 import numpy as np
 from scipy.spatial import ConvexHull, Delaunay
 
+if TYPE_CHECKING:
+    from ase import Atoms
+
 
 def set_sigma(
     atoms,
@@ -21,7 +25,7 @@ def set_sigma(
     element_order=None,
     max_energy=20.0,
     config_type_override=None,
-):
+) -> list[Atoms]:
     """
     Handle automatic regularisation based on distance to convex hull, amongst other things.
 
@@ -216,6 +220,7 @@ def set_sigma(
 
 
 def get_convex_hull(atoms, energy_name="energy", **kwargs):
+    # CE I don't get what the function returns
     """
     Calculate simple linear (E,V) convex hull.
 
@@ -276,7 +281,7 @@ def get_convex_hull(atoms, energy_name="energy", **kwargs):
     return lower_half_hull_points, p
 
 
-def get_e_distance_to_hull(hull: np.array, at, energy_name="energy", **kwargs):
+def get_e_distance_to_hull(hull: np.array, at, energy_name="energy", **kwargs) -> float:
     """
     Calculate the distance of a structure to the linear convex hull in energy.
 
@@ -315,7 +320,7 @@ def get_e_distance_to_hull(hull: np.array, at, energy_name="energy", **kwargs):
     )
 
 
-def get_intersect(a1, a2, b1, b2):
+def get_intersect(a1, a2, b1, b2) -> tuple[float, float] | tuple:
     """
     Return the point of intersection of the lines passing through a2,a1 and b2,b1.
 
@@ -339,7 +344,7 @@ def get_intersect(a1, a2, b1, b2):
     return x / z, y / z
 
 
-def get_x(at, element_order=None):
+def get_x(at, element_order=None) -> float | int:
     """
     Calculate the mole-fraction of a structure.
 
@@ -379,7 +384,7 @@ def get_x(at, element_order=None):
 
 def label_stoichiometry_volume(
     ats, isolated_atoms_energies, e_name, element_order=None
-):
+):  # CE I don't get what the function returns
     """
     Calculate the stoichiometry, energy, and volume coordinates for forming the convex hull.
 
@@ -412,7 +417,7 @@ def label_stoichiometry_volume(
     return p.T[:, np.argsort(p.T[0])].T
 
 
-def point_in_triangle_2D(p1, p2, p3, pn):
+def point_in_triangle_2D(p1, p2, p3, pn) -> bool:
     """
     Check if a point is inside a triangle in 2D.
 
@@ -449,7 +454,7 @@ def point_in_triangle_2D(p1, p2, p3, pn):
     )
 
 
-def point_in_triangle_ND(pn, *preg):
+def point_in_triangle_ND(pn, *preg) -> bool:
     """
     Check if a point is inside a region of hyperplanes in N dimensions.
 
@@ -467,7 +472,7 @@ def point_in_triangle_ND(pn, *preg):
     return hull.find_simplex(pn) >= 0
 
 
-def calculate_hull_3D(p):
+def calculate_hull_3D(p) -> ConvexHull:
     """
     Calculate the convex hull in 3D.
 
@@ -492,7 +497,7 @@ def calculate_hull_3D(p):
     return hull
 
 
-def calculate_hull_ND(p):
+def calculate_hull_ND(p) -> ConvexHull:
     """
     Calculate the convex hull in ND (N>=3).
 
@@ -531,7 +536,7 @@ def calculate_hull_ND(p):
 
 def get_e_distance_to_hull_3D(
     hull, at, isolated_atoms_energies=None, energy_name="energy", element_order=None
-):
+) -> float:
     """
     Calculate the energy distance to the convex hull in 3D.
 
@@ -579,7 +584,7 @@ def get_e_distance_to_hull_3D(
     return 1e6
 
 
-def piecewise_linear(x, vals):
+def piecewise_linear(x, vals) -> Any:
     """
     Piecewise linear.
 
diff --git a/autoplex/fitting/common/utils.py b/autoplex/fitting/common/utils.py
index d0c93df4b..aee17eb36 100644
--- a/autoplex/fitting/common/utils.py
+++ b/autoplex/fitting/common/utils.py
@@ -14,6 +14,11 @@
 from functools import partial
 from itertools import combinations
 from pathlib import Path
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from ase.atoms import Atom
+    from pymatgen.core import Structure
 
 import ase
 import lightning as pl
@@ -61,7 +66,7 @@ def gap_fitting(
     train_name: str = "train.extxyz",
     test_name: str = "test.extxyz",
     fit_kwargs: dict | None = None,  # pylint: disable=E3701
-):
+) -> dict:
     """
     GAP fit and validation job.
 
@@ -229,7 +234,7 @@ def ace_fitting(
     solver: str = "BLR",
     isolated_atoms_energies: dict | None = None,
     num_processes: int = 32,
-):
+) -> dict:
     """
     Perform the ACE (Atomic Cluster Expansion) potential fitting.
 
@@ -389,7 +394,7 @@ def nequip_fitting(
     default_dtype: str = "float32",
     isolated_atoms_energies: dict | None = None,
     device: str = "cuda",
-):
+) -> dict:
     """
     Perform the NequIP potential fitting.
 
@@ -629,7 +634,7 @@ def m3gnet_fitting(
     max_n: int = 4,
     device: str = "cuda",
     test_equal_to_val: bool = True,
-):
+) -> dict:
     """
     Perform the M3GNet potential fitting.
 
@@ -944,7 +949,7 @@ def mace_fitting(
     loss: str = None,
     default_dtype: str = None,
     device: str = "cuda",
-):
+) -> dict:
     """
     Perform the MACE potential fitting.
 
@@ -1026,7 +1031,7 @@ def mace_fitting(
     }
 
 
-def check_convergence(test_error):
+def check_convergence(test_error) -> bool:
     """
     Check the convergence of the fit.
 
@@ -1046,7 +1051,7 @@ def check_convergence(test_error):
     return convergence
 
 
-def load_gap_hyperparameter_defaults(gap_fit_parameter_file_path: str | Path):
+def load_gap_hyperparameter_defaults(gap_fit_parameter_file_path: str | Path) -> dict:
     """
     Load gap fit default parameters from the json file.
 
@@ -1069,7 +1074,7 @@ def gap_hyperparameter_constructor(
     include_two_body: bool = False,
     include_three_body: bool = False,
     include_soap: bool = False,
-):
+) -> list:
     """
     Construct a list of arguments needed to execute gap potential from the parameters' dict.
 
@@ -1129,7 +1134,7 @@ def gap_hyperparameter_constructor(
     return [*general, gap_hyperparameters]
 
 
-def get_list_of_vasp_calc_dirs(flow_output):
+def get_list_of_vasp_calc_dirs(flow_output) -> list[str]:
     """
     Return a list of vasp_calc_dirs from PhononDFTMLDataGenerationFlow output.
 
@@ -1143,7 +1148,7 @@ def get_list_of_vasp_calc_dirs(flow_output):
     list.
         A list of vasp_calc_dirs
     """
-    list_of_vasp_calc_dirs = []
+    list_of_vasp_calc_dirs: list[str] = []
     for output in flow_output.values():
         for output_type, dirs in output.items():
             if output_type != "phonon_data" and isinstance(dirs, list):
@@ -1163,7 +1168,7 @@ def vaspoutput_2_extended_xyz(
     regularization: float = 0.1,
     f_min: float = 0.01,  # unit: eV Å-1
     atom_wise_regularization: bool = True,
-):
+) -> None:
     """
     Parse all VASP output files (vasprun.xml/OUTCAR) and generates a vasp_ref.extxyz.
 
@@ -1227,7 +1232,7 @@ class Species:
     def __init__(self, atoms):
         self.atoms = atoms
 
-    def get_species(self):
+    def get_species(self) -> list[str]:
         """
         Get species.
 
@@ -1236,7 +1241,7 @@ def get_species(self):
         species_list:
             a list of species.
         """
-        species_list = []
+        species_list: list[str] = []
 
         for atom in self.atoms:
             symbol_all = atom.get_chemical_symbols()
@@ -1245,7 +1250,7 @@ def get_species(self):
 
         return species_list
 
-    def find_element_pairs(self, symbol_list=None):
+    def find_element_pairs(self, symbol_list=None) -> list:
         """
         Find element pairs.
 
@@ -1264,7 +1269,7 @@ def find_element_pairs(self, symbol_list=None):
 
         return list(combinations(species_list, 2))
 
-    def get_number_of_species(self):
+    def get_number_of_species(self) -> int:
         """
         Get number of species.
 
@@ -1275,7 +1280,7 @@ def get_number_of_species(self):
         """
         return int(len(self.get_species()))
 
-    def get_species_Z(self):
+    def get_species_Z(self) -> str:
         """
         Get species Z.
 
@@ -1297,7 +1302,7 @@ def get_species_Z(self):
         return species_Z
 
 
-def flatten(atoms_object, recursive=False):
+def flatten(atoms_object, recursive=False) -> list[str | bytes | Atoms] | list:
     """
     Flatten an iterable fully, but excluding Atoms objects.
 
@@ -1312,7 +1317,7 @@ def flatten(atoms_object, recursive=False):
     a flattened object, excluding the Atoms objects.
 
     """
-    iteration_list = []
+    iteration_list: list[str | bytes | Atoms] | list = []
 
     if recursive:
         for element in atoms_object:
@@ -1327,7 +1332,7 @@ def flatten(atoms_object, recursive=False):
     return [item for sublist in atoms_object for item in sublist]
 
 
-def gcm3_to_Vm(gcm3, mr, n_atoms=1):
+def gcm3_to_Vm(gcm3, mr, n_atoms=1) -> float:
     """
     Convert gcm3 to Vm.
 
@@ -1347,7 +1352,7 @@ def gcm3_to_Vm(gcm3, mr, n_atoms=1):
     return 1 / (n_atoms * (gcm3 / mr) * 6.022e23 / (1e8) ** 3)
 
 
-def get_atomic_numbers(species):
+def get_atomic_numbers(species) -> list[int]:
     """
     Get atomic numbers.
 
@@ -1370,7 +1375,13 @@ def get_atomic_numbers(species):
     return atom_numbers
 
 
-def stratified_dataset_split(atoms, split_ratio):
+def stratified_dataset_split(
+    atoms, split_ratio
+) -> tuple[
+    list[Atom | Atoms]
+    | list[Atom | Atoms | list[Atom | Atoms] | list[Atom | Atoms | list]],
+    list[Atom | Atoms | list[Atom | Atoms] | list[Atom | Atoms | list]],
+]:
     """
     Split the dataset.
 
@@ -1420,7 +1431,7 @@ def stratified_dataset_split(atoms, split_ratio):
     return train_structures, test_structures
 
 
-def data_distillation(vasp_ref_dir, f_max):
+def data_distillation(vasp_ref_dir, f_max) -> list[Atom | Atoms]:
     """
     For data distillation.
 
@@ -1454,7 +1465,7 @@ def data_distillation(vasp_ref_dir, f_max):
     return atoms_distilled
 
 
-def energy_remain(in_file):
+def energy_remain(in_file) -> float:
     """
     Plot the distribution of energy per atom on the output vs. the input.
 
@@ -1494,7 +1505,7 @@ def energy_remain(in_file):
     return rms["rmse"]
 
 
-def extract_gap_label(xml_file_path):
+def extract_gap_label(xml_file_path) -> str:
     """
     Extract GAP label.
 
@@ -1514,7 +1525,7 @@ def extract_gap_label(xml_file_path):
     return root.tag
 
 
-def plot_convex_hull(all_points, hull_points):
+def plot_convex_hull(all_points, hull_points) -> None:
     """
     Plot convex hull.
 
@@ -1588,7 +1599,7 @@ def calculate_delta(atoms_db: list[Atoms], e_name: str) -> tuple[float, ndarray]
     return es_var / avg_neigh, num_triplet
 
 
-def compute_pairs_triplets(atoms):
+def compute_pairs_triplets(atoms) -> list[float]:
     """
     Calculate the number of pairwise and triplet within a cutoff distance for a given list of atoms.
 
@@ -1618,7 +1629,7 @@ def compute_pairs_triplets(atoms):
     return [num_pair, num_triplet]
 
 
-def run_ace(num_processes: int, script_name: str):
+def run_ace(num_processes: int, script_name: str) -> None:
     """
     Julia-ACE script runner.
 
@@ -1638,7 +1649,7 @@ def run_ace(num_processes: int, script_name: str):
         subprocess.call(["julia", script_name], stdout=file_out, stderr=file_err)
 
 
-def run_gap(num_processes: int, parameters):
+def run_gap(num_processes: int, parameters) -> None:
     """
     GAP runner.
 
@@ -1660,7 +1671,7 @@ def run_gap(num_processes: int, parameters):
 
 def run_quip(
     num_processes: int, data_path, xml_file: str, filename: str, glue_xml: bool = False
-):
+) -> None:
     """
     QUIP runner.
 
@@ -1685,7 +1696,7 @@ def run_quip(
         subprocess.call(command, stdout=file_std, stderr=file_err, shell=True)
 
 
-def run_nequip(command: str, log_prefix: str):
+def run_nequip(command: str, log_prefix: str) -> None:
     """
     Nequip runner.
 
@@ -1703,7 +1714,7 @@ def run_nequip(command: str, log_prefix: str):
         subprocess.call(command.split(), stdout=file_out, stderr=file_err)
 
 
-def run_mace(hypers: list):
+def run_mace(hypers: list) -> None:
     """
     MACE runner.
 
@@ -1721,11 +1732,11 @@ def run_mace(hypers: list):
 
 def prepare_fit_environment(
     database_dir,
-    mlip_path,
+    mlip_path: Path,
     glue_xml: bool,
     train_name: str = "train.extxyz",
     test_name: str = "test.extxyz",
-):
+) -> Path:
     """
     Prepare the environment for the fit.
 
@@ -1763,7 +1774,9 @@ def prepare_fit_environment(
     return mlip_path
 
 
-def convert_xyz_to_structure(atoms_list, include_forces=True, include_stresses=True):
+def convert_xyz_to_structure(
+    atoms_list, include_forces=True, include_stresses=True
+) -> tuple[list[Structure], list, list[object], list[object]]:
     """
     Convert extxyz to pymatgen Structure format.
 
@@ -1812,7 +1825,7 @@ def write_after_distillation_data_split(
     vasp_ref_name: str = "vasp_ref.extxyz",
     train_name: str = "train.extxyz",
     test_name: str = "test.extxyz",
-):
+) -> None:
     """
     Write train.extxyz and test.extxyz after data distillation and split.
 

From f617cec6947b449350981d297fd32e4e74ade71d Mon Sep 17 00:00:00 2001
From: QuantumChemist <c.zitlau@live.com>
Date: Fri, 5 Jul 2024 17:52:07 +0200
Subject: [PATCH 2/6] improved documentation, and docstrings, variable
 readability and unit tests for regularizations

---
 autoplex/fitting/common/regularization.py    | 195 ++++++++++---------
 docs/user/flows/flows.md                     |   2 +
 tests/fitting/test_fitting_regularization.py | 186 +++++++++++-------
 3 files changed, 224 insertions(+), 159 deletions(-)

diff --git a/autoplex/fitting/common/regularization.py b/autoplex/fitting/common/regularization.py
index 3b8ad7593..a8cb5d4ee 100644
--- a/autoplex/fitting/common/regularization.py
+++ b/autoplex/fitting/common/regularization.py
@@ -5,7 +5,7 @@
 
 import traceback
 from contextlib import suppress
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING
 
 import numpy as np
 from scipy.spatial import ConvexHull, Delaunay
@@ -219,45 +219,50 @@ def set_sigma(
     return atoms_modi
 
 
-def get_convex_hull(atoms, energy_name="energy", **kwargs):
-    # CE I don't get what the function returns
+def get_convex_hull(
+    atoms, energy_name="energy", **kwargs
+) -> tuple[np.ndarray, np.ndarray]:
     """
-    Calculate simple linear (E,V) convex hull.
+    Calculate the simple linear (E,V) convex hull.
 
     Parameters
     ----------
-    atoms: (list)
-        list of atoms objects
-    energy_name: (str)
-        name of energy key in atoms.info (typically a DFT energy)
+    atoms: list
+        List of atoms objects.
+    energy_name: str
+        Name of the energy key in atoms.info (typically a DFT energy).
 
     Returns
     -------
-        the list of points in the convex hull (lower half only),
-        and additionally all the points for testing purposes
+    tuple
+        A tuple containing two elements:
+        - lower_half_hull_points: list of points (volume, energy) in the convex hull (lower half only).
+        - p: list of all points for testing purposes.
 
     """
-    p = []
-    ct = 0
-    for at in atoms:
-        if (at.info["config_type"] == "IsolatedAtom") or (
-            at.info["config_type"] == "dimer"
-        ):
+    points_list = []
+    failed_count = 0
+
+    for atom in atoms:
+        if atom.info["config_type"] in ["IsolatedAtom", "dimer"]:
             continue
         try:
-            v = at.get_volume() / len(at)
-            e = at.info[energy_name] / len(at)
-            p.append((v, e))
-        except Exception:
-            ct += 1
-    if ct > 0:
-        raise ValueError(f"Convex hull failed to include {ct}/{len(atoms)} structures")
+            volume_per_atom = atom.get_volume() / len(atom)
+            energy_per_atom = atom.info[energy_name] / len(atom)
+            points_list.append((volume_per_atom, energy_per_atom))
+        except KeyError:
+            failed_count += 1
+
+    if failed_count > 0:
+        raise ValueError(
+            f"Convex hull failed to include {failed_count}/{len(atoms)} structures"
+        )
 
-    p = np.array(p)
-    p = p.T[:, np.argsort(p.T[0])].T  # sort in volume axis
+    points = np.array(points_list)
+    points = points.T[:, np.argsort(points.T[0])].T  # sort by volume axis
 
-    hull = ConvexHull(p)  # generates full convex hull, we only want bottom half
-    hull_points = p[hull.vertices]
+    hull = ConvexHull(points)  # generate full convex hull
+    hull_points = points[hull.vertices]
 
     min_x_index = np.argmin(hull_points[:, 0])
     max_x_index = np.argmax(hull_points[:, 0])
@@ -272,16 +277,18 @@ def get_convex_hull(atoms, energy_name="energy", **kwargs):
             lower_half_hull.append(hull.vertices[i])
             break
 
-    lower_half_hull_points = p[lower_half_hull]
+    lower_half_hull_points = points[lower_half_hull]
 
     lower_half_hull_points = lower_half_hull_points[
         lower_half_hull_points[:, 1] <= np.max(lower_half_hull_points[:, 1])
     ]
 
-    return lower_half_hull_points, p
+    return lower_half_hull_points, points
 
 
-def get_e_distance_to_hull(hull: np.array, at, energy_name="energy", **kwargs) -> float:
+def get_e_distance_to_hull(
+    hull: np.array, atoms, energy_name="energy", **kwargs
+) -> float:
     """
     Calculate the distance of a structure to the linear convex hull in energy.
 
@@ -289,14 +296,14 @@ def get_e_distance_to_hull(hull: np.array, at, energy_name="energy", **kwargs) -
     ----------
     hull: (np.array)
         points in the convex hull
-    at: (ase.Atoms)
+    atoms: (Atoms)
         structure to calculate distance to hull
     energy_name: (str)
         name of energy key in atoms.info (typically a DFT energy)
 
     """
-    volume = at.get_volume() / len(at)
-    energy = at.info[energy_name] / len(at)
+    volume = atoms.get_volume() / len(atoms)
+    energy = atoms.info[energy_name] / len(atoms)
     tp = np.array([volume, energy])
     hull_ps = hull.points if isinstance(hull, ConvexHull) else hull
 
@@ -344,13 +351,13 @@ def get_intersect(a1, a2, b1, b2) -> tuple[float, float] | tuple:
     return x / z, y / z
 
 
-def get_x(at, element_order=None) -> float | int:
+def get_mole_frac(atoms, element_order=None) -> float | int:
     """
     Calculate the mole-fraction of a structure.
 
     Parameters
     ----------
-    at: (ase.Atoms)
+    atoms: (Atoms)
         structure to calculate mole-fraction of
     element_order: (list)
         list of atomic numbers in order of choice (e.g. [42, 16] for MoS2)
@@ -361,21 +368,25 @@ def get_x(at, element_order=None) -> float | int:
         reduced mole-fraction of structure - first element n = 1-sum(others)
 
     """
-    el, cts = np.unique(at.get_atomic_numbers(), return_counts=True)
+    element, cts = np.unique(atoms.get_atomic_numbers(), return_counts=True)
 
-    if element_order is None and len(el) < 3:  # compatibility with old version
-        x = cts[1] / sum(cts) if len(el) == 2 else 1
+    if element_order is None and len(element) < 3:  # compatibility with old version
+        x = cts[1] / sum(cts) if len(element) == 2 else 1
 
     else:  # new version, requires element_order, recommended for all new calculations
         if element_order is None:
-            element_order = el  # use default order
-        not_in = [i for i in element_order if i not in el]
+            element_order = element  # use default order
+        not_in = [i for i in element_order if i not in element]
         for i in not_in:
-            el = np.insert(el, -1, i)
+            element = np.insert(element, -1, i)
             cts = np.insert(cts, -1, 0)
 
-        cts = np.array([cts[np.argwhere(el == i).squeeze()] for i in element_order])
-        el = np.array([el[np.argwhere(el == i).squeeze()] for i in element_order])
+        cts = np.array(
+            [cts[np.argwhere(element == i).squeeze()] for i in element_order]
+        )
+        element = np.array(
+            [element[np.argwhere(element == i).squeeze()] for i in element_order]
+        )
 
         x = cts[1:] / sum(cts)
 
@@ -383,38 +394,38 @@ def get_x(at, element_order=None) -> float | int:
 
 
 def label_stoichiometry_volume(
-    ats, isolated_atoms_energies, e_name, element_order=None
-):  # CE I don't get what the function returns
+    atoms_list, isolated_atoms_energies, energy_name, element_order=None
+) -> np.ndarray:
     """
     Calculate the stoichiometry, energy, and volume coordinates for forming the convex hull.
 
     Parameters
     ----------
-    ats: (list)
+    atoms_list: (Atoms)
         list of atoms objects
     isolated_atoms_energies: (dict)
         dictionary of isolated atom energies {atomic_number: energy}
-    e_name: (str)
+    energy_name: (str)
         name of energy key in atoms.info (typically a DFT energy)
     element_order: (list)
         list of atomic numbers in order of choice (e.g. [42, 16] for MoS2)
 
     """
-    p = []
-    for at in ats:
+    points_list = []
+    for atom in atoms_list:
         try:
-            v = at.get_volume() / len(at)
+            volume = atom.get_volume() / len(atom)
             # make energy relative to isolated atoms
-            e = (
-                at.info[e_name]
-                - sum([isolated_atoms_energies[j] for j in at.get_atomic_numbers()])
-            ) / len(at)
-            x = get_x(at, element_order=element_order)
-            p.append(np.hstack((x, v, e)))
-        except Exception:
+            energy = (
+                atom.info[energy_name]
+                - sum([isolated_atoms_energies[j] for j in atom.get_atomic_numbers()])
+            ) / len(atom)
+            mole_frac = get_mole_frac(atom, element_order=element_order)
+            points_list.append(np.hstack((mole_frac, volume, energy)))
+        except KeyError:
             traceback.print_exc()
-    p = np.array(p)
-    return p.T[:, np.argsort(p.T[0])].T
+    points = np.array(points_list)
+    return points.T[:, np.argsort(points.T[0])].T
 
 
 def point_in_triangle_2D(p1, p2, p3, pn) -> bool:
@@ -472,14 +483,14 @@ def point_in_triangle_ND(pn, *preg) -> bool:
     return hull.find_simplex(pn) >= 0
 
 
-def calculate_hull_3D(p) -> ConvexHull:
+def calculate_hull_3D(points_3D) -> ConvexHull:
     """
     Calculate the convex hull in 3D.
 
     Parameters
     ----------
-    p:
-        point
+    points_3D:
+        point in 3D
 
     Returns
     -------
@@ -487,9 +498,13 @@ def calculate_hull_3D(p) -> ConvexHull:
 
     """
     p0 = np.array(
-        [(p[:, i].max() - p[:, i].min()) / 2 + p[:, i].min() for i in range(2)] + [-1e6]
+        [
+            (points_3D[:, i].max() - points_3D[:, i].min()) / 2 + points_3D[:, i].min()
+            for i in range(2)
+        ]
+        + [-1e6]
     )  # test point to get the visible facets from below
-    pn = np.vstack((p0, p))
+    pn = np.vstack((p0, points_3D))
 
     hull = ConvexHull(pn, qhull_options="QG0")
     hull.remove_dim = []
@@ -497,14 +512,14 @@ def calculate_hull_3D(p) -> ConvexHull:
     return hull
 
 
-def calculate_hull_ND(p) -> ConvexHull:
+def calculate_hull_ND(points_ND) -> ConvexHull:
     """
     Calculate the convex hull in ND (N>=3).
 
     Parameters
     ----------
-    p:
-        point
+    points_ND:
+        point in ND.
 
     Returns
     -------
@@ -513,16 +528,16 @@ def calculate_hull_ND(p) -> ConvexHull:
     """
     p0 = np.array(
         [
-            (p[:, i].max() - p[:, i].min()) / 2 + p[:, i].min()
-            for i in range(p.shape[1] - 1)
+            (points_ND[:, i].max() - points_ND[:, i].min()) / 2 + points_ND[:, i].min()
+            for i in range(points_ND.shape[1] - 1)
         ]
         + [-1e6]
     )  # test point to get the visible facets from below
-    pn = np.vstack((p0, p))
+    pn = np.vstack((p0, points_ND))
     remove_dim = []
 
-    for i in range(p.shape[1]):
-        if np.all(p.T[i, 0] == p.T[i, :]):
+    for i in range(points_ND.shape[1]):
+        if np.all(points_ND.T[i, 0] == points_ND.T[i, :]):
             pn = np.delete(pn, i, axis=1)
             print(f"Convex hull lower dimensional - removing dimension {i}")
             remove_dim.append(i)
@@ -535,7 +550,7 @@ def calculate_hull_ND(p) -> ConvexHull:
 
 
 def get_e_distance_to_hull_3D(
-    hull, at, isolated_atoms_energies=None, energy_name="energy", element_order=None
+    hull, atoms, isolated_atoms_energies=None, energy_name="energy", element_order=None
 ) -> float:
     """
     Calculate the energy distance to the convex hull in 3D.
@@ -544,7 +559,7 @@ def get_e_distance_to_hull_3D(
     ----------
     hull:
         convex hull.
-    at: (ase.Atoms)
+    atoms: (ase.Atoms)
         structure to calculate mole-fraction of
     isolated_atoms_energies: (dict)
         dictionary of isolated atom energies
@@ -554,37 +569,41 @@ def get_e_distance_to_hull_3D(
         list of atomic numbers in order of choice (e.g. [42, 16] for MoS2)
 
     """
-    x = get_x(at, element_order=element_order)
-    e = (
-        at.info[energy_name]
-        - sum([isolated_atoms_energies[j] for j in at.get_atomic_numbers()])
-    ) / len(at)
-    v = at.get_volume() / len(at)
-
-    sp = np.hstack([x, v, e])
+    mole_frac = get_mole_frac(atoms, element_order=element_order)
+    energy = (
+        atoms.info[energy_name]
+        - sum([isolated_atoms_energies[j] for j in atoms.get_atomic_numbers()])
+    ) / len(atoms)
+    volume = atoms.get_volume() / len(atoms)
+
+    sp = np.hstack([mole_frac, volume, energy])
     for i in hull.remove_dim:
         sp = np.delete(sp, i)
 
     if len(sp[:-1]) == 1:
         # print('doing convexhull analysis in 1D')
-        return get_e_distance_to_hull(hull, at, energy_name=energy_name)
+        return get_e_distance_to_hull(hull, atoms, energy_name=energy_name)
 
     for _ct, visible_facet in enumerate(hull.simplices[hull.good]):
         if point_in_triangle_ND(sp[:-1], *hull.points[visible_facet][:, :-1]):
             n_3 = hull.points[visible_facet]
-            e = sp[-1]
+            energy = sp[-1]
 
             norm = np.cross(n_3[2] - n_3[0], n_3[1] - n_3[0])
-            norm = norm / np.linalg.norm(norm)  # plane normal
-            D = np.dot(norm, n_3[0])  # plane constant
+            plane_norm = norm / np.linalg.norm(norm)  # plane normal
+            plane_constant = np.dot(plane_norm, n_3[0])  # plane constant
 
-            return e - (D - norm[0] * sp[0] - norm[1] * sp[1]) / norm[2]
+            return (
+                energy
+                - (plane_constant - plane_norm[0] * sp[0] - plane_norm[1] * sp[1])
+                / plane_norm[2]
+            )
 
     print("Failed to find distance to hull")
     return 1e6
 
 
-def piecewise_linear(x, vals) -> Any:
+def piecewise_linear(x, vals) -> np.ndarray:
     """
     Piecewise linear.
 
diff --git a/docs/user/flows/flows.md b/docs/user/flows/flows.md
index f336d4352..bd4eeb50f 100644
--- a/docs/user/flows/flows.md
+++ b/docs/user/flows/flows.md
@@ -53,6 +53,8 @@ The `autoplex` workflow will then perform automated VASP and `phonopy` calculati
 Of course, you can change and adjust the settings to your own needs, e.g. by setting a smaller supercell for the 
 `phonopy` calculations using `CompleteDFTvsMLBenchmarkWorkflow(min_length=15).make(...)`. 
 You can find more details on the subsequent tutorial pages.
+With additional flows or jobs in the `[complete_flow]` list, 
+you can combine the `autoplex` flow with other flows and jobs.
 
 The following workflow diagram will give you an overview of the flows and jobs in the default autoplex workflow:
 ```{mermaid}
diff --git a/tests/fitting/test_fitting_regularization.py b/tests/fitting/test_fitting_regularization.py
index 2730d972b..a65f220d3 100644
--- a/tests/fitting/test_fitting_regularization.py
+++ b/tests/fitting/test_fitting_regularization.py
@@ -7,7 +7,7 @@
     get_convex_hull,
     get_e_distance_to_hull,
     get_intersect,
-    get_x,
+    get_mole_frac,
     label_stoichiometry_volume,
     point_in_triangle_ND,
     point_in_triangle_2D,
@@ -63,79 +63,123 @@ def test_set_sigma(test_dir):
 
 
 def test_auxiliary_functions(test_dir, memory_jobstore, clean_dir):
-    from jobflow import run_locally
     from ase.io import read
+    from ase import Atoms
     import numpy as np
-    import scipy
 
     file = test_dir / "fitting" / "ref_files" / "quip_train.extxyz"
-
-    atoms = read(file, ":")
-
-    try:
-        get_convex = get_convex_hull(atoms)
-
-        responses = run_locally(
-            get_convex, ensure_success=True, create_folders=True, store=memory_jobstore
-        )
-
-    except ValueError:
-        print("\nDOES NOT run as intended, error 'Convex hull failed to include 10/10 structures'")
-        assert True
-
-    generic_array = np.array([1, 2, 3, 4, 5])
-
-    try:
-        get_e_dist_hull = get_e_distance_to_hull(generic_array, atoms)
-    except AttributeError:
-        print("\nTODO: implement proper unit test")
-        assert True
-
-    point1, point2, point3, point4 = [1, 5], [2, 9], [8, 7], [9, 3]
-    point = np.array([[1, 2, 3], [4, 5, 6]])
-
+    atoms: Atoms = read(file, ":")
+
+    # Define the arrays
+    array1 = np.array([
+        [15.2266087, -3.80983557],
+        [15.2266087, -3.81106994],
+        [16.2004607, -3.81927384],
+        [8000.0, -0.28663766]
+    ])
+
+    array2 = np.array([
+        [15.2266087, -3.80983557],
+        [15.2266087, -3.81106994],
+        [16.2004607, -3.81927384],
+        [16.2004607, -3.81927264],
+        [16.4281758, -3.81869979],
+        [17.6913485, -3.80636951],
+        [17.6913485, -3.80665250],
+        [19.0176670, -3.77969777],
+        [8000.0, -0.27567309],
+        [8000.0, -0.28663766]
+    ])
+
+    array3 = np.array([
+        [0.5, 17.6913485, -3.53493109],
+        [0.5, 15.2266087, -3.53839715],
+        [0.5, 16.2004607, -3.54783542],
+        [0.5, 16.2004607, -3.54783422],
+        [0.5, 17.6913485, -3.53521408],
+        [0.5, 16.4281758, -3.54726137],
+        [0.5, 19.017667, -3.50825935],
+        [0.5, 15.2266087, -3.53963152],
+        [1.0, 8000.0, -0.01928852],
+        [1.0, 8000.0, -0.00014539]
+    ])
+
+    lower_half_hull_points, points = get_convex_hull(atoms, energy_name="REF_energy")
+    assert np.allclose(lower_half_hull_points, array1)
+    assert np.allclose(points, array2)
+
+    label = label_stoichiometry_volume(atoms, {3: -0.28649227, 17: -0.25638457}, "REF_energy")
+    assert np.allclose(label, array3)
+
+    calc_hull = calculate_hull_ND(points)
+    calc_hull_3D = calculate_hull_3D(label)
+    fraction_list = [[1.0]] + [[0.0]] + [[0.5]] * 8
+
+    for atom, fraction in zip(atoms, fraction_list):
+        get_e_dist_hull = get_e_distance_to_hull(calc_hull, atom, energy_name="REF_energy")
+        assert get_e_dist_hull == 0
+        get_e_dist_hull_3D = get_e_distance_to_hull_3D(calc_hull_3D, atom, {3: -0.28649227, 17: -0.25638457},
+                                                       "REF_energy")
+        assert round(get_e_dist_hull_3D) == 0
+        getmole_frac = get_mole_frac(atom, element_order=[3, 17])
+        assert getmole_frac == fraction
+
+    point1, point2, point3, point4 = (1, 5), (2, 9), (8, 7), (9, 3)
     get_inter = get_intersect(point1, point2, point3, point4)
-
-    try:
-        getx = get_x(atoms)
-    except AttributeError:
-        print("\nTODO: implement proper unit test")
-        assert True
-
-    try:
-        label = label_stoichiometry_volume(atoms, {3: -0.28649227, 17: -0.25638457}, "energy")
-    except IndexError:
-        print("\nTODO: implement proper unit test")
-        assert True
-
-    try:
-        point_ND = point_in_triangle_ND(point)
-    except ValueError:
-        print("\nTODO: implement proper unit test")
-        assert True
-
+    assert get_inter == (4.75, 20.0)
     point_2d = point_in_triangle_2D(point1, point2, point3, point4)
-
-    try:
-        calc_hull = calculate_hull_ND(point)
-    except scipy.spatial._qhull.QhullError:
-        print("\nTODO: implement proper unit test")
-        assert True
-
-    try:
-        calc_hull_3D = calculate_hull_3D(point)
-    except scipy.spatial._qhull.QhullError:
-        print("\nTODO: implement proper unit test")
-        assert True
-
-    try:
-        get_e_dist_hull_3D = get_e_distance_to_hull_3D(generic_array, atoms, {3: -0.28649227, 17: -0.25638457}, "energy")
-    except AttributeError:
-        print("\nTODO: implement proper unit test")
-        assert True
-
-    try:
-        piece_lin = piecewise_linear(point1, point)
-    except IndexError:
-        print("\nTODO: implement proper unit test")
-        assert True
+    assert point_2d is False
+
+    # Define test values
+    vals = [
+        (1.0, [1.0, 2.0, 3.0]),
+        (2.0, [2.0, 3.0, 4.0]),
+        (3.0, [3.0, 4.0, 5.0]),
+        (4.0, [4.0, 5.0, 6.0])
+    ]
+
+    # Define test values
+    x = 2.5
+    expected_result = np.array([2.5, 3.5, 4.5])
+
+    piece_lin = piecewise_linear(x, vals)
+    assert np.allclose(piece_lin, expected_result)
+
+    # Define a test case for 2D (Triangle)
+    point_2D_inside = np.array([0.5, 0.5])
+    region_2D = [
+        np.array([0.0, 0.0]),
+        np.array([1.0, 0.0]),
+        np.array([0.0, 1.0])
+    ]
+
+    point_2D_outside = np.array([1.5, 1.5])
+
+    # Test 2D case
+    inside_result_2D = point_in_triangle_ND(point_2D_inside, *region_2D)
+    outside_result_2D = point_in_triangle_ND(point_2D_outside, *region_2D)
+
+    # Point point_2D_inside inside region:
+    assert inside_result_2D
+    # Point point_2D_outside outside region:
+    assert not outside_result_2D
+
+    # Define a test case for 3D (Tetrahedron)
+    point_3D_inside = np.array([0.25, 0.25, 0.25])
+    region_3D = [
+        np.array([0.0, 0.0, 0.0]),
+        np.array([1.0, 0.0, 0.0]),
+        np.array([0.0, 1.0, 0.0]),
+        np.array([0.0, 0.0, 1.0])
+    ]
+
+    point_3D_outside = np.array([1.0, 1.0, 1.0])
+
+    # Test 3D case
+    inside_result_3D = point_in_triangle_ND(point_3D_inside, *region_3D)
+    outside_result_3D = point_in_triangle_ND(point_3D_outside, *region_3D)
+
+    # Point point_3D_inside inside region:
+    assert inside_result_3D
+    # Point point_3D_outside outside region:
+    assert not outside_result_3D

From 517c48adfd6de8e035cde8ef01c397c7adb7437d Mon Sep 17 00:00:00 2001
From: QuantumChemist <c.zitlau@live.com>
Date: Fri, 5 Jul 2024 18:19:38 +0200
Subject: [PATCH 3/6] documentation improvements

---
 README.md                | 31 ++++---------------------------
 docs/dev/contributing.md | 26 ++++++++++++++++++++++++++
 docs/index.md            |  3 ++-
 docs/user/index.md       |  2 +-
 4 files changed, 33 insertions(+), 29 deletions(-)
 create mode 100644 docs/dev/contributing.md

diff --git a/README.md b/README.md
index 65055a9b7..278b406d8 100644
--- a/README.md
+++ b/README.md
@@ -7,33 +7,6 @@
 
 `autoplex` is an evolving project and **contributions are very welcome**! To ensure that the code remains of high quality, please raise a pull request for any contributions, which will be reviewed before integration into the main branch of the code. In the beginning, Janine will take care of the reviews.
 
-# General code structure
-- We are currently aiming to follow the code structure below for each submodule (This is an initial idea; of course, this could change depending on the needs in the future)
-  - autoplex/submodule/job.py (any jobs defined will be inside this module)
-  - autoplex/submodule/flows.py (workflows defined will be hosted in this module)
-  - autoplex/submodule/utils.py (all functions that act as utilities for defining flow or job, for example, a small subtask to calculate some metric or plotting, will be hosted in this module)
-
-# Guidelines for contributions
-- Please write unit tests; this is a requirement for any added code to be accepted. (Automated testing will be performed using `pytest`; you can look into the `tests` folder for examples).
-- Please ensure high coverage of the code based on the tests (you can test this with `coverage`).
-- Please use numpy docstrings (use an IDE and switch on this docstring type; you can check examples in our code base; the docstring should be useful for other people)
-- Please ensure that type hints are added for each variable, function, class, and method (this helps code readability, especially if someone else wants to build on your code).
-- Please write the code in a way that gives users the option to change parameters (this is mainly applicable, for example, fitting protocols/flows). In other words, please avoid hardcoding settings or physical properties. Reasonable default values should be set, but the user needs to have the opportunity to modify them if they wish.
-
-# Formatting requirements
-- Variable names should be descriptive and should use snake case (`variable_name`, not `VariableName`).
-- If you define a `Maker`, please use python class naming convention (e.g., `PhononMaker`, `RssMaker`).
-
-# Commit guidelines
-1. `pip install pre-commit`.
-2. Next, run `pre-commit install` (this will install all the hooks from pre-commit-config.yaml)
-3. Step 1 and 2 needs to be done only once in the local repository
-4. Proceed with modifying the code and adding commits as usual. This should automatically run the linters.
-5. To manually run the pre-commit hooks on all files, just use `pre-commit run --all-files`
-6. To run pre-commit on a specific file, use `pre-commit run --files path/to/your/modified/module/`
-
-Please check out atomate2 for example code (https://github.com/materialsproject/atomate2)
-
 # Setup
 
 In order to setup the mandatory prerequisites to be able to use `autoplex`, please follow the [installation guide of atomate2](https://materialsproject.github.io/atomate2/user/install.html).
@@ -71,6 +44,10 @@ Pkg.add("DataFrames")
 Pkg.add("CSV")
 ```
 
+# Contributing guidelines
+
+Please follow the [contributing guidelines](docs/dev/contributing.md)!
+
 # Workflow overview
 
 The following [Mermaid](https://mermaid.live/) diagram will give you an overview of the flows and jobs in the default autoplex workflow:
diff --git a/docs/dev/contributing.md b/docs/dev/contributing.md
new file mode 100644
index 000000000..42d32d635
--- /dev/null
+++ b/docs/dev/contributing.md
@@ -0,0 +1,26 @@
+# General code structure
+- We are currently aiming to follow the code structure below for each submodule (This is an initial idea; of course, this could change depending on the needs in the future)
+  - autoplex/submodule/job.py (any jobs defined will be inside this module)
+  - autoplex/submodule/flows.py (workflows defined will be hosted in this module)
+  - autoplex/submodule/utils.py (all functions that act as utilities for defining flow or job, for example, a small subtask to calculate some metric or plotting, will be hosted in this module)
+
+# Guidelines for contributions
+- Please write unit tests; this is a requirement for any added code to be accepted. (Automated testing will be performed using `pytest`; you can look into the `tests` folder for examples).
+- Please ensure high coverage of the code based on the tests (you can test this with `coverage`).
+- Please use numpy docstrings (use an IDE and switch on this docstring type; you can check examples in our code base; the docstring should be useful for other people)
+- Please ensure that type hints are added for each variable, function, class, and method (this helps code readability, especially if someone else wants to build on your code).
+- Please write the code in a way that gives users the option to change parameters (this is mainly applicable, for example, fitting protocols/flows). In other words, please avoid hardcoding settings or physical properties. Reasonable default values should be set, but the user needs to have the opportunity to modify them if they wish.
+
+# Formatting requirements
+- Variable names should be descriptive and should use snake case (`variable_name`, not `VariableName`).
+- If you define a `Maker`, please use python class naming convention (e.g., `PhononMaker`, `RssMaker`).
+
+# Commit guidelines
+1. `pip install pre-commit`.
+2. Next, run `pre-commit install` (this will install all the hooks from pre-commit-config.yaml)
+3. Step 1 and 2 needs to be done only once in the local repository
+4. Proceed with modifying the code and adding commits as usual. This should automatically run the linters.
+5. To manually run the pre-commit hooks on all files, just use `pre-commit run --all-files`
+6. To run pre-commit on a specific file, use `pre-commit run --files path/to/your/modified/module/`
+
+Please check out atomate2 for example code (https://github.com/materialsproject/atomate2)
\ No newline at end of file
diff --git a/docs/index.md b/docs/index.md
index 05a91ef15..5678f9e09 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -13,9 +13,10 @@ reference/index
 ```
 
 ```{toctree}
-:caption: Developer Guide
+:caption: Contirbuting Guide
 :hidden:
 dev/dev_install
+dev/contributing
 ```
 
 ```{toctree}
diff --git a/docs/user/index.md b/docs/user/index.md
index 9cd930abd..c0f2b1eca 100644
--- a/docs/user/index.md
+++ b/docs/user/index.md
@@ -3,6 +3,6 @@ Getting started
 ```{include} ../../README.md
 ---
 start-line: 3
-end-line: 73
+end-line: 51
 ---
 ```

From f5849e95435bd51b80ae311260256c694704bc34 Mon Sep 17 00:00:00 2001
From: QuantumChemist <c.zitlau@live.com>
Date: Fri, 5 Jul 2024 18:42:37 +0200
Subject: [PATCH 4/6] improving docstrings and documentation

---
 autoplex/fitting/common/jobs.py |  2 +-
 docs/dev/contributing.md        | 19 ++++++++++---------
 docs/user/index.md              |  5 ++++-
 3 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/autoplex/fitting/common/jobs.py b/autoplex/fitting/common/jobs.py
index de175620f..020e2a987 100644
--- a/autoplex/fitting/common/jobs.py
+++ b/autoplex/fitting/common/jobs.py
@@ -1,4 +1,4 @@
-"""fitting using GAP."""
+"""General fitting jobs using several MLIPs available."""
 from __future__ import annotations
 
 from pathlib import Path
diff --git a/docs/dev/contributing.md b/docs/dev/contributing.md
index 42d32d635..3d0896f70 100644
--- a/docs/dev/contributing.md
+++ b/docs/dev/contributing.md
@@ -1,21 +1,22 @@
-# General code structure
-- We are currently aiming to follow the code structure below for each submodule (This is an initial idea; of course, this could change depending on the needs in the future)
-  - autoplex/submodule/job.py (any jobs defined will be inside this module)
-  - autoplex/submodule/flows.py (workflows defined will be hosted in this module)
-  - autoplex/submodule/utils.py (all functions that act as utilities for defining flow or job, for example, a small subtask to calculate some metric or plotting, will be hosted in this module)
-
 # Guidelines for contributions
 - Please write unit tests; this is a requirement for any added code to be accepted. (Automated testing will be performed using `pytest`; you can look into the `tests` folder for examples).
 - Please ensure high coverage of the code based on the tests (you can test this with `coverage`).
 - Please use numpy docstrings (use an IDE and switch on this docstring type; you can check examples in our code base; the docstring should be useful for other people)
 - Please ensure that type hints are added for each variable, function, class, and method (this helps code readability, especially if someone else wants to build on your code).
-- Please write the code in a way that gives users the option to change parameters (this is mainly applicable, for example, fitting protocols/flows). In other words, please avoid hardcoding settings or physical properties. Reasonable default values should be set, but the user needs to have the opportunity to modify them if they wish.
+- Please write the code in a way that gives users the option to change parameters (this is mainly applicable, for example, fitting protocols/flows). In other words, please avoid hardcoding settings or physical properties. 
+Reasonable default values should be set, but the user needs to have the opportunity to modify them if they wish.
+
+## General code structure
+- We are currently aiming to follow the code structure below for each submodule (This is an initial idea; of course, this could change depending on the needs in the future)
+  - autoplex/submodule/job.py (any jobs defined will be inside this module)
+  - autoplex/submodule/flows.py (workflows defined will be hosted in this module)
+  - autoplex/submodule/utils.py (all functions that act as utilities for defining flow or job, for example, a small subtask to calculate some metric or plotting, will be hosted in this module)
 
-# Formatting requirements
+## Formatting requirements
 - Variable names should be descriptive and should use snake case (`variable_name`, not `VariableName`).
 - If you define a `Maker`, please use python class naming convention (e.g., `PhononMaker`, `RssMaker`).
 
-# Commit guidelines
+## Commit guidelines
 1. `pip install pre-commit`.
 2. Next, run `pre-commit install` (this will install all the hooks from pre-commit-config.yaml)
 3. Step 1 and 2 needs to be done only once in the local repository
diff --git a/docs/user/index.md b/docs/user/index.md
index c0f2b1eca..338c03aea 100644
--- a/docs/user/index.md
+++ b/docs/user/index.md
@@ -3,6 +3,9 @@ Getting started
 ```{include} ../../README.md
 ---
 start-line: 3
-end-line: 51
+end-line: 46
 ---
 ```
+# Contributing guidelines
+
+Please follow the [contributing guidelines](../dev/contributing.md)!

From fc5acb10fbb85690c9c3475eafb629876ba3730b Mon Sep 17 00:00:00 2001
From: QuantumChemist <c.zitlau@live.com>
Date: Fri, 5 Jul 2024 18:45:14 +0200
Subject: [PATCH 5/6] improving documentation

---
 docs/index.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/index.md b/docs/index.md
index 5678f9e09..7dbd6cbc5 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -13,7 +13,7 @@ reference/index
 ```
 
 ```{toctree}
-:caption: Contirbuting Guide
+:caption: Contributing Guide
 :hidden:
 dev/dev_install
 dev/contributing
@@ -80,9 +80,9 @@ assumes that you have a broad understanding of the key concepts.
 :link: dev/dev_install
 :link-type: doc
 :class-header: bg-light
-**Developer guide**
+**Contributing Guide**
 ^^^
 Do you want to develop your own workflows or improve existing functionalities?
-Check out the developer guide.
+Check out the contributing guide.
 :::
 ::::

From 9c349d6426c7a30da826a707c849f9dc55d3a512 Mon Sep 17 00:00:00 2001
From: QuantumChemist <c.zitlau@live.com>
Date: Fri, 5 Jul 2024 19:43:04 +0200
Subject: [PATCH 6/6] improving the documentation

---
 docs/user/flows/flows.md    |   3 +
 docs/user/jobflowremote.md  | 203 ++++++++++++++++++++++++++++++++++++
 docs/user/setup.md          |   9 +-
 docs/user/test_project.yaml |  77 ++++++++++++++
 4 files changed, 291 insertions(+), 1 deletion(-)
 create mode 100644 docs/user/jobflowremote.md
 create mode 100644 docs/user/test_project.yaml

diff --git a/docs/user/flows/flows.md b/docs/user/flows/flows.md
index bd4eeb50f..7296be0fe 100644
--- a/docs/user/flows/flows.md
+++ b/docs/user/flows/flows.md
@@ -127,6 +127,9 @@ You can manage your `autoplex` workflow using [`FireWorks`](https://materialspro
 Please follow the installation and setup instructions on the respective guide website.
 Both packages rely on the [MongoDB](https://www.mongodb.com/) database manager for data storage.
 
+We recommend using `jobflow-remote` as it is more flexible to use, especially on clusters where users cannot store their
+own MongoDB. You can find a more comprehensive `jobflow-remote` tutorial [here](../jobflowremote.md).
+
 Submission using `FireWorks`:
 ```python
 from fireworks import LaunchPad
diff --git a/docs/user/jobflowremote.md b/docs/user/jobflowremote.md
new file mode 100644
index 000000000..a58c49e24
--- /dev/null
+++ b/docs/user/jobflowremote.md
@@ -0,0 +1,203 @@
+# Jobflow-remote setup
+
+This will result in a setup for automation where 
+1. We will add/submit job to db on your local machine.
+2. Jobs will be executed on your remote cluster.
+
+# Installation
+
+## on your local machine
+1. Create a new env > `conda create -n autoplex python=3.10`. (You can choose any other env name.)
+2. Activate your env > `conda activate autoplex` 
+3. Clone the jobflow remote repository using `git clone https://github.com/Matgenix/jobflow-remote.git`
+4. Switch to interactive branch (use `git checkout remotes/origin/interactive`) and install it via `pip install .` in your env.
+5. Install autoplex > In your local autoplex directory: `pip install -e .[strict]`. 
+6. Activate your env and run `jf project generate --full YOUR_PROJECT_NAME`. 
+This will generate an empty project config file in your home directory. 
+You can find this file inside `~/.jfremote` 
+(This is optional, a config file is provided here: [test_project.yaml](test_project.yaml), 
+you can simply copy this file to the  `~/.jfremote` directory. You will need to create `~/.jfremote` directory in your home.)
+
+
+## on your remote cluster
+7. Repeat step 1,2,3,4 and 5 on your remote cluster.
+8. Now setup atomate2 config as usual.
+Just `atomate2/config/atomate2.yaml`. (We do not need to set up jobflow.yaml in atomate2/config)
+
+Below is an example `atomate2.yaml` config file
+```yaml
+VASP_CMD:  your hpc vasp_std cmd
+VASP_GAMMA_CMD:  your hpc vasp_gam cmd
+LOBSTER_CMD: your hpc lobster cmd
+```
+
+9. Add environment variable to your ~/.bashrc `export ATOMATE2_CONFIG_FILE="/path/to/atomate2/config/atomate2.yaml"`
+
+## Changes to be done in the config file - on your local machine
+1. Set paths to base, tmp, log, daemon dir. Best would be, simply creating empty dirs in your `~/.jfremote` directory. 
+Use the paths as provided in sample config file for reference.
+2. Under the `workers` section of the yaml, change worker name from `example_worker` to your liking, set `work_dir` 
+(directory where calcs will be run), set `pre_run` command (use to activate the environment before job execution), 
+set `user` (this is your username on your remote cluster)  
+3. In `queue` section, just change details as per your mongodb (admin username password, host, port, name)
+
+
+# Check if your setup works correctly
+
+> Note: If you have any password-protected key in your `~/.ssh` directory, the worker might fail to start. To overcome this, temporarily move your passphrase-protected keys from the `~/.ssh` directory to some other directory before starting the runner.
+
+1. `jf project check -w example_worker` 
+(If everything is set up correctly, you will be asked for your password and OTP, and the check will exit with a green tick in a few seconds.)
+2. `jf project check --errors` this will check everything, including whether your MongoDB connection works. 
+If anything fails, please check the config file.
+
+
+# Getting started
+
+1. Run `jf admin reset` (Do not worry, this will reset your db, necessary to do only once. 
+You can skip this if you want to keep the data in your db.)
+2. `jf runner start -s -i` 
+
+You will be prompted with a question "Do you want to open the connection for the host of the XXX worker?" 
+Answer "y". And then you should be prompted for password and OTP.
+After that you can quit the interactive mode with ctrl+c. The runner should now be working fine until the connection drops.
+ 
+During the starting of the runner, you will probably see a few errors/warnings. 
+First, a warning that the password may be echoed. Ignore it, it should not.  
+
+3. `jf runner status` (this should return status of runner as `running`, if everything is set up correctly)
+
+
+# Example job scripts to test (Add/Submit jobs to DB from your local machine)
+
+## Simple python job
+
+```python
+from jobflow_remote.utils.examples import add
+from jobflow_remote import submit_flow
+from jobflow import Flow
+
+job1 = add(1, 2)
+job2 = add(job1.output, 2)
+
+flow = Flow([job1, job2])
+
+resources = {"nodes": N, "partition": "name", "time": "01:00:00", "ntasks": ntasks, "qverbatim": "#SBATCH --get-user-env",
+             "mail_user": "your_email@adress", "mail_type": "ALL"}
+
+print(submit_flow(flow, worker="example_worker", resources=resources, project="test_project")) 
+# Do not forget to change worker and project name to what you set up in the jobflow remote config file.
+```
+
+## VASP relax job using atomate2 workflow
+
+```python
+from jobflow_remote.utils.examples import add
+from jobflow_remote import submit_flow
+from jobflow import Flow
+from mp_api.client import MPRester
+from atomate2.vasp.flows.core import DoubleRelaxMaker
+from atomate2.vasp.powerups import update_user_incar_settings 
+
+
+mpid = "mp-22862"
+mr = MPRester(api_key='YOUR_MP_API_KEY')
+struct = mr.get_structure_by_material_id(mpid)
+
+# we use the same structure (mp-22862) here and instantiate the workflow
+relax_job = DoubleRelaxMaker().make(structure=struct)
+
+relax_job = update_user_incar_settings(relax_job, {"NPAR": 4})
+
+# You can also pass an exec_config for the worker using exec_config in submit_flow. Below is an example 
+# exec_config={"pre_run": "source activate autoplex \n module load slurm_setup \n module load vasp/6.1.2"}
+
+resources = {"nodes": N, "partition": "name", "time": "01:00:00", "ntasks": ntasks, "qverbatim": "#SBATCH --get-user-env",
+             "mail_user": "your_email@adress", "mail_type": "ALL"}
+
+print(submit_flow(relax_job, worker="example_worker", resources=resources, project="test_project"))
+```
+It is crucial to set `"qverbatim": "#SBATCH --get-user-env"` to make sure the same environment is used on your remote cluster.
+
+# Setting different workers for different job types
+
+This is very similar to how it is done in atomate2; jobflow-remote provides a specific utility for this.
+```python
+from jobflow_remote import set_run_config
+```
+An example use case can be found [here](https://matgenix.github.io/jobflow-remote/user/tuning.html#jobconfig)
+
+# Querying completed jobs from DB using jobflow-remote Python API
+
+```python
+from jobflow_remote import get_jobstore
+
+js = get_jobstore(project_name='YOUR_PROJECT_NAME') 
+js.connect()
+result = js.query(criteria={"name": "generate_frequencies_eigenvectors"},load=True) 
+# example query for completed phonon workflow runs
+# the query methods are the same as in atomate2 basically, 
+for i in result:
+    print(i['output']["phonon_bandstructure"]) 
+    # get phonon bandstructure pymatgen object
+```
+
+# Updating failed jobs time limit or execution config
+```python
+from jobflow_remote.jobs.jobcontroller import JobController
+
+jc = JobController.from_project_name(project_name='YOUR_PROJECT_NAME') # initialize a job controller
+
+job_docs = jc.get_jobs_doc(db_ids='214') # query job docs based on different criteria 
+# (Check documentation to see all available options https://github.com/Matgenix/jobflow-remote/blob/967e7c512f230105b1a82c2227fb101d8d4acb3d/src/jobflow_remote/jobs/jobcontroller.py#L467)
+
+# get your existing resources
+resources = job_docs[0].resources
+
+# update time limit in the retrieved dict (you can update any other keys like partition/ nodes etc as well)
+resources["time"] = '8:00:00'
+
+jc.rerun_job(db_id=job_docs[0].db_id, force=True) # important for jobs that are in failed state to reset them first
+jc.set_job_run_properties(db_ids=[job_docs[0].db_id], resources=resources) # this will update the DB entry
+```
+
+> IMPORTANT: When you restart VASP calculations, make sure to move the old VASP files somewhere else, 
+> because jobflow-remote will restart your calculation in the same directory and that leads to some clash of old and new files.
+
+# Update pre-existing job input parameters in the db
+
+```python
+# Note that this way is a bit involved and you need to find the exact structure of your nested db entry based on the type of maker used
+
+# Following is an example for failed vasp job where NPAR and ALGO tags in DB entry are updated
+from jobflow_remote.jobs.jobcontroller import JobController
+
+jc = JobController.from_project_name(project_name='YOUR_PROJECT_NAME')
+
+job_collection = jc.db.jobs # get jobs collection from mongoDB
+
+for i in job_collection.find({'db_id': '214'}):
+    job_dict = i['_id'] # get object id in mongodb (this is used as a filter)
+    incar_settings = i['job']['function']['@bound']['input_set_generator']['user_incar_settings'] # get existing user incar settings
+
+incar_settings.update({'NPAR': 2, 'ALGO': 'FAST'}) # now update incar settings here as per requirement
+job_collection.update_one({'_id': job_dict}, {'$set': {'job.function.@bound.input_set_generator.user_incar_settings' : incar_settings}})
+
+print(jc.get_jobs_doc(db_ids='214')[0].job.maker.input_set_generator.user_incar_settings) # check if entries are updated
+```
+> IMPORTANT: When you restart VASP calculations, make sure to move the old VASP files somewhere else, 
+> because jobflow-remote will restart your calculation in the same directory and that leads to some clash of old and new files.
+
+# Some useful commands
+
+1. `jf job list` (list jobs in the db)
+2. `jf flow list` (list of flows in the db)
+3. `jf job info jobid` (provides some info of job like workdir, error info if it failed)
+4. `jf flow delete -did db_id` (deletes flow from db)
+5. `jf flow -h` or `jf job -h` for checking other options
+
+# Some useful links
+
+1. Check slurm.py for finding different available options you can set for resources dict [here](https://github.com/Matgenix/qtoolkit/tree/develop/src/qtoolkit/io)
+2. More details on project config and settings can be found [here](https://matgenix.github.io/jobflow-remote/user/projectconf.html)
+3. Details on different setup options [here](https://matgenix.github.io/jobflow-remote/user/install.html)
diff --git a/docs/user/setup.md b/docs/user/setup.md
index b3e5b3409..36af44236 100644
--- a/docs/user/setup.md
+++ b/docs/user/setup.md
@@ -5,4 +5,11 @@ We are referring the user to the [installation guide of atomate2](https://materi
 be able to use `autoplex`.
 
 After setting up `atomate2`, make sure to add `VASP_INCAR_UPDATES: {"NPAR": number}` in your ~/atomate2/config/atomate2.yaml file. 
-Set a number that is a divisor of the number of tasks you use for the VASP calculations.
\ No newline at end of file
+Set a number that is a divisor of the number of tasks you use for the VASP calculations.
+
+You can manage your `autoplex` workflow using [`FireWorks`](https://materialsproject.github.io/fireworks/) or [`jobflow-remote`](https://matgenix.github.io/jobflow-remote/). 
+Please follow the installation and setup instructions on the respective guide website.
+Both packages rely on the [MongoDB](https://www.mongodb.com/) database manager for data storage.
+
+We recommend using `jobflow-remote` as it is more flexible to use, especially on clusters where users cannot host their
+own MongoDB instance. You can find a more comprehensive `jobflow-remote` tutorial [here](jobflowremote.md).
\ No newline at end of file
diff --git a/docs/user/test_project.yaml b/docs/user/test_project.yaml
new file mode 100644
index 000000000..c3ce3767f
--- /dev/null
+++ b/docs/user/test_project.yaml
@@ -0,0 +1,77 @@
+name: test_project
+base_dir: /home/username/.jfremote/test_project
+tmp_dir: /home/username/.jfremote/test_project/tmp
+log_dir: /home/username/.jfremote/test_project/log
+daemon_dir: /home/username/.jfremote/test_project/daemon
+log_level: debug
+runner:
+  delay_checkout: 30
+  delay_check_run_status: 30
+  delay_advance_status: 30
+  delay_refresh_limited: 600
+  delay_update_batch: 60
+  lock_timeout: 86400
+  delete_tmp_folder: true
+  max_step_attempts: 3
+  delta_retry:
+  - 30
+  - 300
+  - 1200
+workers:
+  example_worker:
+    type: remote
+    scheduler_type: slurm
+    work_dir: /path/to/your/scratch/dir
+    resources:
+    pre_run: |
+       source activate autoplex
+    post_run:
+    timeout_execute: 120
+    max_jobs: 10
+    batch:
+    host: remote cluster
+    user: username
+    port:
+    password:
+    key_filename:
+    passphrase:
+    gateway:
+    forward_agent:
+    connect_timeout:
+    connect_kwargs:
+    inline_ssh_env:
+    keepalive: 60
+    shell_cmd: bash
+    login_shell: true
+    interactive_login: true
+queue:
+  store:
+    type: MongoStore
+    host: local machine
+    database: db name
+    username: user name
+    password: password
+    collection_name: jobs
+  flows_collection: flows
+  auxiliary_collection: jf_auxiliary
+  db_id_prefix:
+exec_config: {}
+jobstore:
+  docs_store:
+    type: MongoStore
+    database: db name
+    host: local machine
+    port: 27017
+    username: user name
+    password: password
+    collection_name: outputs
+  additional_stores:
+    data:
+      type: GridFSStore
+      database: db name
+      host: local machine
+      port: 27017
+      username: user name
+      password: password
+      collection_name: outputs_blobs
+metadata:
\ No newline at end of file