diff --git a/CHANGELOG.md b/CHANGELOG.md
index 06fa4ce..0b2a3f5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Changed
 
+- all `asyncmd.utils` methods now require an MDEngine (class) to dispatch to the correct engine submodule methods
 - GmxEngine `apply_constraints` and `generate_velocities` methods: rename `wdir` argument to `workdir` to make it consistent with `prepare` and `prepare_from_files` (also add the `workdir` argument to the MDEngine ABC).
 
 ### Fixed
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 7f8599d..0aac22d 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -82,9 +82,10 @@
     {
         "name": "PyPI",
         "url": "https://pypi.org/project/asyncmd/",
-        # download stats are broken? (August 2025)
-        #"icon": "https://img.shields.io/pypi/dm/asyncmd",
-        "icon": "https://img.shields.io/pypi/v/asyncmd",
+        "icon": "https://img.shields.io/pypi/dm/asyncmd?label=pypi%20downloads",
+        # keep the replacement icon for PyPi link, but commented out
+        # (for the next time download stats are broken)
+        #"icon": "https://img.shields.io/pypi/v/asyncmd",
         "type": "url",
     },
     {
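
The renamed keyword argument only requires swapping the kwarg name at call sites. A minimal
sketch, assuming an already constructed GmxEngine `engine` and an input `Trajectory`
`snapshot` (both hypothetical), inside an async context:

    # was: await engine.apply_constraints(conf_in=snapshot, conf_out_name="constrained.trr", wdir="prep")
    constrained = await engine.apply_constraints(conf_in=snapshot,
                                                 conf_out_name="constrained.trr",
                                                 workdir="prep")
    randomized = await engine.generate_velocities(conf_in=snapshot,
                                                  conf_out_name="randomized.trr",
                                                  workdir="prep")
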
""" - # pylint: disable-next=global-variable-not-assigned + # pylint: disable-next=global-statement global _GLOBALS if _GLOBALS.get(_GLOBALS_KEYS.TRAJECTORY_FUNCTION_CACHE_TYPE, "not_set") != "h5py": # nothing to copy as h5py was not the old cache type @@ -315,6 +315,9 @@ def deregister_h5py_cache(h5py_group: "h5py.Group | h5py.File"): _deregister_h5py_cache_for_all_trajectories(h5py_group=h5py_group) if _GLOBALS.get(_GLOBALS_KEYS.H5PY_CACHE, None) is h5py_group: _GLOBALS[_GLOBALS_KEYS.H5PY_CACHE] = None + logger.warning("Deregistered global writeable h5py cache. No TrajectoryFunction" + " values will be cached until a new h5py cache has been registered." + ) if h5py_group in _GLOBALS.get(_GLOBALS_KEYS.H5PY_CACHE_READ_ONLY_FALLBACKS, []): _GLOBALS[_GLOBALS_KEYS.H5PY_CACHE_READ_ONLY_FALLBACKS].remove(h5py_group) diff --git a/src/asyncmd/gromacs/mdengine.py b/src/asyncmd/gromacs/mdengine.py index d15797c..9fd6761 100644 --- a/src/asyncmd/gromacs/mdengine.py +++ b/src/asyncmd/gromacs/mdengine.py @@ -508,8 +508,8 @@ async def _0step_md(self, conf_in: Trajectory, conf_out_name: str, *, k=6, ) ) - swdir = os.path.join(wdir, run_name) - await aiofiles.os.mkdir(swdir) + wdir = os.path.join(wdir, run_name) + await aiofiles.os.mkdir(wdir) constraints_mdp = copy.deepcopy(self.mdp) constraints_mdp["continuation"] = "no" if constraints else "yes" constraints_mdp["gen-vel"] = "yes" if generate_velocities else "no" @@ -523,12 +523,12 @@ async def _0step_md(self, conf_in: Trajectory, conf_out_name: str, *, # make sure we have draw a new/different random number for gen-vel constraints_mdp["gen-seed"] = -1 constraints_mdp["nsteps"] = 0 - await self._run_grompp(workdir=swdir, deffnm=run_name, + await self._run_grompp(workdir=wdir, deffnm=run_name, trr_in=conf_in.trajectory_files[0], - tpr_out=os.path.join(swdir, f"{run_name}.tpr"), + tpr_out=os.path.join(wdir, f"{run_name}.tpr"), mdp_obj=constraints_mdp) - cmd_str = self._mdrun_cmd(tpr=os.path.join(swdir, f"{run_name}.tpr"), - workdir=swdir, + cmd_str = self._mdrun_cmd(tpr=os.path.join(wdir, f"{run_name}.tpr"), + workdir=wdir, deffnm=run_name) logger.debug("About to execute gmx mdrun command for constraints and" "/or velocity generation: %s", @@ -537,7 +537,7 @@ async def _0step_md(self, conf_in: Trajectory, conf_out_name: str, *, stdout = bytes() await self._acquire_resources_gmx_mdrun() mdrun_proc = await self._start_gmx_mdrun( - cmd_str=cmd_str, workdir=swdir, + cmd_str=cmd_str, workdir=wdir, run_name=run_name, # TODO: we hardcode that the 0step MD runs can not be longer than 15 min # (but i think this should be fine for randomizing velocities and/or @@ -563,7 +563,7 @@ async def _0step_md(self, conf_in: Trajectory, conf_out_name: str, *, # the FrameExtractor (i.e. MDAnalysis) handle any potential conversions engine_traj = Trajectory( trajectory_files=os.path.join( - swdir, f"{run_name}{self._num_suffix(1)}.trr" + wdir, f"{run_name}{self._num_suffix(1)}.trr" ), structure_file=conf_in.structure_file, ) @@ -571,17 +571,17 @@ async def _0step_md(self, conf_in: Trajectory, conf_out_name: str, *, # Note: we use extract (and not extract_async) because otherwise # it can happen in super-rare circumstances that the Trajectory # we just instantiated is "replaced" by a Trajectory with the - # same hash but a different filename/path, then the extraction + # same hash but a different filename/path. If then in addition + # this trajectory is removed before extracting, the extraction # fails. 
diff --git a/src/asyncmd/gromacs/mdengine.py b/src/asyncmd/gromacs/mdengine.py
index d15797c..9fd6761 100644
--- a/src/asyncmd/gromacs/mdengine.py
+++ b/src/asyncmd/gromacs/mdengine.py
@@ -508,8 +508,8 @@ async def _0step_md(self, conf_in: Trajectory, conf_out_name: str, *,
                 k=6,
                 )
             )
-        swdir = os.path.join(wdir, run_name)
-        await aiofiles.os.mkdir(swdir)
+        wdir = os.path.join(wdir, run_name)
+        await aiofiles.os.mkdir(wdir)
         constraints_mdp = copy.deepcopy(self.mdp)
         constraints_mdp["continuation"] = "no" if constraints else "yes"
         constraints_mdp["gen-vel"] = "yes" if generate_velocities else "no"
@@ -523,12 +523,12 @@ async def _0step_md(self, conf_in: Trajectory, conf_out_name: str, *,
         # make sure we have draw a new/different random number for gen-vel
         constraints_mdp["gen-seed"] = -1
         constraints_mdp["nsteps"] = 0
-        await self._run_grompp(workdir=swdir, deffnm=run_name,
+        await self._run_grompp(workdir=wdir, deffnm=run_name,
                                trr_in=conf_in.trajectory_files[0],
-                               tpr_out=os.path.join(swdir, f"{run_name}.tpr"),
+                               tpr_out=os.path.join(wdir, f"{run_name}.tpr"),
                                mdp_obj=constraints_mdp)
-        cmd_str = self._mdrun_cmd(tpr=os.path.join(swdir, f"{run_name}.tpr"),
-                                  workdir=swdir,
+        cmd_str = self._mdrun_cmd(tpr=os.path.join(wdir, f"{run_name}.tpr"),
+                                  workdir=wdir,
                                   deffnm=run_name)
         logger.debug("About to execute gmx mdrun command for constraints and"
                      "/or velocity generation: %s",
@@ -537,7 +537,7 @@ async def _0step_md(self, conf_in: Trajectory, conf_out_name: str, *,
         stdout = bytes()
         await self._acquire_resources_gmx_mdrun()
         mdrun_proc = await self._start_gmx_mdrun(
-            cmd_str=cmd_str, workdir=swdir,
+            cmd_str=cmd_str, workdir=wdir,
             run_name=run_name,
             # TODO: we hardcode that the 0step MD runs can not be longer than 15 min
             # (but i think this should be fine for randomizing velocities and/or
@@ -563,7 +563,7 @@ async def _0step_md(self, conf_in: Trajectory, conf_out_name: str, *,
         # the FrameExtractor (i.e. MDAnalysis) handle any potential conversions
         engine_traj = Trajectory(
             trajectory_files=os.path.join(
-                swdir, f"{run_name}{self._num_suffix(1)}.trr"
+                wdir, f"{run_name}{self._num_suffix(1)}.trr"
                 ),
             structure_file=conf_in.structure_file,
             )
@@ -571,17 +571,17 @@ async def _0step_md(self, conf_in: Trajectory, conf_out_name: str, *,
             # Note: we use extract (and not extract_async) because otherwise
             # it can happen in super-rare circumstances that the Trajectory
             # we just instantiated is "replaced" by a Trajectory with the
-            # same hash but a different filename/path, then the extraction
+            # same hash but a different filename/path. If then in addition
+            # this trajectory is removed before extracting, the extraction
             # fails. If we dont await this can not happen since we do not
             # give up control in between.
-            out_traj = extractor.extract(outfile=conf_out_name,
-                                         traj_in=engine_traj,
-                                         idx=len(engine_traj) - 1,
-                                         )
-            return out_traj
+            return extractor.extract(outfile=conf_out_name,
+                                     traj_in=engine_traj,
+                                     idx=len(engine_traj) - 1,
+                                     )
         finally:
-            await self._cleanup_gmx_mdrun(workdir=swdir, run_name=run_name)
-            shutil.rmtree(swdir)  # remove the whole directory we used as wdir
+            await self._cleanup_gmx_mdrun(workdir=wdir, run_name=run_name)
+            shutil.rmtree(wdir)  # remove the whole directory we used as wdir
 
     async def prepare(self, starting_configuration: Trajectory | None | str,
                       workdir: str, deffnm: str) -> None:
diff --git a/src/asyncmd/gromacs/utils.py b/src/asyncmd/gromacs/utils.py
index 1d05d33..bdbded6 100644
--- a/src/asyncmd/gromacs/utils.py
+++ b/src/asyncmd/gromacs/utils.py
@@ -18,6 +18,7 @@ The general variants of these functions can be found in asyncmd.utils.
 """
 import os
+import math
 import logging
 
 import aiofiles.os
 
@@ -58,12 +59,12 @@ def get_value_from_mdp(k):
             v = mdp[k]
         except KeyError:
             # not set, defaults to 0
-            v = float("inf")
+            v = math.inf
         else:
-            # need to check for 0 (== no output!) in case somone puts the
+            # need to check for 0 (== no output!) in case someone puts the
            # defaults (or reads an mdout.mdp where gmx lists all the defaults)
            if not v:
-                v = float("inf")
+                v = math.inf
        return v

    if traj_type.upper() == "TRR":
@@ -75,7 +76,7 @@ def get_value_from_mdp(k):
     vals = []
     for k in keys:
         vals += [get_value_from_mdp(k=k)]
-    if (nstout := min(vals)) == float("inf"):
+    if (nstout := min(vals)) == math.inf:
         raise ValueError(f"The MDP you passed results in no {traj_type} "
                          + "trajectory output.")
     if traj_type.upper == "TRR":
@@ -83,7 +84,7 @@ def get_value_from_mdp(k):
         # (if they are defined)
         additional_keys = ["nstvout", "nstfout"]
         for k in additional_keys:
-            if (v := get_value_from_mdp(k=k)) != float("inf"):
+            if (v := get_value_from_mdp(k=k)) != math.inf:
                 if v % nstout:
                     logger.warning("%s trajectory output is not a multiple of "
                                    "the nstxout frequency (%s=%d, nstxout=%d).",
@@ -136,7 +137,7 @@ async def get_all_file_parts(folder: str, deffnm: str, file_ending: str) -> "lis
     deffnm : str
         deffnm (prefix of filenames) used in the simulation.
     file_ending : str
-        File ending of the requested filetype (with or without preceeding ".").
+        File ending of the requested filetype (with or without preceding ".").
 
     Returns
     -------
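
Replacing `float("inf")` with `math.inf` does not change behaviour; the sentinel logic stays
the same: unset or zero-valued `nst*out` options mean "no output" and must not win the
`min()`. A self-contained illustration (simplified, not the actual asyncmd.gromacs.utils code):

    import math

    def smallest_output_interval(mdp_options: dict[str, int], keys: list[str]) -> int:
        """Return the smallest trajectory output interval among the given MDP keys."""
        vals = []
        for key in keys:
            val = mdp_options.get(key, 0)          # unset options default to 0 in GROMACS
            vals.append(val if val else math.inf)  # 0 (or unset) means "no output"
        if (nstout := min(vals)) == math.inf:
            raise ValueError("These MDP options result in no trajectory output.")
        return nstout

    print(smallest_output_interval({"nstxout": 0, "nstvout": 500},
                                   ["nstxout", "nstvout"]))  # -> 500
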
""" - # pylint: disable-next=global-variable-not-assigned + # pylint: disable-next=global-statement global SlurmProcess # collect options for slurm cluster mediator mediator_options: dict[str, str | int | list[str]] = {} diff --git a/src/asyncmd/trajectory/propagate.py b/src/asyncmd/trajectory/propagate.py index 26f04a3..022a6ea 100644 --- a/src/asyncmd/trajectory/propagate.py +++ b/src/asyncmd/trajectory/propagate.py @@ -253,6 +253,7 @@ async def remove_parts(self, workdir: str, deffnm: str, *, folder=workdir, deffnm=deffnm, file_ending=ending.lower(), + engine=self.engine_cls, ) # make sure we dont miss anything because we have different # capitalization @@ -261,6 +262,7 @@ async def remove_parts(self, workdir: str, deffnm: str, *, folder=workdir, deffnm=deffnm, file_ending=ending.upper(), + engine=self.engine_cls, ) await asyncio.gather(*(aiofiles.os.unlink(f) for f in parts_to_remove diff --git a/src/asyncmd/trajectory/trajectory_cache.py b/src/asyncmd/trajectory/trajectory_cache.py index 660f5f1..51a5349 100644 --- a/src/asyncmd/trajectory/trajectory_cache.py +++ b/src/asyncmd/trajectory/trajectory_cache.py @@ -500,9 +500,13 @@ def __init__(self, traj_hash: int, traj_files: list[str]) -> None: ) # setup (writeable) main cache if we have it if writeable_h5py_cache is None: - logger.warning("Initializing a Trajectory cache in h5py with only " - "read-only h5py.Groups associated. Newly calculated " - "function values will not be cached!") + # This can spam the log, because it happens once per trajectory that is + # read from a read-only storage, e.g. for analysis + logger.debug("Initializing h5py Trajectory cache with only " + "read-only h5py.Groups associated. Newly calculated " + "function values will not be cached! (trajectory files=%s)", + traj_files, + ) self._main_cache = None else: self._main_cache = OneH5PYGroupTrajectoryFunctionValueCache( @@ -542,10 +546,16 @@ def deregister_h5py_cache(self, h5py_cache: "h5py.File | h5py.Group") -> None: """ if self._main_cache is not None: if self._main_cache.h5py_cache is h5py_cache: - logger.warning( - "Deregistering the writeable (main) cache (%s). " - "Newly calculated function values will not be cached!", - h5py_cache + # This can spam the log, because it happens once per trajectory + # in existence when closing/deregistering a h5py cache. + # We warn once when deregistering the h5py cache (in central config.py) + # and use debug here for now + logger.debug( + "Deregistering the writeable (main) cache (%s) for Trajectory " + "consisting of files %s." + "Newly calculated function values will not be cached until " + "a new h5py cache group is registered or the cache type changed!", + h5py_cache, self._traj_files, ) self._main_cache = None # found it so it can not be a fallback_cache, so get out of here diff --git a/src/asyncmd/utils.py b/src/asyncmd/utils.py index 5db178f..632e16d 100644 --- a/src/asyncmd/utils.py +++ b/src/asyncmd/utils.py @@ -20,6 +20,8 @@ It also includes various functions to retrieve or ensure important parameters from MDConfig/MDEngine combinations, such as nstout_from_mdconfig and ensure_mdconfig_options. 
""" +import logging + from .mdengine import MDEngine from .mdconfig import MDConfig from .trajectory.trajectory import Trajectory @@ -28,7 +30,11 @@ from .gromacs import mdconfig as gmx_config -async def get_all_traj_parts(folder: str, deffnm: str, engine: MDEngine) -> list[Trajectory]: +logger = logging.getLogger(__name__) + + +async def get_all_traj_parts(folder: str, deffnm: str, engine: MDEngine | type[MDEngine], + ) -> list[Trajectory]: """ List all trajectories in folder by given engine class with given deffnm. @@ -37,10 +43,12 @@ async def get_all_traj_parts(folder: str, deffnm: str, engine: MDEngine) -> list folder : str Absolute or relative path to a folder. deffnm : str - deffnm used by the engines simulation run from which we want the trajs. - engine : MDEngine - The engine that produced the trajectories - (or one from the same class and with similar init args) + deffnm used by the engines simulation run from which we want the trajectories. + engine : MDEngine | type[MDEngine] + The engine that produced the trajectories (or one from the same class + and with similar init args). Note that it is also possible to pass an + uninitialized engine class, but then the default trajectory output type + will be returned. Returns ------- @@ -52,7 +60,17 @@ async def get_all_traj_parts(folder: str, deffnm: str, engine: MDEngine) -> list ValueError Raised when the engine class is unknown. """ - if isinstance(engine, (gmx_engine.GmxEngine, gmx_engine.SlurmGmxEngine)): + # test for uninitialized engine classes, we warn but return the default traj type + if isinstance(engine, type) and issubclass(engine, MDEngine): + logger.warning("Engine %s is not initialized, i.e. it is an engine class. " + "Returning the default output trajectory type for this " + "engine class.", engine) + if ( + isinstance(engine, (gmx_engine.GmxEngine, gmx_engine.SlurmGmxEngine)) + or (isinstance(engine, type) # check that it is a type otherwise issubclass might not work + and issubclass(engine, (gmx_engine.GmxEngine, gmx_engine.SlurmGmxEngine)) + ) + ): return await gmx_utils.get_all_traj_parts(folder=folder, deffnm=deffnm, traj_type=engine.output_traj_type, ) @@ -61,6 +79,7 @@ async def get_all_traj_parts(folder: str, deffnm: str, engine: MDEngine) -> list async def get_all_file_parts(folder: str, deffnm: str, file_ending: str, + engine: MDEngine | type[MDEngine], ) -> list[str]: """ Find and return all files with given ending produced by a `MDEngine`. @@ -75,16 +94,24 @@ async def get_all_file_parts(folder: str, deffnm: str, file_ending: str, deffnm (prefix of filenames) used in the simulation. file_ending : str File ending of the requested filetype (with or without preceding "."). + engine : MDEngine | type[MDEngine] + The engine or engine class that produced the file parts. Returns ------- list[str] Ordered list of filepaths for files with given ending. """ - # TODO: we just use the function from the gromacs engines for now, i.e. 
diff --git a/tests/helper_classes.py b/tests/helper_classes.py
index def8c49..6d4d7a2 100644
--- a/tests/helper_classes.py
+++ b/tests/helper_classes.py
@@ -28,13 +28,14 @@ class NoOpMDEngine(MDEngine):
     current_trajectory = None
     output_traj_type = "TEST"
     steps_done = 0
-    async def apply_constraints(self, conf_in: Trajectory, conf_out_name: str) -> Trajectory:
+    async def apply_constraints(self, conf_in: Trajectory, conf_out_name: str,
+                                *, workdir: str = ".") -> Trajectory:
         pass
     async def prepare(self, starting_configuration: Trajectory, workdir: str, deffnm: str) -> None:
         pass
     async def prepare_from_files(self, workdir: str, deffnm: str) -> None:
         pass
-    async def run_walltime(self, walltime: float) -> Trajectory:
+    async def run_walltime(self, walltime: float, max_steps: int | None = None) -> Trajectory:
         pass
     async def run_steps(self, nsteps: int, steps_per_part: bool = False) -> Trajectory:
         pass
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 5ed2ecd..e5a6345 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -19,9 +19,12 @@ i.e. no tests for, e.g., functions from asyncmd.gromacs.utils.
 """
 import pytest
+import logging
 
 from conftest import NoOpMDEngine, NoOpMDConfig
 
-from asyncmd.utils import (get_all_traj_parts, nstout_from_mdconfig,
+from asyncmd.utils import (get_all_traj_parts,
+                           get_all_file_parts,
+                           nstout_from_mdconfig,
                            ensure_mdconfig_options,
                            )
 
@@ -34,6 +37,13 @@ async def test_get_all_traj_parts(self):
                                      engine=NoOpMDEngine(),
                                      )
 
+    @pytest.mark.asyncio
+    async def test_get_all_file_parts(self):
+        with pytest.raises(ValueError):
+            await get_all_file_parts(folder="test", deffnm="test",
+                                     file_ending=".test", engine=NoOpMDEngine(),
+                                     )
+
     def test_nstout_from_mdconfig(self):
         with pytest.raises(ValueError):
             nstout_from_mdconfig(mdconfig=NoOpMDConfig(),
@@ -42,3 +52,19 @@ def test_ensure_mdconfig_options(self):
     def test_ensure_mdconfig_options(self):
         with pytest.raises(ValueError):
             ensure_mdconfig_options(mdconfig=NoOpMDConfig())
+
+
+class Test_warn_for_default_value_from_engine_class:
+    @pytest.mark.asyncio
+    async def test_get_all_traj_parts(self, caplog):
+        with pytest.raises(ValueError):
+            with caplog.at_level(logging.WARNING):
+                await get_all_traj_parts(folder="test", deffnm="test",
+                                         # this time we use an uninitialized
+                                         # engine class so we get the warning
+                                         # (and then fail after)
+                                         engine=NoOpMDEngine,
+                                         )
+        warn_text = f"Engine {NoOpMDEngine} is not initialized, i.e. it is an engine class. "
+        warn_text += "Returning the default output trajectory type for this engine class."
+        assert warn_text in caplog.text