Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FIX/ENH/RF: Introduce configuration module and catch lingering tmpdirs #466

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 18 additions & 6 deletions heudiconv/cli/run.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,38 @@
#!/usr/bin/env python

import logging
import os
import sys
from argparse import ArgumentParser

from .. import __version__
from .. import __version__, config
from ..main import workflow

lgr = logging.getLogger(__name__)


def reset_config(func):
"""Decorator to reset configuration"""
from importlib import reload

def _reset(argv=None):
reload(config)
func(argv)
config._cleanup()
reload(config)
return _reset


@reset_config
def main(argv=None):
parser = get_parser()
args = parser.parse_args(argv)
opts = parser.parse_args(argv)
# exit if nothing to be done
if not args.files and not args.dicom_dir_template and not args.command:
if not any((opts.files, opts.dicom_dir_template, opts.command)):
lgr.warning("Nothing to be done - displaying usage help")
parser.print_help()
sys.exit(1)

kwargs = vars(args)
workflow(**kwargs)
workflow(**vars(opts))


def get_parser():
Expand All @@ -44,6 +55,7 @@ def get_parser():
group.add_argument(
'--files',
nargs='*',
type=os.path.abspath,
help='Files (tarballs, dicoms) or directories containing files to '
'process. Cannot be provided if using --dicom_dir_template.')
parser.add_argument(
Expand Down
188 changes: 188 additions & 0 deletions heudiconv/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
"""
A Python module for managing configurations across a heudiconv run.
"""

import atexit
import os
from pathlib import Path


# avoid telemetry check if user desires
_disable_et = bool(
os.getenv("NO_ET") is not None
or os.getenv("NIPYPE_NO_ET") is not None
)
os.environ["NIPYPE_NO_ET"] = "1"
os.environ["NO_ET"] = "1"
_latest_version = "Unknown"

if not _disable_et:
# Ensure analytics hit only once
from contextlib import suppress
from requests import get, ConnectionError, ReadTimeout
with suppress((ConnectionError, ReadTimeout)):
res = get("https://rig.mit.edu/et/projects/nipy/heudiconv", timeout=0.5)
try:
_latest_version = res.json()['version']
except Exception:
pass
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why direct calls via requests and not use of etelemetry module client? by all means we want to avoid reimplementing what it does/created for.



class _Config:
"""An abstract class forbidding instantiation."""
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

expand that subclasses must not be instantiated, all methods must be @classmethod or @abstractmethod?


_paths = tuple()

def __init__(self):
"""Avert instantiation."""
raise RuntimeError('Configuration type is not instantiable.')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why not to start using abc and thus declare this as an abc.abstractmethod? according to https://docs.python.org/3/library/abc.html#abc.abstractmethod could be applied to @classmethod as well


@classmethod
def load(cls, settings, init=True):
"""Store settings from a dictionary."""
for k, v in settings.items():
if v is None:
continue
if k in cls._paths:
setattr(cls, k, Path(v).absolute())
continue
if k in cls._multipaths:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shouldn't then _multipath be also assigned at the class level? Please add comments along on their purpose and difference between (_paths and _multipaths) -- would become irrelevant if "semantics" are defined per each attribute as suggested below

setattr(cls, k, tuple(Path(i).absolute() for i in v))
if hasattr(cls, k):
setattr(cls, k, v)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and if not - we just completely ignore the setting without even a warning? target use-case?


if init:
try:
cls.init()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add empty init to the base class then and describe its purpose. if subclass MUST define init (seems to be not the case, may be we should just start using abc and declare init as @abc.abstractmethod. Altogether - I would really like to avoid try/except here since it could mask bugs in init code, and interface of this base _Config should provide sufficient information about what subclasses should overload

except AttributeError:
pass

@classmethod
def get(cls):
"""Return defined settings."""
out = {}
for k, v in cls.__dict__.items():
if k.startswith('_') or v is None:
continue
if callable(getattr(cls, k)):
continue
if k in cls._paths:
v = str(v)
if k in cls._multipaths:
v = tuple(str(i) for i in v)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should there be some sanity check that there is no duplicate entries in _paths and _multipaths? -- becomes irrelevant if "semantics" as encapsulated as suggested in other comment

out[k] = v
return out


class workflow(_Config):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

May be make it into workflow_config? Even though it is config.workflow, whenever type etc would be printed, module name would not be included and thus could lead to a confusion with some real "workflow" (not just config)?

"""Configure run-level settings."""

_paths = (
'dcmconfig',
'outdir',
'conv_outdir',
)
_multipaths = (
'files',
)

dicom_dir_template = None
"Template to search one or more directories for DICOMs and tarballs."
files = None
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

overall it would be better to avoid splitting "semantics" from the field and _multipaths (and _paths).
Why not to declare dummy classes NonePath and NoneMultiPath and use them to annotate fields , and then make _paths and _multipath populated in top level __init__ based on the values it finds among class level attributes? That would help to avoid bugs whenever some attribute is renamed/introduced but _*paths is not adjusted accordingly

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BTW, there seems to be no way to "access" these "variable docstrings":

$> cat /tmp/1.py
class bu:
   at = None
   """at docstring"""

print(bu.at.__doc__)

$> python3 /tmp/1.py
None

so we wouldn't be able to provide user-friendly dump of them etc. Why not to provide some encapsulation?

NB may be even just use https://www.attrs.org/en/stable/ for this purpose (and switch away from using static single class and @classmethod)s? If central instance is needed, it could be instantiated at the top level module. FWIW, we do smth like that for datalad: https://github.com/datalad/datalad/blob/master/datalad/__init__.py#L47 where we instantiate "global config" but still allowing per-dataset configurations which might override some settings. This provides flexibility where necessary.

"Files (tarballs, DICOMs) or directories containing files to process."
subjs = None
"List of target subject IDs."
converter = 'dcm2niix'
"Tool to use for DICOM conversion."
outdir = Path('.').absolute()
"Output directory for conversion."
locator = None
"Study path under ``outdir``."
conv_outdir = None
"Anonymization output directory for converted files."
anon_cmd = None
"Command to run to anonymize subject IDs."
heuristic = None
"Path to custom file or name of built-in heuristic."
with_prov = False
"Store additional provenance information."
session = None
"Session for longitudinal studies."
bids_options = None
"Generate relevant BIDS files."
overwrite = False
"Overwrite existing converted files."
datalad = None
"Store the entire collection as DataLad datasets."
debug = None
"Do not catch exceptions and show traceback."
grouping = None
"DICOM grouping method."
minmeta = None
"Minimize BIDS sidecar metadata."
random_seed = None
"Random seed to initialize PRNG."
dcmconfig = None
"JSON file for additional dcm2niix configuration."
queue = None
"Batch system to submit jobs in parallel."
queue_args = None
"Additional queue arguments."
command = None
"Custom action to be performed on provided files"

@classmethod
def init(cls):
"""Initialize heudiconv execution"""
from .utils import load_heuristic

if cls.heuristic:
cls.heuristic = load_heuristic(cls.heuristic)
if cls.random_seed is not None:
_init_seed(cls.random_seed)


def from_dict(settings):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so this one is "workflow" specific... should then be ideally bound to the class one way or another (I know -- in current implementation it would conflict with definition of attributes/options straight at the class level - see my comment about attrs)

"""Read and load settings from a flat dictionary."""
workflow.load(settings)


def get(flat=False):
"""Get config as a dict."""
settings = {
'workflow': workflow.get(),
}
if not flat:
return settings

return {'.'.join((section, k)): v
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

note: seems to be not covered by the tests.

for section, configs in settings.items()
for k, v in configs.items()}


def _init_seed(seed):
import random
import numpy as np
random.seed(seed)
np.random.seed(seed)


def add_tmpdir(tmpdir):
"""Track temporary directories"""
_tmpdirs.add(tmpdir)


def _cleanup():
"""Cleanup tracked temporary directories"""
import shutil

for tmpdir in _tmpdirs:
try:
shutil.rmtree(tmpdir)
except FileNotFoundError:
pass


_tmpdirs = set()
# ensure cleanup of temporary directories occurs at exit
atexit.register(_cleanup)
Loading