-
Notifications
You must be signed in to change notification settings - Fork 128
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
FIX/ENH/RF: Introduce configuration module and catch lingering tmpdirs #466
Changes from 5 commits
6ff4485
9853482
a17d590
77ac542
7f3937d
eb93cb9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,188 @@ | ||
""" | ||
A Python module for managing configurations across a heudiconv run. | ||
""" | ||
|
||
import atexit | ||
import os | ||
from pathlib import Path | ||
|
||
|
||
# avoid telemetry check if user desires | ||
_disable_et = bool( | ||
os.getenv("NO_ET") is not None | ||
or os.getenv("NIPYPE_NO_ET") is not None | ||
) | ||
os.environ["NIPYPE_NO_ET"] = "1" | ||
os.environ["NO_ET"] = "1" | ||
_latest_version = "Unknown" | ||
|
||
if not _disable_et: | ||
# Ensure analytics hit only once | ||
from contextlib import suppress | ||
from requests import get, ConnectionError, ReadTimeout | ||
with suppress((ConnectionError, ReadTimeout)): | ||
res = get("https://rig.mit.edu/et/projects/nipy/heudiconv", timeout=0.5) | ||
try: | ||
_latest_version = res.json()['version'] | ||
except Exception: | ||
pass | ||
|
||
|
||
class _Config: | ||
"""An abstract class forbidding instantiation.""" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. expand that subclasses must not be instantiated, all methods must be |
||
|
||
_paths = tuple() | ||
|
||
def __init__(self): | ||
"""Avert instantiation.""" | ||
raise RuntimeError('Configuration type is not instantiable.') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why not to start using |
||
|
||
@classmethod | ||
def load(cls, settings, init=True): | ||
"""Store settings from a dictionary.""" | ||
for k, v in settings.items(): | ||
if v is None: | ||
continue | ||
if k in cls._paths: | ||
setattr(cls, k, Path(v).absolute()) | ||
continue | ||
if k in cls._multipaths: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. shouldn't then |
||
setattr(cls, k, tuple(Path(i).absolute() for i in v)) | ||
if hasattr(cls, k): | ||
setattr(cls, k, v) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. and if not - we just completely ignore the setting without even a warning? target use-case? |
||
|
||
if init: | ||
try: | ||
cls.init() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please add empty |
||
except AttributeError: | ||
pass | ||
|
||
@classmethod | ||
def get(cls): | ||
"""Return defined settings.""" | ||
out = {} | ||
for k, v in cls.__dict__.items(): | ||
if k.startswith('_') or v is None: | ||
continue | ||
if callable(getattr(cls, k)): | ||
continue | ||
if k in cls._paths: | ||
v = str(v) | ||
if k in cls._multipaths: | ||
v = tuple(str(i) for i in v) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should there be some sanity check that there is no duplicate entries in |
||
out[k] = v | ||
return out | ||
|
||
|
||
class workflow(_Config): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. May be make it into |
||
"""Configure run-level settings.""" | ||
|
||
_paths = ( | ||
'dcmconfig', | ||
'outdir', | ||
'conv_outdir', | ||
) | ||
_multipaths = ( | ||
'files', | ||
) | ||
|
||
dicom_dir_template = None | ||
"Template to search one or more directories for DICOMs and tarballs." | ||
files = None | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. overall it would be better to avoid splitting "semantics" from the field and There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. BTW, there seems to be no way to "access" these "variable docstrings": $> cat /tmp/1.py
class bu:
at = None
"""at docstring"""
print(bu.at.__doc__)
$> python3 /tmp/1.py
None
so we wouldn't be able to provide user-friendly dump of them etc. Why not to provide some encapsulation? NB may be even just use https://www.attrs.org/en/stable/ for this purpose (and switch away from using static single class and |
||
"Files (tarballs, DICOMs) or directories containing files to process." | ||
subjs = None | ||
"List of target subject IDs." | ||
converter = 'dcm2niix' | ||
"Tool to use for DICOM conversion." | ||
outdir = Path('.').absolute() | ||
"Output directory for conversion." | ||
locator = None | ||
"Study path under ``outdir``." | ||
conv_outdir = None | ||
"Anonymization output directory for converted files." | ||
anon_cmd = None | ||
"Command to run to anonymize subject IDs." | ||
heuristic = None | ||
"Path to custom file or name of built-in heuristic." | ||
with_prov = False | ||
"Store additional provenance information." | ||
session = None | ||
"Session for longitudinal studies." | ||
bids_options = None | ||
"Generate relevant BIDS files." | ||
overwrite = False | ||
"Overwrite existing converted files." | ||
datalad = None | ||
"Store the entire collection as DataLad datasets." | ||
debug = None | ||
"Do not catch exceptions and show traceback." | ||
grouping = None | ||
"DICOM grouping method." | ||
minmeta = None | ||
"Minimize BIDS sidecar metadata." | ||
random_seed = None | ||
"Random seed to initialize PRNG." | ||
dcmconfig = None | ||
"JSON file for additional dcm2niix configuration." | ||
queue = None | ||
"Batch system to submit jobs in parallel." | ||
queue_args = None | ||
"Additional queue arguments." | ||
command = None | ||
"Custom action to be performed on provided files" | ||
|
||
@classmethod | ||
def init(cls): | ||
"""Initialize heudiconv execution""" | ||
from .utils import load_heuristic | ||
|
||
if cls.heuristic: | ||
cls.heuristic = load_heuristic(cls.heuristic) | ||
if cls.random_seed is not None: | ||
_init_seed(cls.random_seed) | ||
|
||
|
||
def from_dict(settings): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so this one is "workflow" specific... should then be ideally bound to the class one way or another (I know -- in current implementation it would conflict with definition of attributes/options straight at the class level - see my comment about |
||
"""Read and load settings from a flat dictionary.""" | ||
workflow.load(settings) | ||
|
||
|
||
def get(flat=False): | ||
"""Get config as a dict.""" | ||
settings = { | ||
'workflow': workflow.get(), | ||
} | ||
if not flat: | ||
return settings | ||
|
||
return {'.'.join((section, k)): v | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. note: seems to be not covered by the tests. |
||
for section, configs in settings.items() | ||
for k, v in configs.items()} | ||
|
||
|
||
def _init_seed(seed): | ||
import random | ||
import numpy as np | ||
random.seed(seed) | ||
np.random.seed(seed) | ||
|
||
|
||
def add_tmpdir(tmpdir): | ||
"""Track temporary directories""" | ||
_tmpdirs.add(tmpdir) | ||
|
||
|
||
def _cleanup(): | ||
"""Cleanup tracked temporary directories""" | ||
import shutil | ||
|
||
for tmpdir in _tmpdirs: | ||
try: | ||
shutil.rmtree(tmpdir) | ||
except FileNotFoundError: | ||
pass | ||
|
||
|
||
_tmpdirs = set() | ||
# ensure cleanup of temporary directories occurs at exit | ||
atexit.register(_cleanup) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why direct calls via
requests
and not use ofetelemetry
module client? by all means we want to avoid reimplementing what it does/created for.