From 3905be3c88ca719280e22c76edf66526e700f358 Mon Sep 17 00:00:00 2001 From: Andrew Pontzen Date: Thu, 5 Oct 2023 16:56:26 +0100 Subject: [PATCH 01/22] Fix ingesting TNG-like simulations --- setup.py | 2 +- tangos/input_handlers/caterpillar.py | 1 + tangos/input_handlers/finding.py | 8 ++++++-- tangos/input_handlers/pynbody.py | 25 +++++++++++++++++-------- 4 files changed, 25 insertions(+), 11 deletions(-) diff --git a/setup.py b/setup.py index 99cca5aa..168a546c 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,7 @@ 'pytest >= 5.0.0', 'webtest >= 2.0', 'pyquery >= 1.3.0', - 'pynbody >= 1.2.2', + 'pynbody >= 1.3.2', 'yt>=3.4.0', 'PyMySQL>=1.0.2', ] diff --git a/tangos/input_handlers/caterpillar.py b/tangos/input_handlers/caterpillar.py index 2c465e26..38165b2b 100644 --- a/tangos/input_handlers/caterpillar.py +++ b/tangos/input_handlers/caterpillar.py @@ -12,6 +12,7 @@ class CaterpillarInputHandler(PynbodyInputHandler): patterns = ["snapdir_???"] + auxiliary_file_patterns = ["halos/halos_???", "halos_???"] @classmethod def _snap_id_from_snapdir_path(cls, path): diff --git a/tangos/input_handlers/finding.py b/tangos/input_handlers/finding.py index 8973fb45..d79bcdf0 100644 --- a/tangos/input_handlers/finding.py +++ b/tangos/input_handlers/finding.py @@ -68,8 +68,8 @@ def enumerate_timestep_extensions(self): extensions = find(basename=base + "/", patterns=self.patterns) logger.info("Enumerate timestep extensions base=%r patterns=%s", base, self.patterns) for e in extensions: - if self._is_able_to_load(e): - yield e[len(base) + 1:] + if self._is_able_to_load(self._transform_extension(e)): + yield self._transform_extension(e[len(base) + 1:]) else: logger.info("Could not load %s with class %s", e, self) @@ -78,3 +78,7 @@ def _is_able_to_load(self, fname): Override in child class to filter the pattern-based file matches""" return True + + def _transform_extension(self, extension_name): + """Can be overriden by child classes to map from the literal filename discovered to a different name for loading""" + return extension_name diff --git a/tangos/input_handlers/pynbody.py b/tangos/input_handlers/pynbody.py index 030eb474..3fee78a3 100644 --- a/tangos/input_handlers/pynbody.py +++ b/tangos/input_handlers/pynbody.py @@ -326,7 +326,7 @@ class GadgetSubfindInputHandler(PynbodyInputHandler): _property_prefix_for_type = {'halo': 'sub_'} - _sub_parent_name = 'sub_groupNr' + _sub_parent_names = ['sub_groupNr'] _hidden_properties = ['children', 'group_len', 'group_off', 'Nsubs', 'groupNr', 'len', 'off'] @@ -381,9 +381,9 @@ def available_object_property_names_for_timestep(self, ts_extension, object_type if new_p.startswith("_"): new_p = new_p[1:] properties[i] = new_p - if p==self._sub_parent_name: + if p in self._sub_parent_names: properties[i] = 'parent' - if p=='children': # NB 'children' is generated by pynbody for both Subfind and SubfindHDF catalogues + if p == 'children': # NB 'children' is generated by pynbody for both Subfind and SubfindHDF catalogues properties[i] = 'child' properties = [p for p in properties if p not in self._hidden_properties] @@ -401,7 +401,9 @@ def iterate_object_properties_for_timestep(self, ts_extension, object_typetag, p pynbody_properties = h.get_halo_properties(i,with_unit=False) if k=='parent': - adapted_k = self._sub_parent_name + for adapted_k in self._sub_parent_names: + if adapted_k in pynbody_properties.keys(): + break else: adapted_k = pynbody_prefix + k if adapted_k not in pynbody_properties: @@ -409,7 +411,7 @@ def iterate_object_properties_for_timestep(self, ts_extension, object_typetag, p if adapted_k in pynbody_properties: data = self._resolve_units(pynbody_properties[adapted_k]) - if adapted_k == self._sub_parent_name and data is not None: + if adapted_k in self._sub_parent_names and data is not None: # turn into a link data = proxy_object.IncompleteProxyObjectFromFinderId(data, 'group') elif k=='child' and "children" in pynbody_properties: @@ -425,18 +427,25 @@ def iterate_object_properties_for_timestep(self, ts_extension, object_typetag, p yield all_data class Gadget4HDFSubfindInputHandler(GadgetSubfindInputHandler): - patterns = ["snapshot_???.hdf5"] - auxiliary_file_patterns =["fof_subhalo_tab_???.hdf5"] + patterns = ["snapshot_???.hdf5", "snapshot_???.0.hdf5", "snap_???.hdf5", "snap_???.0.hdf5"] + auxiliary_file_patterns =["fof_subhalo_tab_???.hdf5", "fof_subhalo_tab_???.0.hdf5"] snap_class_name = "pynbody.snapshot.gadgethdf.GadgetHDFSnap" catalogue_class_name = "pynbody.halo.Gadget4SubfindHDFCatalogue" _property_prefix_for_type = {'halo': 'Subhalo', 'group': 'Group'} - _sub_parent_name = 'SubhaloGroupNr' + _sub_parent_names = ['SubhaloGroupNr', 'SubhaloGrNr'] _hidden_properties = ['Len', 'LenType', 'OffsetType', 'ParentRank', 'RankInGr', 'Nr', 'Ascale', 'FirstSub', 'OffsetType'] + def _transform_extension(self, extension_name): + if extension_name.endswith(".0.hdf5"): + return extension_name[:-7] + else: + return extension_name + + class GadgetRockstarInputHandler(PynbodyInputHandler): patterns = ["snapshot_???"] From d50f4b2816cd5a86a7f5698e79e277608bcfea0c Mon Sep 17 00:00:00 2001 From: Andrew Pontzen Date: Thu, 5 Oct 2023 17:37:45 +0100 Subject: [PATCH 02/22] Add r200c, r200m and rvirial properties --- tangos/properties/pynbody/__init__.py | 2 +- tangos/properties/pynbody/radius.py | 85 +++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 tangos/properties/pynbody/radius.py diff --git a/tangos/properties/pynbody/__init__.py b/tangos/properties/pynbody/__init__.py index f2ae399b..914c49b6 100644 --- a/tangos/properties/pynbody/__init__.py +++ b/tangos/properties/pynbody/__init__.py @@ -8,4 +8,4 @@ class PynbodyPropertyCalculation(PropertyCalculation): PynbodyHaloProperties = PynbodyPropertyCalculation # old name, to be deprecated -from . import BH, SF, centring, eagle, gas, images, mass, profile, zoom +from . import BH, SF, centring, eagle, gas, images, mass, profile, zoom, radius diff --git a/tangos/properties/pynbody/radius.py b/tangos/properties/pynbody/radius.py new file mode 100644 index 00000000..00b23d7d --- /dev/null +++ b/tangos/properties/pynbody/radius.py @@ -0,0 +1,85 @@ +from . import PynbodyHaloProperties +from .centring import centred_calculation + + +class Radius(PynbodyHaloProperties): + + @staticmethod + def _get_overdensity_contrast(): + raise NotImplementedError("This is meant to be an abstract class") + + @classmethod + def get_contrast(cls): + return cls._get_overdensity_contrast() + + @staticmethod + def _get_reference_definition(): + raise NotImplementedError("This is meant to be an abstract class") + + @classmethod + def get_rhodef(cls): + return cls._get_reference_definition() + + @centred_calculation + def calculate(self, particle_data, existing_properties): + import pynbody.analysis as analysis + self._ensure_pynbody_mass_array_loaded_family_level(particle_data) + + # Virial radius is calculated using density contributions form all families + return analysis.halo.virial_radius(particle_data, overden=self.get_contrast(), rho_def=self.get_rhodef()) + + def region_specification(self, existing_properties): + import pynbody + return pynbody.filt.Sphere(existing_properties['max_radius'] * 3, + existing_properties['shrink_center']) + + def requires_property(self): + return ["shrink_center", "max_radius"] + + @staticmethod + def _ensure_pynbody_mass_array_loaded_family_level(particle_data): + # Make sure the mass array is loaded at family level + import pynbody.family + for family in particle_data.families(): + if family is pynbody.family.dm: + particle_data.d['mass'] + if family is pynbody.family.star: + particle_data.s['mass'] + if family is pynbody.family.gas: + particle_data.g['mass'] + + +class Radius200m(Radius): + names = "r200m" + + @staticmethod + def _get_overdensity_contrast(): + return 200 + + @staticmethod + def _get_reference_definition(): + return 'matter' + + +class Radius200c(Radius): + names = "r200c" + + @staticmethod + def _get_overdensity_contrast(): + return 200 + + @staticmethod + def _get_reference_definition(): + return 'critical' + + +class RadiusVirial(Radius): + names = "rvirial" + + @staticmethod + def _get_overdensity_contrast(): + return 178 + + @staticmethod + def _get_reference_definition(): + return 'matter' \ No newline at end of file From 3ed2c21fbc2fe0dc1553819d3caddd0189b96378 Mon Sep 17 00:00:00 2001 From: Andrew Pontzen Date: Sun, 8 Oct 2023 16:18:26 +0100 Subject: [PATCH 03/22] Clarify log output from tangos write --- tangos/cached_writer.py | 8 +++++--- tangos/tools/property_writer.py | 12 +++++++----- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/tangos/cached_writer.py b/tangos/cached_writer.py index f55f4fae..7f5fdf3e 100644 --- a/tangos/cached_writer.py +++ b/tangos/cached_writer.py @@ -16,18 +16,20 @@ def create_property(halo, name, prop, session): def _insert_list_unlocked(property_list): session = core.get_default_session() - + number = 0 for p in property_list: if p[2] is not None: session.add(create_property(p[0], p[1], p[2], session)) + number += 1 session.commit() + return number def insert_list(property_list): from tangos import parallel_tasks as pt if pt.backend!=None: with pt.ExclusiveLock("insert_list"): - _insert_list_unlocked(property_list) + return _insert_list_unlocked(property_list) else: - _insert_list_unlocked(property_list) + return _insert_list_unlocked(property_list) diff --git a/tangos/tools/property_writer.py b/tangos/tools/property_writer.py index 15b2f902..7c454b4a 100644 --- a/tangos/tools/property_writer.py +++ b/tangos/tools/property_writer.py @@ -246,9 +246,9 @@ def _is_commit_needed(self, end_of_timestep, end_of_simulation): def _commit_results_if_needed(self, end_of_timestep=False, end_of_simulation=False): if self._is_commit_needed(end_of_timestep, end_of_simulation): - logger.info("Attempting to commit %d halo properties...", len(self._pending_properties)) - insert_list(self._pending_properties) - logger.info("%d properties were committed", len(self._pending_properties)) + logger.info("Attempting to commit halo properties...") + num_properties = insert_list(self._pending_properties) + logger.info(f"...{num_properties} properties were committed") self._pending_properties = [] self._start_time = time.time() self.timing_monitor.summarise_timing(logger) @@ -459,8 +459,10 @@ def run_timestep_calculation(self, db_timestep): logger.info("Successfully gathered existing properties; calculating halo properties now...") - logger.info(" %d halos to consider; %d property calculations for each of them", - len(db_halos), len(self._property_calculator_instances)) + logger.info(" %d halos to consider; %d calculation routines for each of them, resulting in %d properties per halo", + len(db_halos), len(self._property_calculator_instances), + sum([1 if isinstance(x.names, str) else len(x.names) for x in self._property_calculator_instances]) + ) for db_halo, existing_properties in \ self._get_parallel_halo_iterator(list(zip(db_halos, self._existing_properties_all_halos))): From 86f5defbc98ad78e71ec3e63fad765c9ae3ea653 Mon Sep 17 00:00:00 2001 From: Andrew Pontzen Date: Mon, 9 Oct 2023 11:31:05 +0100 Subject: [PATCH 04/22] Clarify how one class is preferred over another when asking for a property's providing_class This leads to a version bump to 1.8.0 because in principle it may produce different results. (The previous approach was ambiguous in some cases, so would lead to very unstable results.) --- tangos/__init__.py | 2 +- tangos/properties/__init__.py | 37 ++++++++++++++++++++++++++-- tangos/tools/property_writer.py | 5 ++++ tests/test_property_class_mapping.py | 33 ++++++++++++++++++++++++- 4 files changed, 73 insertions(+), 4 deletions(-) diff --git a/tangos/__init__.py b/tangos/__init__.py index 7dc61842..de40f8d1 100644 --- a/tangos/__init__.py +++ b/tangos/__init__.py @@ -20,4 +20,4 @@ from .core import * from .query import * -__version__ = '1.7.1' +__version__ = '1.8.0' diff --git a/tangos/properties/__init__.py b/tangos/properties/__init__.py index b34aa5c3..638aee0e 100644 --- a/tangos/properties/__init__.py +++ b/tangos/properties/__init__.py @@ -421,7 +421,20 @@ def all_properties(with_particle_data=True): def providing_class(property_name, handler_class=None, silent_fail=False): """Return property calculator class for given property name when files will be loaded by specified handler. - If handler_class is None, return "live" properties which can be calculated without particle data""" + If handler_class is None, return "live" properties which can be calculated without particle data. + + When more than one possible class is capable of calculating the requested property, the following criteria + are used to select one. The guiding criterion is to select user-provided code of the greatest specificity. + + 1) If possible, the most specialised class in terms of the class hierarchy is used. So if a user-defined class + inherits from tangos.properties.pynbody.CentreAndRadius, and implements the same properties, the child class + will be preferred + 2) If there is no class hierarchy, the class defined in the tangos codebase is de-prioritised over any externally + provided classes + 3) If there is still a tie, the string representation of the classname (including the module) is used to sort + alphabetically. This has no particular rationale except to make reproducible results. + + """ candidates = all_providing_classes(property_name) @@ -461,9 +474,29 @@ def cmp(a, b): if a is b: return 0 elif issubclass(a, b): + return -1 + elif issubclass(b, a): return 1 else: - return -1 + # Next, let's see if one of these is internal to tangos. If so, we'll define it as + # less specialised than the user-provided other one + if a.__module__.startswith("tangos.") and not b.__module__.startswith("tangos."): + return 1 + elif b.__module__.startswith("tangos.") and not a.__module__.startswith("tangos."): + return -1 + else: + # out of sensible ways to order, now we just go alphabetical + a_name = a.__module__ + "." + a.__qualname__ + b_name = b.__module__ + "." + b.__qualname__ + if a_nameb_name: + return 1 + else: + # very surprising to reach this - how can two different classes have the same module and name? + return 0 + + candidates.sort(key=functools.cmp_to_key(cmp)) diff --git a/tangos/tools/property_writer.py b/tangos/tools/property_writer.py index 7c454b4a..8ecb7f23 100644 --- a/tangos/tools/property_writer.py +++ b/tangos/tools/property_writer.py @@ -464,6 +464,11 @@ def run_timestep_calculation(self, db_timestep): sum([1 if isinstance(x.names, str) else len(x.names) for x in self._property_calculator_instances]) ) + logger.info(" The property modules are:") + for x in self._property_calculator_instances: + x_type = type(x) + logger.info(f" {x_type.__module__}.{x_type.__qualname__}") + for db_halo, existing_properties in \ self._get_parallel_halo_iterator(list(zip(db_halos, self._existing_properties_all_halos))): self._existing_properties_this_halo = existing_properties diff --git a/tests/test_property_class_mapping.py b/tests/test_property_class_mapping.py index e976c302..22c8e399 100644 --- a/tests/test_property_class_mapping.py +++ b/tests/test_property_class_mapping.py @@ -2,6 +2,7 @@ import tangos import tangos.input_handlers as soh +import tangos.input_handlers.pynbody as pih import tangos.properties as prop @@ -22,7 +23,13 @@ class PropertyForHandler1(prop.PropertyCalculation): class PropertyForHandler2(prop.PropertyCalculation): works_with_handler = _TestOutputHandler2 requires_particle_data = True - names = "widget", "robin" + names = "widget", "robin", "tree" + +class DensityProfileOverride(prop.pynbody.profile.HaloDensityProfile): + pass + +class CenterOverride(prop.pynbody.PynbodyPropertyCalculation): + names = "shrink_center" class PropertyForHandler1Child(prop.PropertyCalculation): works_with_handler = _TestOutputHandler1Child @@ -32,6 +39,17 @@ class PropertyForHandler1Child(prop.PropertyCalculation): class PropertyForLiveUse(prop.LivePropertyCalculation): names = "livewidget" +class AmbiguousPropertyForHandler2(prop.PropertyCalculation): + works_with_handler = _TestOutputHandler1 + requires_particle_data = True + names = "ambiguous" +class AmbiguousPropertyForHandler1(prop.PropertyCalculation): + works_with_handler = _TestOutputHandler1 + requires_particle_data = True + names = "ambiguous" + + + def test_setup(): assert issubclass(_TestOutputHandler1Child, _TestOutputHandler1) assert not issubclass(_TestOutputHandler1, _TestOutputHandler1Child) @@ -60,3 +78,16 @@ def test_map_liveproperty(): with assert_raises(NameError): prop.providing_class("widget") # unavailable as a live property + +def test_priorities(): + # most specialised subclass should be used: + assert (prop.providing_class("dm_density_profile", pih.PynbodyInputHandler) + is DensityProfileOverride) + + # if there is no class hierarchy, the one external to tangos should be used: + assert (prop.providing_class("shrink_center", pih.PynbodyInputHandler) + is CenterOverride) + + # alphabetical priority as last resort, first in alphabetical is prioritised: + assert (prop.providing_class("ambiguous", _TestOutputHandler1) + is AmbiguousPropertyForHandler1) \ No newline at end of file From 428a4944c3ffe6da2d5a58971e1c1cc7bb71b08a Mon Sep 17 00:00:00 2001 From: Andrew Pontzen Date: Mon, 9 Oct 2023 11:46:15 +0100 Subject: [PATCH 05/22] Fix errors and clarify coding in providing_class --- tangos/properties/__init__.py | 55 +++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/tangos/properties/__init__.py b/tangos/properties/__init__.py index 638aee0e..ce4f6218 100644 --- a/tangos/properties/__init__.py +++ b/tangos/properties/__init__.py @@ -426,9 +426,12 @@ def providing_class(property_name, handler_class=None, silent_fail=False): When more than one possible class is capable of calculating the requested property, the following criteria are used to select one. The guiding criterion is to select user-provided code of the greatest specificity. - 1) If possible, the most specialised class in terms of the class hierarchy is used. So if a user-defined class - inherits from tangos.properties.pynbody.CentreAndRadius, and implements the same properties, the child class - will be preferred + 1) If possible, the class targetting the most specialised input handler is selected. That is, if a + class targetting say PynbodyInputHandler is available, it will be selected in preference to one + targetting HandlerBase. + 2) Next, the class hierarchy of the properties themselves is inspected. If one class is a subclass of another, + the more specialised class is selected. For example, if there are two classes calculating "my_prop", A and B, + and B is a child class of A, B is selected. 2) If there is no class hierarchy, the class defined in the tangos codebase is de-prioritised over any externally provided classes 3) If there is still a tie, the string representation of the classname (including the module) is used to sort @@ -473,33 +476,41 @@ def _sort_by_class_hierarchy(candidates): def cmp(a, b): if a is b: return 0 - elif issubclass(a, b): + + # Rule 1: prefer the most specialised handler + if issubclass(a.works_with_handler, b.works_with_handler): + return -1 + elif issubclass(b.works_with_handler, a.works_with_handler): + return 1 + + # Rule 2: prefer the most specialised class: + if issubclass(a, b): return -1 elif issubclass(b, a): return 1 - else: - # Next, let's see if one of these is internal to tangos. If so, we'll define it as - # less specialised than the user-provided other one - if a.__module__.startswith("tangos.") and not b.__module__.startswith("tangos."): - return 1 - elif b.__module__.startswith("tangos.") and not a.__module__.startswith("tangos."): - return -1 - else: - # out of sensible ways to order, now we just go alphabetical - a_name = a.__module__ + "." + a.__qualname__ - b_name = b.__module__ + "." + b.__qualname__ - if a_nameb_name: - return 1 - else: - # very surprising to reach this - how can two different classes have the same module and name? - return 0 + # Rule 3: prefer externally-provided classes over tangos-provided ones + + if a.__module__.startswith("tangos.") and not b.__module__.startswith("tangos."): + return 1 + elif b.__module__.startswith("tangos.") and not a.__module__.startswith("tangos."): + return -1 + + # Rule 4: out of sensible ways to order, now we just go alphabetical + a_name = a.__module__ + "." + a.__qualname__ + b_name = b.__module__ + "." + b.__qualname__ + if a_nameb_name: + return 1 + + # very surprising to reach this - how can two different classes have the same module and name? + return 0 candidates.sort(key=functools.cmp_to_key(cmp)) + def providing_classes(property_name_list, handler_class, silent_fail=False): """Return classes for given list of property names; see providing_class for details""" classes = [] From 1cecd1e6ae6ed4295a68dad876f4a9d26a196041 Mon Sep 17 00:00:00 2001 From: Andrew Pontzen Date: Mon, 9 Oct 2023 11:53:32 +0100 Subject: [PATCH 06/22] Don't import property modules when testing, since they can override expected results --- tangos/properties/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tangos/properties/__init__.py b/tangos/properties/__init__.py index ce4f6218..719f5ce4 100644 --- a/tangos/properties/__init__.py +++ b/tangos/properties/__init__.py @@ -1,5 +1,6 @@ import functools import importlib +import os import warnings import numpy as np @@ -539,6 +540,10 @@ def instantiate_class(simulation, property_name, silent_fail=False): return instance[0] def _import_configured_property_modules(): + if "PYTEST_CURRENT_TEST" in os.environ: + warnings.warn("Not importing external property modules during testing", ImportWarning) + return + from ..config import property_modules for pm in property_modules: if pm=="": continue From 30c95008b4fd35c13741856b34bbb9750dd08411 Mon Sep 17 00:00:00 2001 From: Andrew Pontzen Date: Mon, 9 Oct 2023 12:18:50 +0100 Subject: [PATCH 07/22] Provide option to explain why particular property classes get selected over others --- tangos/core/dictionary.py | 4 ++-- tangos/properties/__init__.py | 41 ++++++++++++++++++++++++--------- tangos/tools/property_writer.py | 6 ++++- 3 files changed, 37 insertions(+), 14 deletions(-) diff --git a/tangos/core/dictionary.py b/tangos/core/dictionary.py index 9c03ce74..611488f7 100644 --- a/tangos/core/dictionary.py +++ b/tangos/core/dictionary.py @@ -20,9 +20,9 @@ def __repr__(self): def __init__(self, text): self.text = text - def providing_class(self, handler): + def providing_class(self, handler, explain=False): from .. import properties - return properties.providing_class(self.text, handler) + return properties.providing_class(self.text, handler, explain) raise_exception = object() diff --git a/tangos/properties/__init__.py b/tangos/properties/__init__.py index 719f5ce4..593d954d 100644 --- a/tangos/properties/__init__.py +++ b/tangos/properties/__init__.py @@ -9,6 +9,7 @@ from tangos.util import timing_monitor from .. import input_handlers, parallel_tasks +from ..log import logger class PropertyCalculationMetaClass(type): @@ -419,7 +420,7 @@ def all_properties(with_particle_data=True): return pr @functools.lru_cache -def providing_class(property_name, handler_class=None, silent_fail=False): +def providing_class(property_name, handler_class=None, silent_fail=False, explain=False): """Return property calculator class for given property name when files will be loaded by specified handler. If handler_class is None, return "live" properties which can be calculated without particle data. @@ -449,7 +450,7 @@ class targetting say PynbodyInputHandler is available, it will be selected in pr if len(candidates)>=1: # return the property which is most specialised - _sort_by_class_hierarchy(candidates) + _sort_by_class_hierarchy(candidates, explain) return candidates[0] elif silent_fail: return None @@ -473,38 +474,56 @@ def all_providing_classes(property_name): return candidates -def _sort_by_class_hierarchy(candidates): +def _sort_by_class_hierarchy(candidates, explain=False): + def explanation(s): + if explain: + logger.info(s) def cmp(a, b): + a_name = a.__module__ + "." + a.__qualname__ + b_name = b.__module__ + "." + b.__qualname__ + if a is b: return 0 # Rule 1: prefer the most specialised handler - if issubclass(a.works_with_handler, b.works_with_handler): - return -1 - elif issubclass(b.works_with_handler, a.works_with_handler): - return 1 + if a.works_with_handler is not b.works_with_handler: + if issubclass(a.works_with_handler, b.works_with_handler): + explanation(f"{a_name} is preferred to {b_name} because handler ({a.works_with_handler}) is a subclass " + f"of handler {b.works_with_handler}") + return -1 + elif issubclass(b.works_with_handler, a.works_with_handler): + explanation(f"{b_name} is preferred to {a_name} because handler ({b.works_with_handler}) is a subclass " + f"of handler {a.works_with_handler}") + return 1 # Rule 2: prefer the most specialised class: if issubclass(a, b): + explanation(f"{a_name} is preferred to {b_name} because it is a subclass of {b_name}") return -1 elif issubclass(b, a): + explanation(f"{b_name} is preferred to {a_name} because it is a subclass of {a_name}") return 1 # Rule 3: prefer externally-provided classes over tangos-provided ones if a.__module__.startswith("tangos.") and not b.__module__.startswith("tangos."): + explanation(f"{b_name} is preferred to {a_name} because it is provided externally to tangos") return 1 elif b.__module__.startswith("tangos.") and not a.__module__.startswith("tangos."): + explanation(f"{a_name} is preferred to {b_name} because it is provided externally to tangos") return -1 # Rule 4: out of sensible ways to order, now we just go alphabetical a_name = a.__module__ + "." + a.__qualname__ b_name = b.__module__ + "." + b.__qualname__ if a_nameb_name: + explanation(f"{b_name} is preferred to {a_name} because of alphabetical ordering") return 1 + explanation(f"{a_name} and {b_name} could not be distinguished by any of the ordering rules") # very surprising to reach this - how can two different classes have the same module and name? return 0 @@ -512,22 +531,22 @@ def cmp(a, b): -def providing_classes(property_name_list, handler_class, silent_fail=False): +def providing_classes(property_name_list, handler_class, silent_fail=False, explain=False): """Return classes for given list of property names; see providing_class for details""" classes = [] for property_name in property_name_list: - cl = providing_class(property_name, handler_class, silent_fail) + cl = providing_class(property_name, handler_class, silent_fail, explain) if cl not in classes and cl is not None: classes.append(cl) return classes -def instantiate_classes(simulation, property_name_list, silent_fail=False): +def instantiate_classes(simulation, property_name_list, silent_fail=False, explain=False): """Instantiate appropriate property calculation classes for a given simulation and list of property names.""" instances = [] handler_class = type(simulation.get_output_handler()) for property_identifier in property_name_list: - instances.append(providing_class(property_identifier, handler_class, silent_fail)(simulation)) + instances.append(providing_class(property_identifier, handler_class, silent_fail, explain)(simulation)) return instances diff --git a/tangos/tools/property_writer.py b/tangos/tools/property_writer.py index 8ecb7f23..6e891a80 100644 --- a/tangos/tools/property_writer.py +++ b/tangos/tools/property_writer.py @@ -78,6 +78,8 @@ def add_parser_arguments(self, parser): help="Specify the paralellism backend (e.g. pypar, mpi4py)") parser.add_argument('--include-only', action='append', type=str, help="Specify a filter that describes which objects the calculation should be executed for. Multiple filters may be specified, in which case they must all evaluate to true for the object to be included.") + parser.add_argument('--explain-classes', action='store_true', + help="Log some explanation for why property classes are selected (when there is any ambiguity)") def _create_parser_obj(self): parser = argparse.ArgumentParser() @@ -446,7 +448,9 @@ def run_timestep_calculation(self, db_timestep): self.tracker = CalculationSuccessTracker() logger.info("Processing %r", db_timestep) - self._property_calculator_instances = properties.instantiate_classes(db_timestep.simulation, self.options.properties) + self._property_calculator_instances = properties.instantiate_classes(db_timestep.simulation, + self.options.properties, + explain=self.options.explain_classes) if self.options.with_prerequisites: self._add_prerequisites_to_calculator_instances(db_timestep) From 3e6324deb53f1351fb04968b9bccfe3b9c113727 Mon Sep 17 00:00:00 2001 From: Andrew Pontzen Date: Mon, 9 Oct 2023 15:59:46 +0100 Subject: [PATCH 08/22] Better docstring and explanations for property class selection --- tangos/properties/__init__.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/tangos/properties/__init__.py b/tangos/properties/__init__.py index 593d954d..ca0b2071 100644 --- a/tangos/properties/__init__.py +++ b/tangos/properties/__init__.py @@ -423,7 +423,12 @@ def all_properties(with_particle_data=True): def providing_class(property_name, handler_class=None, silent_fail=False, explain=False): """Return property calculator class for given property name when files will be loaded by specified handler. - If handler_class is None, return "live" properties which can be calculated without particle data. + :param property_name -- name of property to be calculated + :param handler_class -- class of handler that will be used to load files + (e.g. input_handlers.pynbody.PynbodyInputHandler). + If None, return "live" properties which can be calculated without particle data. + :param silent_fail -- if True, return None if no class is found, otherwise raise NameError + :param explain -- if True, print out the reason why a particular class was selected When more than one possible class is capable of calculating the requested property, the following criteria are used to select one. The guiding criterion is to select user-provided code of the greatest specificity. @@ -441,12 +446,18 @@ class targetting say PynbodyInputHandler is available, it will be selected in pr """ - candidates = all_providing_classes(property_name) + candidates_unfiltered = all_providing_classes(property_name) if handler_class is None: - candidates = list(filter(lambda c: not c.requires_particle_data, candidates)) + candidates = list(filter(lambda c: not c.requires_particle_data, candidates_unfiltered)) else: - candidates = list(filter(lambda c: issubclass(handler_class, c.works_with_handler), candidates)) + candidates = [] + for c in candidates_unfiltered: + if issubclass(handler_class, c.works_with_handler): + candidates.append(c) + else: + if explain: + logger.info(f"Property class selection: {c.__module__}.{c.__qualname__} is excluded, as it is not compatible with handler {handler_class}") if len(candidates)>=1: # return the property which is most specialised @@ -477,7 +488,7 @@ def all_providing_classes(property_name): def _sort_by_class_hierarchy(candidates, explain=False): def explanation(s): if explain: - logger.info(s) + logger.info("Property class selection: "+s) def cmp(a, b): a_name = a.__module__ + "." + a.__qualname__ b_name = b.__module__ + "." + b.__qualname__ From 4187b7bcd811d95d97ecfb7fa309a6055766cb45 Mon Sep 17 00:00:00 2001 From: Andrew Pontzen Date: Mon, 9 Oct 2023 16:00:04 +0100 Subject: [PATCH 09/22] Fix pre-commit failures --- tangos/properties/pynbody/__init__.py | 2 +- tangos/properties/pynbody/radius.py | 2 +- tests/test_property_class_mapping.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tangos/properties/pynbody/__init__.py b/tangos/properties/pynbody/__init__.py index 914c49b6..18d29c85 100644 --- a/tangos/properties/pynbody/__init__.py +++ b/tangos/properties/pynbody/__init__.py @@ -8,4 +8,4 @@ class PynbodyPropertyCalculation(PropertyCalculation): PynbodyHaloProperties = PynbodyPropertyCalculation # old name, to be deprecated -from . import BH, SF, centring, eagle, gas, images, mass, profile, zoom, radius +from . import BH, SF, centring, eagle, gas, images, mass, profile, radius, zoom diff --git a/tangos/properties/pynbody/radius.py b/tangos/properties/pynbody/radius.py index 00b23d7d..2fba9b48 100644 --- a/tangos/properties/pynbody/radius.py +++ b/tangos/properties/pynbody/radius.py @@ -82,4 +82,4 @@ def _get_overdensity_contrast(): @staticmethod def _get_reference_definition(): - return 'matter' \ No newline at end of file + return 'matter' diff --git a/tests/test_property_class_mapping.py b/tests/test_property_class_mapping.py index 22c8e399..fa382b66 100644 --- a/tests/test_property_class_mapping.py +++ b/tests/test_property_class_mapping.py @@ -90,4 +90,4 @@ def test_priorities(): # alphabetical priority as last resort, first in alphabetical is prioritised: assert (prop.providing_class("ambiguous", _TestOutputHandler1) - is AmbiguousPropertyForHandler1) \ No newline at end of file + is AmbiguousPropertyForHandler1) From eee95be8c228fece3634d40a4400f56f1f178ba3 Mon Sep 17 00:00:00 2001 From: Andrew Pontzen Date: Mon, 9 Oct 2023 19:04:03 +0100 Subject: [PATCH 10/22] Delete tutorial files after ingesting them, to save disk space on github action runner --- .github/workflows/integration-test.yaml | 2 +- test_tutorial_build/build.sh | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/.github/workflows/integration-test.yaml b/.github/workflows/integration-test.yaml index 2176c061..21ffb96c 100644 --- a/.github/workflows/integration-test.yaml +++ b/.github/workflows/integration-test.yaml @@ -62,7 +62,7 @@ jobs: - name: Build test database working-directory: test_tutorial_build - run: bash build.sh + run: export DELETE_FILES_AFTER_INPUT=1; bash build.sh - uses: actions/upload-artifact@v2 with: diff --git a/test_tutorial_build/build.sh b/test_tutorial_build/build.sh index 73bcafb2..e9314bf7 100755 --- a/test_tutorial_build/build.sh +++ b/test_tutorial_build/build.sh @@ -23,6 +23,9 @@ build_gadget4() { tangos import-properties --for tutorial_gadget4 tangos import-properties --for tutorial_gadget4 --type group $MPI tangos $MPIBACKEND write dm_density_profile --with-prerequisites --include-only="NDM()>5000" --type=halo --for tutorial_gadget4 + if [ ! -z "$DELETE_FILES_AFTER_IMPORT" ]; then + rm -rf tutorial_gadget4 + fi } build_gadget_subfind() { @@ -50,6 +53,9 @@ build_ramses() { $MPI tangos link --for tutorial_ramses $MPIBACKEND $MPI tangos write contamination_fraction --for tutorial_ramses $MPIBACKEND $MPI tangos write dm_density_profile --with-prerequisites --include-only="contamination_fraction<0.01" --for tutorial_ramses $MPIBACKEND + if [ ! -z "$DELETE_FILES_AFTER_IMPORT" ]; then + rm -rf tutorial_ramses + fi } build_changa() { @@ -59,6 +65,10 @@ build_changa() { $MPI tangos link --for tutorial_changa$1 $MPIBACKEND $MPI tangos write contamination_fraction --for tutorial_changa$1 $MPIBACKEND $MPI tangos write dm_density_profile gas_density_profile uvi_image SFR_histogram --with-prerequisites --include-only="contamination_fraction<0.01" --include-only="NDM()>5000" $MPILOADMODE --for tutorial_changa$1 $MPIBACKEND + # delete tutorial_changa if $1 is a null string and DELETE_FILES_AFTER_IMPORT is set + if [ ! -z "$DELETE_FILES_AFTER_IMPORT" ] && [ -z "$1" ]; then + rm -rf tutorial_changa + fi } build_changa_bh() { @@ -66,6 +76,9 @@ build_changa_bh() { tangos import-changa-bh --sims tutorial_changa_blackholes $MPI tangos write BH_mass BH_mdot_histogram --for tutorial_changa_blackholes --type bh $MPIBACKEND $MPI tangos crosslink tutorial_changa tutorial_changa_blackholes $MPIBACKEND + if [ ! -z "$DELETE_FILES_AFTER_IMPORT" ]; then + rm -rf tutorial_changa_blackholes + fi } build_enzo_yt() { From 57bdbc9ad42a0c04822e7a274460380d9825f8bc Mon Sep 17 00:00:00 2001 From: Andrew Pontzen Date: Mon, 9 Oct 2023 21:01:39 +0100 Subject: [PATCH 11/22] Attempt to fit the integration testing into free github runner's reduced disk footprint --- .github/workflows/integration-test.yaml | 23 ++-------- test_tutorial_build/build.sh | 57 ++++++++++++++----------- 2 files changed, 37 insertions(+), 43 deletions(-) diff --git a/.github/workflows/integration-test.yaml b/.github/workflows/integration-test.yaml index 21ffb96c..a06fd98d 100644 --- a/.github/workflows/integration-test.yaml +++ b/.github/workflows/integration-test.yaml @@ -43,26 +43,9 @@ jobs: - name: Install latest yt run: python -m pip install yt - - name: Cache test datasets - id: cache-test-datasets - uses: actions/cache@v2 - with: - path: | - test_tutorial_build/tutorial_* - test_tutorial_build/enzo.tinycosmo - test_tutorial_build/reference_database.db - key: replace-later-with-md5 # need to work out a way to generate a key here at some point if test data changes - - - name: Fetch test datasets - if: steps.cache-test-datasets.outputs.cache-hit != 'true' - working-directory: test_tutorial_build - run: | - wget -T 60 -nv ftp://zuserver2.star.ucl.ac.uk/app/tangos/mini_tutorial_test.tar.gz - tar -xzvf mini_tutorial_test.tar.gz - - name: Build test database working-directory: test_tutorial_build - run: export DELETE_FILES_AFTER_INPUT=1; bash build.sh + run: export INTEGRATION_TESTING=1; bash build.sh - uses: actions/upload-artifact@v2 with: @@ -71,7 +54,9 @@ jobs: - name: Verify database working-directory: test_tutorial_build - run: tangos diff data.db reference_database.db --property-tolerance dm_density_profile 1e-2 0 + run: + wget https://zenodo.org/record/8423051/files/reference_database.db?download=1 -O reference_database.db -nv + tangos diff data.db reference_database.db --property-tolerance dm_density_profile 1e-2 0 # --property-tolerance dm_density_profile here is because if a single particle crosses between bins # (which seems to happen due to differing library versions), the profile can change by this much # diff --git a/test_tutorial_build/build.sh b/test_tutorial_build/build.sh index e9314bf7..ae4e12df 100755 --- a/test_tutorial_build/build.sh +++ b/test_tutorial_build/build.sh @@ -2,7 +2,13 @@ get_tutorial_data() { if [ ! -d tutorial_$1 ]; then - wget -O - https://zenodo.org/record/5155467/files/tutorial_$1.tar.gz?download=1 | tar -xz + if [ -z "$INTEGRATION_TESTING" ]; then + echo "Downloading tutorial data for $1" + wget -nv -O - https://zenodo.org/record/5155467/files/tutorial_$1.tar.gz?download=1 | tar -xzv + else + echo "Downloading mini tutorial data for $1" + wget -nv -O - https://zenodo.org/record/8423051/files/tutorial_$1.tar.gz?download=1 | tar -xzv + fi fi } @@ -23,9 +29,6 @@ build_gadget4() { tangos import-properties --for tutorial_gadget4 tangos import-properties --for tutorial_gadget4 --type group $MPI tangos $MPIBACKEND write dm_density_profile --with-prerequisites --include-only="NDM()>5000" --type=halo --for tutorial_gadget4 - if [ ! -z "$DELETE_FILES_AFTER_IMPORT" ]; then - rm -rf tutorial_gadget4 - fi } build_gadget_subfind() { @@ -53,9 +56,6 @@ build_ramses() { $MPI tangos link --for tutorial_ramses $MPIBACKEND $MPI tangos write contamination_fraction --for tutorial_ramses $MPIBACKEND $MPI tangos write dm_density_profile --with-prerequisites --include-only="contamination_fraction<0.01" --for tutorial_ramses $MPIBACKEND - if [ ! -z "$DELETE_FILES_AFTER_IMPORT" ]; then - rm -rf tutorial_ramses - fi } build_changa() { @@ -65,10 +65,6 @@ build_changa() { $MPI tangos link --for tutorial_changa$1 $MPIBACKEND $MPI tangos write contamination_fraction --for tutorial_changa$1 $MPIBACKEND $MPI tangos write dm_density_profile gas_density_profile uvi_image SFR_histogram --with-prerequisites --include-only="contamination_fraction<0.01" --include-only="NDM()>5000" $MPILOADMODE --for tutorial_changa$1 $MPIBACKEND - # delete tutorial_changa if $1 is a null string and DELETE_FILES_AFTER_IMPORT is set - if [ ! -z "$DELETE_FILES_AFTER_IMPORT" ] && [ -z "$1" ]; then - rm -rf tutorial_changa - fi } build_changa_bh() { @@ -76,12 +72,10 @@ build_changa_bh() { tangos import-changa-bh --sims tutorial_changa_blackholes $MPI tangos write BH_mass BH_mdot_histogram --for tutorial_changa_blackholes --type bh $MPIBACKEND $MPI tangos crosslink tutorial_changa tutorial_changa_blackholes $MPIBACKEND - if [ ! -z "$DELETE_FILES_AFTER_IMPORT" ]; then - rm -rf tutorial_changa_blackholes - fi } build_enzo_yt() { + get_tutorial_data enzo.tinycosmo if [ -d enzo.tinycosmo ]; then tangos add enzo.tinycosmo --handler=yt.YtInputHandler --min-particles 100 tangos import-consistent-trees --for enzo.tinycosmo --with-ids @@ -92,18 +86,27 @@ build_enzo_yt() { fi } +clearup_files() { + if [ ! -z "$INTEGRATION_TESTING" ]; then + rm -rf $1 + fi +} + + +if [ -z "$INTEGRATION_TESTING" ]; then + echo "This script builds the tangos tutorial database" + echo + echo "It will download data and build in the current working directory:" + echo " "`pwd` + echo + echo "The total required space is approximately 35GB" + echo + echo "If this is not what you want, press ^C now" + echo "Starting process in 5 seconds..." + sleep 5 +fi -echo "This script builds the tangos tutorial database" -echo -echo "It will download data and build in the current working directory:" -echo " "`pwd` -echo -echo "The total required space is approximately 35GB" -echo -echo "If this is not what you want, press ^C now" -echo "Starting process in 5 seconds..." -sleep 5 export TANGOS_DB_CONNECTION=`pwd`/data.db export TANGOS_SIMULATION_FOLDER=`pwd` @@ -114,8 +117,14 @@ set -e build_gadget_subfind build_gadget_rockstar +clearup_files("tutorial_gadget") build_ramses +clearup_files("tutorial_ramses") build_changa +clearup_files("tutorial_changa") build_changa_bh +clearup_files("tutorial_changa_blackholes") build_gadget4 +clearup_files("tutorial_gadget4") build_enzo_yt +clearup_files("enzo.tinycosmo") From d8f668ca76d98a865d1ea7fea334fb94f8087619 Mon Sep 17 00:00:00 2001 From: Andrew Pontzen Date: Mon, 9 Oct 2023 22:05:53 +0100 Subject: [PATCH 12/22] Fix syntax errors in build.sh --- test_tutorial_build/build.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test_tutorial_build/build.sh b/test_tutorial_build/build.sh index ae4e12df..bb00618b 100755 --- a/test_tutorial_build/build.sh +++ b/test_tutorial_build/build.sh @@ -117,14 +117,14 @@ set -e build_gadget_subfind build_gadget_rockstar -clearup_files("tutorial_gadget") +clearup_files tutorial_gadget build_ramses -clearup_files("tutorial_ramses") +clearup_files tutorial_ramses build_changa -clearup_files("tutorial_changa") +clearup_files tutorial_changa build_changa_bh -clearup_files("tutorial_changa_blackholes") +clearup_files tutorial_changa_blackholes build_gadget4 -clearup_files("tutorial_gadget4") +clearup_files tutorial_gadget4 build_enzo_yt -clearup_files("enzo.tinycosmo") +clearup_files enzo.tinycosmo From e68d9c248909cd14e32e7fa76aa43c8363b35427 Mon Sep 17 00:00:00 2001 From: Andrew Pontzen Date: Tue, 10 Oct 2023 09:37:44 +0100 Subject: [PATCH 13/22] Update integration testing to retain tutorial_changa for crosslinking with tutorial_changa_blackholes Put this first in the hope that disk space doesn't run out --- test_tutorial_build/build.sh | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/test_tutorial_build/build.sh b/test_tutorial_build/build.sh index bb00618b..b2af7b35 100755 --- a/test_tutorial_build/build.sh +++ b/test_tutorial_build/build.sh @@ -115,15 +115,16 @@ detect_mpi set -e +build_changa +build_changa_bh +clearup_files tutorial_changa +clearup_files tutorial_changa_blackholes build_gadget_subfind build_gadget_rockstar clearup_files tutorial_gadget +clearup_files tutorial_gadget_rockstar build_ramses clearup_files tutorial_ramses -build_changa -clearup_files tutorial_changa -build_changa_bh -clearup_files tutorial_changa_blackholes build_gadget4 clearup_files tutorial_gadget4 build_enzo_yt From 653578f8ef614eca06674fde74f29e633b59c409 Mon Sep 17 00:00:00 2001 From: Andrew Pontzen Date: Tue, 10 Oct 2023 11:30:38 +0100 Subject: [PATCH 14/22] Correct paths for test data --- test_tutorial_build/build.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/test_tutorial_build/build.sh b/test_tutorial_build/build.sh index b2af7b35..9ad65d98 100755 --- a/test_tutorial_build/build.sh +++ b/test_tutorial_build/build.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash get_tutorial_data() { - if [ ! -d tutorial_$1 ]; then + if [ ! -d $1 ]; then if [ -z "$INTEGRATION_TESTING" ]; then echo "Downloading tutorial data for $1" wget -nv -O - https://zenodo.org/record/5155467/files/tutorial_$1.tar.gz?download=1 | tar -xzv @@ -24,7 +24,7 @@ detect_mpi() { } build_gadget4() { - get_tutorial_data gadget4 + get_tutorial_data tutorial_gadget4 tangos add tutorial_gadget4 tangos import-properties --for tutorial_gadget4 tangos import-properties --for tutorial_gadget4 --type group @@ -32,7 +32,7 @@ build_gadget4() { } build_gadget_subfind() { - get_tutorial_data gadget + get_tutorial_data tutorial_gadget tangos add tutorial_gadget --min-particles 100 tangos import-properties --for tutorial_gadget tangos import-properties --type group --for tutorial_gadget @@ -42,8 +42,8 @@ build_gadget_subfind() { } build_gadget_rockstar() { - get_tutorial_data gadget - get_tutorial_data gadget_rockstar + get_tutorial_data tutorial_gadget + get_tutorial_data tutorial_gadget_rockstar tangos add tutorial_gadget_rockstar --min-particles 100 tangos import-properties Mvir Rvir X Y Z --for tutorial_gadget_rockstar tangos import-consistent-trees --for tutorial_gadget_rockstar @@ -51,7 +51,7 @@ build_gadget_rockstar() { } build_ramses() { - get_tutorial_data ramses + get_tutorial_data tutorial_ramses tangos add tutorial_ramses --min-particles 100 --no-renumber $MPI tangos link --for tutorial_ramses $MPIBACKEND $MPI tangos write contamination_fraction --for tutorial_ramses $MPIBACKEND @@ -59,7 +59,7 @@ build_ramses() { } build_changa() { - get_tutorial_data changa$1 + get_tutorial_data tutorial_changa$1 tangos add tutorial_changa$1 tangos import-properties Mvir Rvir --for tutorial_changa$1 $MPI tangos link --for tutorial_changa$1 $MPIBACKEND From 6ff1919dace97fb0704260b9bd0daba64173626f Mon Sep 17 00:00:00 2001 From: Andrew Pontzen Date: Tue, 10 Oct 2023 16:35:06 +0100 Subject: [PATCH 15/22] Further fixes to paths for fetching data --- test_tutorial_build/build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test_tutorial_build/build.sh b/test_tutorial_build/build.sh index 9ad65d98..3bf8fff8 100755 --- a/test_tutorial_build/build.sh +++ b/test_tutorial_build/build.sh @@ -4,10 +4,10 @@ get_tutorial_data() { if [ ! -d $1 ]; then if [ -z "$INTEGRATION_TESTING" ]; then echo "Downloading tutorial data for $1" - wget -nv -O - https://zenodo.org/record/5155467/files/tutorial_$1.tar.gz?download=1 | tar -xzv + wget -nv -O - https://zenodo.org/record/5155467/files/$1.tar.gz?download=1 | tar -xzv else echo "Downloading mini tutorial data for $1" - wget -nv -O - https://zenodo.org/record/8423051/files/tutorial_$1.tar.gz?download=1 | tar -xzv + wget -nv -O - https://zenodo.org/record/8423051/files/$1.tar.gz?download=1 | tar -xzv fi fi } From 66b8067332efddb9913f08076c65152017daee5a Mon Sep 17 00:00:00 2001 From: Andrew Pontzen Date: Tue, 10 Oct 2023 20:22:33 +0100 Subject: [PATCH 16/22] Fix confusing output from enumerate_objects --- tangos/input_handlers/pynbody.py | 2 +- tangos/input_handlers/ramsesHOP.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tangos/input_handlers/pynbody.py b/tangos/input_handlers/pynbody.py index 3fee78a3..b5d8aa54 100644 --- a/tangos/input_handlers/pynbody.py +++ b/tangos/input_handlers/pynbody.py @@ -229,7 +229,7 @@ def enumerate_objects(self, ts_extension, object_typetag="halo", min_halo_partic if self._can_enumerate_objects_from_statfile(ts_extension, object_typetag): yield from self._enumerate_objects_from_statfile(ts_extension, object_typetag) else: - logger.warning("No halo statistics file found for timestep %r",ts_extension) + logger.warning("No %s statistics file found for timestep %r", object_typetag, ts_extension) snapshot_keep_alive = self.load_timestep(ts_extension) try: diff --git a/tangos/input_handlers/ramsesHOP.py b/tangos/input_handlers/ramsesHOP.py index 75c2945e..322f27f7 100644 --- a/tangos/input_handlers/ramsesHOP.py +++ b/tangos/input_handlers/ramsesHOP.py @@ -167,7 +167,7 @@ def enumerate_objects(self, ts_extension, object_typetag="halo", min_halo_partic if self._can_enumerate_objects_from_statfile(ts_extension, object_typetag): yield from self._enumerate_objects_from_statfile(ts_extension, object_typetag) else: - logger.warning("No halo statistics file found for timestep %r", ts_extension) + logger.warning("No %s statistics file found for timestep %r", object_typetag, ts_extension) try: h = self._construct_halo_cat(ts_extension, object_typetag) From 3bca3a7188f20ca91f01b80ba14fdbe2cc74fa61 Mon Sep 17 00:00:00 2001 From: Andrew Pontzen Date: Tue, 10 Oct 2023 20:28:54 +0100 Subject: [PATCH 17/22] Don't scan over files more often than necessary (important for slow filesystems) --- tangos/input_handlers/finding.py | 2 +- tangos/input_handlers/pynbody.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tangos/input_handlers/finding.py b/tangos/input_handlers/finding.py index d79bcdf0..4c6e9ca8 100644 --- a/tangos/input_handlers/finding.py +++ b/tangos/input_handlers/finding.py @@ -65,7 +65,7 @@ def best_matching_handler(cls, basename): def enumerate_timestep_extensions(self): base = os.path.join(config.base, self.basename) - extensions = find(basename=base + "/", patterns=self.patterns) + extensions = sorted(find(basename=base + "/", patterns=self.patterns)) logger.info("Enumerate timestep extensions base=%r patterns=%s", base, self.patterns) for e in extensions: if self._is_able_to_load(self._transform_extension(e)): diff --git a/tangos/input_handlers/pynbody.py b/tangos/input_handlers/pynbody.py index b5d8aa54..98092765 100644 --- a/tangos/input_handlers/pynbody.py +++ b/tangos/input_handlers/pynbody.py @@ -260,9 +260,9 @@ def enumerate_objects(self, ts_extension, object_typetag="halo", min_halo_partic pass def get_properties(self): - timesteps = list(self.enumerate_timestep_extensions()) - if len(timesteps)>0: - f = self.load_timestep_without_caching(sorted(timesteps)[-1]) + timesteps = self.enumerate_timestep_extensions() + try: + f = self.load_timestep_without_caching(next(timesteps)) if self.quicker: res_kpc = self._estimate_spatial_resolution_quicker(f) res_msol = self._estimate_mass_resolution_quicker(f) @@ -271,7 +271,7 @@ def get_properties(self): res_msol = self._estimate_mass_resolution(f) return {'approx_resolution_kpc': res_kpc, 'approx_resolution_Msol': res_msol} - else: + except StopIteration: return {} def _estimate_spatial_resolution(self, f): From 76db6dfe3e38624be6933a33cc46db3b13d51600 Mon Sep 17 00:00:00 2001 From: Andrew Pontzen Date: Tue, 10 Oct 2023 20:38:51 +0100 Subject: [PATCH 18/22] Fix issue with new versions of yt? --- tangos/input_handlers/yt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tangos/input_handlers/yt.py b/tangos/input_handlers/yt.py index f9730f16..5f607143 100644 --- a/tangos/input_handlers/yt.py +++ b/tangos/input_handlers/yt.py @@ -146,13 +146,13 @@ def _load_halo_cat_without_caching(self, ts_extension, snapshot_file): rockfiles = np.array(rockfiles)[sortord] timestep_ind = np.argwhere(np.array([s.split('/')[-1] for s in snapfiles])==ts_extension.split('/')[0])[0] fnum = int(rockfiles[timestep_ind][0].split('.')[0].split('_')[-1]) - cat = yt.frontends.rockstar.RockstarDataset(self._extension_to_filename("halos_"+str(fnum)+".0.bin")) + cat = yt.load(self._extension_to_filename("halos_"+str(fnum)+".0.bin")) cat_data = cat.all_data() # Check whether rockstar was run with Behroozi's distribution or Wise's if np.any(cat_data["halos","particle_identifier"]<0): del cat del cat_data - cat = yt.frontends.rockstar.RockstarDataset(self._extension_to_filename("halos_"+str(fnum)+".0.bin")) + cat = yt.load(self._extension_to_filename("halos_"+str(fnum)+".0.bin")) cat.parameters['format_revision'] = 2 # cat_data = cat.all_data() return cat, cat_data From fa77756e73bcb7ef1d124c122534d7204ace5703 Mon Sep 17 00:00:00 2001 From: Andrew Pontzen Date: Tue, 10 Oct 2023 20:47:14 +0100 Subject: [PATCH 19/22] Fix test where numbers have changed slightly due to different file finding strategy --- tests/test_simulation_outputs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_simulation_outputs.py b/tests/test_simulation_outputs.py index 70735bf0..c92f7087 100644 --- a/tests/test_simulation_outputs.py +++ b/tests/test_simulation_outputs.py @@ -40,8 +40,8 @@ def test_handler_properties(): def test_handler_properties_quicker_flag(): output_manager.quicker = True prop = output_manager.get_properties() - npt.assert_allclose(prop['approx_resolution_kpc'], 33.4360203909648) - npt.assert_allclose(prop['approx_resolution_Msol'], 40370039199.44858) + npt.assert_allclose(prop['approx_resolution_kpc'], 33.590757, rtol=1e-5) + npt.assert_allclose(prop['approx_resolution_Msol'], 2.412033e+10, rtol=1e-5) def test_enumerate(): assert set(output_manager.enumerate_timestep_extensions())=={"tiny.000640","tiny.000832"} From ceb5a4068d5f99b06db351410bb08538385fd426 Mon Sep 17 00:00:00 2001 From: Andrew Pontzen Date: Tue, 10 Oct 2023 21:00:24 +0100 Subject: [PATCH 20/22] Fix out of date yt reference (will this break people using yt<4?) --- tangos/input_handlers/yt.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tangos/input_handlers/yt.py b/tangos/input_handlers/yt.py index 5f607143..dc6380ba 100644 --- a/tangos/input_handlers/yt.py +++ b/tangos/input_handlers/yt.py @@ -90,7 +90,8 @@ class YtChangaAHFInputHandler(YtInputHandler): patterns = ["*.00???", "*.00????"] def _load_halo_cat_without_caching(self, ts_extension, snapshot_file): - cat = yt.frontends.ahf.AHFHalosDataset(self._extension_to_filename("halos/"+ts_extension)+".AHF_param", + import yt.frontends.ahf.api + cat = yt.frontends.ahf.api.AHFHalosDataset(self._extension_to_filename("halos/"+ts_extension)+".AHF_param", hubble_constant = snapshot_file.hubble_constant) cat_data = cat.all_data() return cat, cat_data From bd621f95e4ee0165c7043e03de3b57d55c37ac7a Mon Sep 17 00:00:00 2001 From: Andrew Pontzen Date: Tue, 10 Oct 2023 21:05:28 +0100 Subject: [PATCH 21/22] Now try getting something to work for yt 3 and yt 4 alike --- tangos/input_handlers/yt.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tangos/input_handlers/yt.py b/tangos/input_handlers/yt.py index dc6380ba..f674da98 100644 --- a/tangos/input_handlers/yt.py +++ b/tangos/input_handlers/yt.py @@ -90,8 +90,13 @@ class YtChangaAHFInputHandler(YtInputHandler): patterns = ["*.00???", "*.00????"] def _load_halo_cat_without_caching(self, ts_extension, snapshot_file): - import yt.frontends.ahf.api - cat = yt.frontends.ahf.api.AHFHalosDataset(self._extension_to_filename("halos/"+ts_extension)+".AHF_param", + try: + # yt 4 + from yt.frontends.ahf.api import AHFHalosDataset + except ImportError: + # yt 3 + from yt.frontends.ahf import AHFHalosDataset + cat = AHFHalosDataset(self._extension_to_filename("halos/"+ts_extension)+".AHF_param", hubble_constant = snapshot_file.hubble_constant) cat_data = cat.all_data() return cat, cat_data From a887b218bdc6975be3c2ce0d3bc8418498a12162 Mon Sep 17 00:00:00 2001 From: Andrew Pontzen Date: Wed, 11 Oct 2023 12:12:09 +0100 Subject: [PATCH 22/22] Fix typo in integration-test.yaml and update zenodo record to latest reference_database.db --- .github/workflows/integration-test.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/integration-test.yaml b/.github/workflows/integration-test.yaml index a06fd98d..4493f3f0 100644 --- a/.github/workflows/integration-test.yaml +++ b/.github/workflows/integration-test.yaml @@ -54,8 +54,8 @@ jobs: - name: Verify database working-directory: test_tutorial_build - run: - wget https://zenodo.org/record/8423051/files/reference_database.db?download=1 -O reference_database.db -nv + run: | + wget https://zenodo.org/record/8430190/files/reference_database.db?download=1 -O reference_database.db -nv tangos diff data.db reference_database.db --property-tolerance dm_density_profile 1e-2 0 # --property-tolerance dm_density_profile here is because if a single particle crosses between bins # (which seems to happen due to differing library versions), the profile can change by this much