From ee064167d7ca4d000546a1fa2b8c6b6d994f9a1e Mon Sep 17 00:00:00 2001 From: Graham Gower Date: Tue, 22 Jun 2021 10:57:35 +0200 Subject: [PATCH 1/2] Add load_all()/dump_all() to support multidoc YAML. Closes #239. --- demes/__init__.py | 11 +++++- demes/load_dump.py | 74 ++++++++++++++++++++++++++++++++++------ docs/api.md | 2 ++ tests/conftest.py | 8 +++++ tests/test_load_dump.py | 75 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 158 insertions(+), 12 deletions(-) diff --git a/demes/__init__.py b/demes/__init__.py index 05a51fbe..ae03846c 100644 --- a/demes/__init__.py +++ b/demes/__init__.py @@ -20,5 +20,14 @@ Merge, Admix, ) -from .load_dump import load_asdict, loads_asdict, load, loads, dump, dumps +from .load_dump import ( + load_asdict, + loads_asdict, + load, + loads, + load_all, + dump, + dumps, + dump_all, +) from .ms import from_ms diff --git a/demes/load_dump.py b/demes/load_dump.py index 9effcc03..11f9148f 100644 --- a/demes/load_dump.py +++ b/demes/load_dump.py @@ -5,7 +5,7 @@ import json import io import math -from typing import MutableMapping, Any +from typing import Any, Generator, MutableMapping import ruamel.yaml @@ -36,12 +36,22 @@ def _open_file_polymorph(polymorph, mode="r"): # which are hopefully simple enough to not suffer from API instability. -def _load_yaml_asdict(fp): +def _load_yaml_asdict(fp) -> MutableMapping[str, Any]: with ruamel.yaml.YAML(typ="safe") as yaml: return yaml.load(fp) -def _dump_yaml_fromdict(data, fp): +def _dump_yaml_fromdict(data, fp, multidoc=False) -> None: + """ + Dump data dict to a YAML file-like object. + + :param bool multidoc: If True, output the YAML document start line ``---``, + and document end line ``...``, which indicate the beginning and end of + a YAML document respectively. The start indicator is needed when + outputting multiple YAML documents to a single file (or file stream). 
+ The end indicator is not strictly needed, but may be desirable + depending on the underlying communication channel. + """ with ruamel.yaml.YAML(typ="safe", output=fp) as yaml: # Output flow style, but only for collections that consist only # of scalars (i.e. the leaves in the document tree). @@ -51,6 +61,9 @@ def _dump_yaml_fromdict(data, fp): yaml.allow_unicode = False # Keep dict insertion order, thank you very much! yaml.sort_base_mapping_type_on_output = False + if multidoc: + yaml.explicit_start = True + yaml.explicit_end = True yaml.dump(data) @@ -93,7 +106,7 @@ def _unstringify_infinities(data: MutableMapping[str, Any]) -> None: migration["start_time"] = float(start_time) -def loads_asdict(string, *, format="yaml"): +def loads_asdict(string, *, format="yaml") -> MutableMapping[str, Any]: """ Load a YAML or JSON string into a dictionary of nested objects. The keywords and structure of the input are defined by the @@ -109,7 +122,7 @@ def loads_asdict(string, *, format="yaml"): return load_asdict(stream, format=format) -def load_asdict(filename, *, format="yaml"): +def load_asdict(filename, *, format="yaml") -> MutableMapping[str, Any]: """ Load a YAML or JSON file into a dictionary of nested objects. The keywords and structure of the input are defined by the @@ -135,7 +148,7 @@ def load_asdict(filename, *, format="yaml"): return data -def loads(string, *, format="yaml"): +def loads(string, *, format="yaml") -> "demes.Graph": """ Load a graph from a YAML or JSON string. The keywords and structure of the input are defined by the @@ -150,7 +163,7 @@ def loads(string, *, format="yaml"): return demes.Graph.fromdict(data) -def load(filename, *, format="yaml"): +def load(filename, *, format="yaml") -> "demes.Graph": """ Load a graph from a YAML or JSON file. 
The keywords and structure of the input are defined by the @@ -167,7 +180,26 @@ def load(filename, *, format="yaml"): return demes.Graph.fromdict(data) -def dumps(graph, *, format="yaml", simplified=True): +def load_all(filename) -> Generator["demes.Graph", None, None]: + """ + Generate graphs from a YAML document stream. Documents must be separated by + the YAML document start indicator, ``---``. + The keywords and structure of each document are defined by the + :ref:`spec:sec_ref`. + + :param filename: The path to the file to be loaded, or a file-like object + with a ``read()`` method. + :type filename: Union[str, os.PathLike, FileLike] + :return: A generator of graphs. + :rtype: Generator[demes.Graph, None, None] + """ + with _open_file_polymorph(filename) as f: + with ruamel.yaml.YAML(typ="safe") as yaml: + for data in yaml.load_all(f): + yield demes.Graph.fromdict(data) + + +def dumps(graph, *, format="yaml", simplified=True) -> str: """ Dump the specified graph to a YAML or JSON string. The keywords and structure of the output are defined by the @@ -176,7 +208,7 @@ def dumps(graph, *, format="yaml", simplified=True): :param .Graph graph: The graph to dump. :param str format: The format of the output file. Either "yaml" or "json". :param bool simplified: If True, returns a simplified graph. If False, returns - a complete redundant graph. + a fully-qualified graph. :return: The YAML or JSON string. :rtype: str """ @@ -186,7 +218,7 @@ def dumps(graph, *, format="yaml", simplified=True): return string -def dump(graph, filename, *, format="yaml", simplified=True): +def dump(graph, filename, *, format="yaml", simplified=True) -> None: """ Dump the specified graph to a file. The keywords and structure of the output are defined by the @@ -198,7 +230,7 @@ def dump(graph, filename, *, format="yaml", simplified=True): :type filename: Union[str, os.PathLike, FileLike] :param str format: The format of the output file. Either "yaml" or "json". 
:param bool simplified: If True, outputs a simplified graph. If False, outputs - a redundant graph. + a fully-qualified graph. """ if simplified: data = graph.asdict_simplified() @@ -214,3 +246,23 @@ def dump(graph, filename, *, format="yaml", simplified=True): _dump_yaml_fromdict(data, f) else: raise ValueError(f"unknown format: {format}") + + +def dump_all(graphs, filename, *, simplified=True) -> None: + """ + Dump the specified graphs to a multi-document YAML file or output stream. + + :param graphs: An iterable of graphs to dump. + :param filename: Path to the output file, or a file-like object with a + ``write()`` method. + :type filename: Union[str, os.PathLike, FileLike] + :param bool simplified: If True, outputs simplified graphs. If False, outputs + fully-qualified graphs. + """ + with _open_file_polymorph(filename, "w") as f: + for graph in graphs: + if simplified: + data = graph.asdict_simplified() + else: + data = graph.asdict() + _dump_yaml_fromdict(data, f, multidoc=True) diff --git a/docs/api.md b/docs/api.md index 33dd5324..6c0b8258 100644 --- a/docs/api.md +++ b/docs/api.md @@ -9,8 +9,10 @@ .. autofunction:: demes.load_asdict .. autofunction:: demes.loads .. autofunction:: demes.loads_asdict +.. autofunction:: demes.load_all .. autofunction:: demes.dump .. autofunction:: demes.dumps +.. autofunction:: demes.dump_all ``` ## Building Demes graphs diff --git a/tests/conftest.py b/tests/conftest.py index e436553b..452c373f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,7 +9,15 @@ deadline=None, suppress_health_check=[hypothesis.HealthCheck.too_slow], ) +hypothesis.settings.register_profile( + "default", + max_examples=100, + deadline=None, + suppress_health_check=[hypothesis.HealthCheck.too_slow], +) # GitHub Actions sets the CI environment variable. 
if os.getenv("CI", False): hypothesis.settings.load_profile("ci") +else: + hypothesis.settings.load_profile("default") diff --git a/tests/test_load_dump.py b/tests/test_load_dump.py index c692d4b8..4082d93c 100644 --- a/tests/test_load_dump.py +++ b/tests/test_load_dump.py @@ -594,3 +594,78 @@ def test_json_infinities_get_stringified(self): assert data["migrations"][0]["start_time"] == "Infinity" g2 = demes.loads(json_str, format="json") g2.assert_close(g1) + + +class TestMultiDocument: + @pytest.mark.parametrize("yaml_file", tests.example_files()) + def test_load_all_single_document(self, yaml_file): + # Loading files with one document should work with the multi-doc API. + graphs = list(demes.load_all(yaml_file)) + assert len(graphs) == 1 + graph1 = graphs[0] + graph2 = demes.load(yaml_file) + graph1.assert_close(graph2) + + @pytest.mark.parametrize("simplified", [True, False]) + @pytest.mark.parametrize("graph1", tests.example_graphs()) + def test_dump_all_single_document(self, graph1, simplified): + # A single document saved with the multi-doc API should be loadable + # with the regular single-doc API. 
+ with tempfile.TemporaryDirectory() as tmpdir: + tmpfile = pathlib.Path(tmpdir) / "temp.yaml" + demes.dump_all([graph1], tmpfile, simplified=simplified) + graph2 = demes.load(tmpfile) + graph1.assert_close(graph2) + + @pytest.mark.parametrize("simplified", [True, False]) + def test_round_trip_file(self, simplified): + graphs1 = tests.example_graphs() + assert len(graphs1) > 1 + with tempfile.TemporaryDirectory() as tmpdir: + tmpfile = pathlib.Path(tmpdir) / "multidoc.yaml" + demes.dump_all(graphs1, tmpfile, simplified=simplified) + graphs2 = list(demes.load_all(tmpfile)) + assert len(graphs1) == len(graphs2) + for g1, g2 in zip(graphs1, graphs2): + g1.assert_close(g2) + + @pytest.mark.parametrize("simplified", [True, False]) + def test_round_trip_stream(self, simplified): + graphs1 = tests.example_graphs() + assert len(graphs1) > 1 + with tempfile.TemporaryDirectory() as tmpdir: + tmpfile = pathlib.Path(tmpdir) / "multidoc.yaml" + with open(tmpfile, "w") as f: + demes.dump_all(graphs1, f, simplified=simplified) + with open(tmpfile) as f: + graphs2 = list(demes.load_all(f)) + assert len(graphs1) == len(graphs2) + for g1, g2 in zip(graphs1, graphs2): + g1.assert_close(g2) + + @pytest.mark.parametrize("simplified", [True, False]) + def test_round_trip_no_end_document_marker(self, simplified): + graphs1 = tests.example_graphs() + assert len(graphs1) > 1 + with tempfile.TemporaryDirectory() as tmpdir: + tmpfile = pathlib.Path(tmpdir) / "multidoc.yaml" + with open(tmpfile, "w") as f: + for j, graph in enumerate(graphs1): + if j > 0: + # Output a start marker between documents. 
+ print("---", file=f) + demes.dump(graph, f, simplified=simplified) + graphs2 = list(demes.load_all(tmpfile)) + assert len(graphs1) == len(graphs2) + for g1, g2 in zip(graphs1, graphs2): + g1.assert_close(g2) + + @pytest.mark.parametrize("simplified", [True, False]) + def test_empty_file(self, simplified): + with tempfile.TemporaryDirectory() as tmpdir: + tmpfile = pathlib.Path(tmpdir) / "empty.yaml" + demes.dump_all([], tmpfile) + assert tmpfile.exists() + assert tmpfile.stat().st_size == 0 + graphs = list(demes.load_all(tmpfile)) + assert len(graphs) == 0 From c57210f3565bc188f8228b1f87d7c9a7b29ac0db Mon Sep 17 00:00:00 2001 From: Graham Gower Date: Tue, 22 Jun 2021 12:06:14 +0200 Subject: [PATCH 2/2] Update cross references to spec. --- demes/demes.py | 4 ++-- demes/load_dump.py | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/demes/demes.py b/demes/demes.py index 77e2595f..85957d73 100644 --- a/demes/demes.py +++ b/demes/demes.py @@ -2270,7 +2270,7 @@ class Builder: :ivar dict data: The data dictionary of the graph's current state. The objects nested within this dictionary follow Demes' data model, - as described in the :ref:`spec:sec_ref`. + as described in the :ref:`spec:sec_spec`. .. note:: Users may freely modify the data dictionary, as long as the data @@ -2454,7 +2454,7 @@ def fromdict(cls, data: MutableMapping[str, Any]) -> "Builder": :param MutableMapping data: The data dictionary to initialise the graph's state. The objects nested within this dictionary must - follow Demes' data model, as described in the :ref:`spec:sec_ref`. + follow Demes' data model, as described in the :ref:`spec:sec_spec`. :return: The new Builder object. 
:rtype: Builder diff --git a/demes/load_dump.py b/demes/load_dump.py index 11f9148f..b7260f7c 100644 --- a/demes/load_dump.py +++ b/demes/load_dump.py @@ -110,7 +110,7 @@ def loads_asdict(string, *, format="yaml") -> MutableMapping[str, Any]: """ Load a YAML or JSON string into a dictionary of nested objects. The keywords and structure of the input are defined by the - :ref:`spec:sec_ref`. + :ref:`spec:sec_spec`. :param str string: The string to be loaded. :param str format: The format of the input string. Either "yaml" or "json". @@ -126,7 +126,7 @@ def load_asdict(filename, *, format="yaml") -> MutableMapping[str, Any]: """ Load a YAML or JSON file into a dictionary of nested objects. The keywords and structure of the input are defined by the - :ref:`spec:sec_ref`. + :ref:`spec:sec_spec`. :param filename: The path to the file to be loaded, or a file-like object with a ``read()`` method. @@ -152,7 +152,7 @@ def loads(string, *, format="yaml") -> "demes.Graph": """ Load a graph from a YAML or JSON string. The keywords and structure of the input are defined by the - :ref:`spec:sec_ref`. + :ref:`spec:sec_spec`. :param str string: The string to be loaded. :param str format: The format of the input string. Either "yaml" or "json". @@ -167,7 +167,7 @@ def load(filename, *, format="yaml") -> "demes.Graph": """ Load a graph from a YAML or JSON file. The keywords and structure of the input are defined by the - :ref:`spec:sec_ref`. + :ref:`spec:sec_spec`. :param filename: The path to the file to be loaded, or a file-like object with a ``read()`` method. @@ -185,7 +185,7 @@ def load_all(filename) -> Generator["demes.Graph", None, None]: Generate graphs from a YAML document stream. Documents must be separated by the YAML document start indicator, ``---``. The keywords and structure of each document are defined by the - :ref:`spec:sec_ref`. + :ref:`spec:sec_spec`. :param filename: The path to the file to be loaded, or a file-like object with a ``read()`` method. 
@@ -203,7 +203,7 @@ def dumps(graph, *, format="yaml", simplified=True) -> str: """ Dump the specified graph to a YAML or JSON string. The keywords and structure of the output are defined by the - :ref:`spec:sec_ref`. + :ref:`spec:sec_spec`. :param .Graph graph: The graph to dump. :param str format: The format of the output file. Either "yaml" or "json". @@ -222,7 +222,7 @@ def dump(graph, filename, *, format="yaml", simplified=True) -> None: """ Dump the specified graph to a file. The keywords and structure of the output are defined by the - :ref:`spec:sec_ref`. + :ref:`spec:sec_spec`. :param .Graph graph: The graph to dump. :param filename: Path to the output file, or a file-like object with a