Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add load_all()/dump_all() to support multidoc YAML. #335

Merged
merged 2 commits into from
Jun 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion demes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,14 @@
Merge,
Admix,
)
from .load_dump import load_asdict, loads_asdict, load, loads, dump, dumps
from .load_dump import (
load_asdict,
loads_asdict,
load,
loads,
load_all,
dump,
dumps,
dump_all,
)
from .ms import from_ms
4 changes: 2 additions & 2 deletions demes/demes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2270,7 +2270,7 @@ class Builder:

:ivar dict data: The data dictionary of the graph's current state.
The objects nested within this dictionary follow Demes' data model,
as described in the :ref:`spec:sec_ref`.
as described in the :ref:`spec:sec_spec`.

.. note::
Users may freely modify the data dictionary, as long as the data
Expand Down Expand Up @@ -2454,7 +2454,7 @@ def fromdict(cls, data: MutableMapping[str, Any]) -> "Builder":

:param MutableMapping data: The data dictionary to initialise the
graph's state. The objects nested within this dictionary must
follow Demes' data model, as described in the :ref:`spec:sec_ref`.
follow Demes' data model, as described in the :ref:`spec:sec_spec`.

:return: The new Builder object.
:rtype: Builder
Expand Down
86 changes: 69 additions & 17 deletions demes/load_dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import json
import io
import math
from typing import MutableMapping, Any
from typing import Any, Generator, MutableMapping

import ruamel.yaml

Expand Down Expand Up @@ -36,12 +36,22 @@ def _open_file_polymorph(polymorph, mode="r"):
# which are hopefully simple enough to not suffer from API instability.


def _load_yaml_asdict(fp):
def _load_yaml_asdict(fp) -> MutableMapping[str, Any]:
    """
    Parse one YAML document from ``fp`` using ruamel.yaml's "safe" loader.

    :param fp: The input passed straight through to ``YAML.load()``.
    :return: Whatever the safe loader produced for the document.
    """
    with ruamel.yaml.YAML(typ="safe") as parser:
        document = parser.load(fp)
    return document


def _dump_yaml_fromdict(data, fp):
def _dump_yaml_fromdict(data, fp, multidoc=False) -> None:
"""
Dump data dict to a YAML file-like object.

:param bool multidoc: If True, output the YAML document start line ``---``,
and document end line ``...``, which indicate the beginning and end of
a YAML document respectively. The start indicator is needed when
outputting multiple YAML documents to a single file (or file stream).
The end indicator is not strictly needed, but may be desirable
depending on the underlying communication channel.
"""
with ruamel.yaml.YAML(typ="safe", output=fp) as yaml:
# Output flow style, but only for collections that consist only
# of scalars (i.e. the leaves in the document tree).
Expand All @@ -51,6 +61,9 @@ def _dump_yaml_fromdict(data, fp):
yaml.allow_unicode = False
# Keep dict insertion order, thank you very much!
yaml.sort_base_mapping_type_on_output = False
if multidoc:
yaml.explicit_start = True
yaml.explicit_end = True
yaml.dump(data)


Expand Down Expand Up @@ -93,11 +106,11 @@ def _unstringify_infinities(data: MutableMapping[str, Any]) -> None:
migration["start_time"] = float(start_time)


def loads_asdict(string, *, format="yaml"):
def loads_asdict(string, *, format="yaml") -> MutableMapping[str, Any]:
"""
Load a YAML or JSON string into a dictionary of nested objects.
The keywords and structure of the input are defined by the
:ref:`spec:sec_ref`.
:ref:`spec:sec_spec`.

:param str string: The string to be loaded.
:param str format: The format of the input string. Either "yaml" or "json".
Expand All @@ -109,11 +122,11 @@ def loads_asdict(string, *, format="yaml"):
return load_asdict(stream, format=format)


def load_asdict(filename, *, format="yaml"):
def load_asdict(filename, *, format="yaml") -> MutableMapping[str, Any]:
"""
Load a YAML or JSON file into a dictionary of nested objects.
The keywords and structure of the input are defined by the
:ref:`spec:sec_ref`.
:ref:`spec:sec_spec`.

:param filename: The path to the file to be loaded, or a file-like object
with a ``read()`` method.
Expand All @@ -135,11 +148,11 @@ def load_asdict(filename, *, format="yaml"):
return data


def loads(string, *, format="yaml"):
def loads(string, *, format="yaml") -> "demes.Graph":
"""
Load a graph from a YAML or JSON string.
The keywords and structure of the input are defined by the
:ref:`spec:sec_ref`.
:ref:`spec:sec_spec`.

:param str string: The string to be loaded.
:param str format: The format of the input string. Either "yaml" or "json".
Expand All @@ -150,11 +163,11 @@ def loads(string, *, format="yaml"):
return demes.Graph.fromdict(data)


def load(filename, *, format="yaml"):
def load(filename, *, format="yaml") -> "demes.Graph":
"""
Load a graph from a YAML or JSON file.
The keywords and structure of the input are defined by the
:ref:`spec:sec_ref`.
:ref:`spec:sec_spec`.

:param filename: The path to the file to be loaded, or a file-like object
with a ``read()`` method.
Expand All @@ -167,16 +180,35 @@ def load(filename, *, format="yaml"):
return demes.Graph.fromdict(data)


def dumps(graph, *, format="yaml", simplified=True):
def load_all(filename) -> Generator["demes.Graph", None, None]:
    """
    Lazily yield one graph per document of a YAML document stream.

    Consecutive documents must be separated by the YAML document start
    indicator, ``---``. The keywords and structure of each document are
    defined by the :ref:`spec:sec_spec`.

    :param filename: The path to the file to be loaded, or a file-like object
        with a ``read()`` method.
    :type filename: Union[str, os.PathLike, FileLike]
    :return: A generator of graphs.
    :rtype: Generator[demes.Graph, None, None]
    """
    # The input stays open for as long as the generator is being consumed,
    # because documents are parsed one at a time on demand.
    with _open_file_polymorph(filename) as stream:
        with ruamel.yaml.YAML(typ="safe") as parser:
            for document in parser.load_all(stream):
                yield demes.Graph.fromdict(document)


def dumps(graph, *, format="yaml", simplified=True) -> str:
"""
Dump the specified graph to a YAML or JSON string.
The keywords and structure of the output are defined by the
:ref:`spec:sec_ref`.
:ref:`spec:sec_spec`.

:param .Graph graph: The graph to dump.
:param str format: The format of the output file. Either "yaml" or "json".
:param bool simplified: If True, returns a simplified graph. If False, returns
a complete redundant graph.
a fully-qualified graph.
:return: The YAML or JSON string.
:rtype: str
"""
Expand All @@ -186,19 +218,19 @@ def dumps(graph, *, format="yaml", simplified=True):
return string


def dump(graph, filename, *, format="yaml", simplified=True):
def dump(graph, filename, *, format="yaml", simplified=True) -> None:
"""
Dump the specified graph to a file.
The keywords and structure of the output are defined by the
:ref:`spec:sec_ref`.
:ref:`spec:sec_spec`.

:param .Graph graph: The graph to dump.
:param filename: Path to the output file, or a file-like object with a
``write()`` method.
:type filename: Union[str, os.PathLike, FileLike]
:param str format: The format of the output file. Either "yaml" or "json".
:param bool simplified: If True, outputs a simplified graph. If False, outputs
a redundant graph.
a fully-qualified graph.
"""
if simplified:
data = graph.asdict_simplified()
Expand All @@ -214,3 +246,23 @@ def dump(graph, filename, *, format="yaml", simplified=True):
_dump_yaml_fromdict(data, f)
else:
raise ValueError(f"unknown format: {format}")


def dump_all(graphs, filename, *, simplified=True) -> None:
    """
    Write each graph as its own document in a multi-document YAML file or
    output stream.

    :param graphs: An iterable of graphs to dump.
    :param filename: Path to the output file, or a file-like object with a
        ``write()`` method.
    :type filename: Union[str, os.PathLike, FileLike]
    :param bool simplified: If True, outputs simplified graphs. If False, outputs
        fully-qualified graphs.
    """
    with _open_file_polymorph(filename, "w") as stream:
        for graph in graphs:
            document = graph.asdict_simplified() if simplified else graph.asdict()
            # multidoc=True requests explicit document start/end markers so
            # the documents can be told apart when read back.
            _dump_yaml_fromdict(document, stream, multidoc=True)
2 changes: 2 additions & 0 deletions docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@
.. autofunction:: demes.load_asdict
.. autofunction:: demes.loads
.. autofunction:: demes.loads_asdict
.. autofunction:: demes.load_all
.. autofunction:: demes.dump
.. autofunction:: demes.dumps
.. autofunction:: demes.dump_all
```

## Building Demes graphs
Expand Down
8 changes: 8 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,15 @@
deadline=None,
suppress_health_check=[hypothesis.HealthCheck.too_slow],
)
# Profile used outside of CI: 100 examples, no deadline, and the "too slow"
# health check suppressed.
hypothesis.settings.register_profile(
    "default",
    deadline=None,
    max_examples=100,
    suppress_health_check=[hypothesis.HealthCheck.too_slow],
)

# GitHub Actions sets the CI environment variable.
hypothesis.settings.load_profile("ci" if os.getenv("CI", False) else "default")
75 changes: 75 additions & 0 deletions tests/test_load_dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,3 +594,78 @@ def test_json_infinities_get_stringified(self):
assert data["migrations"][0]["start_time"] == "Infinity"
g2 = demes.loads(json_str, format="json")
g2.assert_close(g1)


class TestMultiDocument:
    """Tests for the multi-document YAML API: ``load_all()``/``dump_all()``."""

    @staticmethod
    def _assert_graphs_close(expected, loaded):
        # Compare lengths first: zip() alone would silently ignore any
        # graphs that are missing from (or extra in) the loaded list.
        assert len(expected) == len(loaded)
        for g1, g2 in zip(expected, loaded):
            g1.assert_close(g2)

    @pytest.mark.parametrize("yaml_file", tests.example_files())
    def test_load_all_single_document(self, yaml_file):
        # Loading files with one document should work with the multi-doc API.
        graphs = list(demes.load_all(yaml_file))
        assert len(graphs) == 1
        graph1 = graphs[0]
        graph2 = demes.load(yaml_file)
        graph1.assert_close(graph2)

    @pytest.mark.parametrize("simplified", [True, False])
    @pytest.mark.parametrize("graph1", tests.example_graphs())
    def test_dump_all_single_document(self, graph1, simplified):
        # A single document saved with the multi-doc API should be loadable
        # with the regular single-doc API.
        with tempfile.TemporaryDirectory() as tmpdir:
            tmpfile = pathlib.Path(tmpdir) / "temp.yaml"
            demes.dump_all([graph1], tmpfile, simplified=simplified)
            graph2 = demes.load(tmpfile)
        graph1.assert_close(graph2)

    @pytest.mark.parametrize("simplified", [True, False])
    def test_round_trip_file(self, simplified):
        # dump_all() and load_all() are both given a path.
        graphs1 = tests.example_graphs()
        assert len(graphs1) > 1
        with tempfile.TemporaryDirectory() as tmpdir:
            tmpfile = pathlib.Path(tmpdir) / "multidoc.yaml"
            demes.dump_all(graphs1, tmpfile, simplified=simplified)
            graphs2 = list(demes.load_all(tmpfile))
        self._assert_graphs_close(graphs1, graphs2)

    @pytest.mark.parametrize("simplified", [True, False])
    def test_round_trip_stream(self, simplified):
        # dump_all() and load_all() are both given an already-open file.
        graphs1 = tests.example_graphs()
        assert len(graphs1) > 1
        with tempfile.TemporaryDirectory() as tmpdir:
            tmpfile = pathlib.Path(tmpdir) / "multidoc.yaml"
            with open(tmpfile, "w") as f:
                demes.dump_all(graphs1, f, simplified=simplified)
            with open(tmpfile) as f:
                graphs2 = list(demes.load_all(f))
        self._assert_graphs_close(graphs1, graphs2)

    @pytest.mark.parametrize("simplified", [True, False])
    def test_round_trip_no_end_document_marker(self, simplified):
        # Documents written with the single-doc dump() and separated only by
        # a start marker should still be readable with load_all().
        graphs1 = tests.example_graphs()
        assert len(graphs1) > 1
        with tempfile.TemporaryDirectory() as tmpdir:
            tmpfile = pathlib.Path(tmpdir) / "multidoc.yaml"
            with open(tmpfile, "w") as f:
                for j, graph in enumerate(graphs1):
                    if j > 0:
                        # Output a start marker between documents.
                        print("---", file=f)
                    demes.dump(graph, f, simplified=simplified)
            graphs2 = list(demes.load_all(tmpfile))
        self._assert_graphs_close(graphs1, graphs2)

    @pytest.mark.parametrize("simplified", [True, False])
    def test_empty_file(self, simplified):
        # Dumping no graphs should create an empty file, and loading an
        # empty file should produce no graphs.
        with tempfile.TemporaryDirectory() as tmpdir:
            tmpfile = pathlib.Path(tmpdir) / "empty.yaml"
            # Forward the parametrized flag so both code paths are exercised;
            # otherwise the two parametrized runs would be identical.
            demes.dump_all([], tmpfile, simplified=simplified)
            assert tmpfile.exists()
            assert tmpfile.stat().st_size == 0
            graphs = list(demes.load_all(tmpfile))
            assert len(graphs) == 0