Skip to content

Commit

Permalink
Disallow duplicate keys when loading dicts from YAML
Browse files Browse the repository at this point in the history
Part of the fix for: pyvec/naucse#2
Workaround for: yaml/pyyaml#165

The workaround is incomplete, but should be OK for our uses.
  • Loading branch information
encukou committed Jan 10, 2020
1 parent 2c038c1 commit 658197e
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 1 deletion.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ licensed under the same license.
In that case, the `subtitle` must be present, and the `title` is generated
as `"{lesson title} – {page subtitle}"`.
* Timezone information is passed through
* Mappings read from YAML must have unique keys.


### naucse_render 1.2
Expand Down
27 changes: 26 additions & 1 deletion naucse_render/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,36 @@
import yaml


class YamlLoader(yaml.SafeLoader):
"""Custom YAML loader"""

# Disallow duplicate keys.
# Workaround for PyYAML issue: https://github.com/yaml/pyyaml/issues/165
# This disables some uses of YAML merge (`<<`)
# (see the xfailed test_read_yaml_allow_merge)

def construct_maping(loader, node, deep=False):
"""Construct a YAML mapping node, avoiding duplicates"""
loader.flatten_mapping(node)
result = {}
for key_node, value_node in node.value:
key = loader.construct_object(key_node, deep=deep)
if key in result:
raise yaml.constructor.ConstructorError(f'Duplicate key {key}')
result[key] = loader.construct_object(value_node, deep=deep)
return result

YamlLoader.add_constructor(
yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG,
construct_maping,
)


@functools.lru_cache()
def _read_yaml(path, stat):
print('Loading', path, file=sys.stderr)
with path.open(encoding='utf-8') as f:
return yaml.safe_load(f)
return yaml.load(f, Loader=YamlLoader)


def read_yaml(base_path, *path_parts, source_key=None):
Expand Down
36 changes: 36 additions & 0 deletions test_naucse_render/test_load.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pytest
import yaml

from naucse_render.load import read_yaml, _read_yaml

Expand Down Expand Up @@ -73,3 +74,38 @@ def test_isolated_load(tmp_path):

data = read_yaml(tmp_path, 'test.yaml')
assert data == {'data': {'a': 1, 'b': 2}}


def test_read_yaml_disallow_duplicate_keys(tmp_path):
"""Assert that read_yaml disallows duplicate keys"""
yaml_path = tmp_path / 'test.yaml'
yaml_path.write_text("""data:
a: 1
a: 2
""")
with pytest.raises(yaml.constructor.ConstructorError):
read_yaml(tmp_path, 'test.yaml')


@pytest.mark.xfail(
strict=True,
reason="Incomplete workaround for https://github.com/yaml/pyyaml/issues/165"
)
def test_read_yaml_allow_merge(tmp_path):
"""Assert that read_yaml allows overriding dict keys in YAML merge"""
yaml_path = tmp_path / 'test.yaml'
yaml_path.write_text("""data:
1:
<<: &common
a: a
b: b
a: override
2:
<<: *common
b: override
""")
data = read_yaml(tmp_path, 'test.yaml')
assert data == {
1: {'a': 'override', 'b': 'b'},
2: {'a': 'a', 'b': 'override'},
}

0 comments on commit 658197e

Please sign in to comment.