From 491b79085523f392522bb7fcd5067b6a5ce16f92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tina=20M=C3=BCller?= Date: Fri, 10 Apr 2020 01:43:06 +0200 Subject: [PATCH 1/3] Support overriding anchors In YAML 1.2, anchors can be reused. https://yaml.org/spec/1.2/spec.html#id2786196 --- lib/yaml/composer.py | 5 ----- tests/data/duplicate-anchor-1.loader-error | 3 --- tests/data/override-anchor-1.code | 1 + tests/data/override-anchor-1.data | 4 ++++ tests/data/override-anchor-2.code | 1 + ...uplicate-anchor-2.loader-error => override-anchor-2.data} | 0 6 files changed, 6 insertions(+), 8 deletions(-) delete mode 100644 tests/data/duplicate-anchor-1.loader-error create mode 100644 tests/data/override-anchor-1.code create mode 100644 tests/data/override-anchor-1.data create mode 100644 tests/data/override-anchor-2.code rename tests/data/{duplicate-anchor-2.loader-error => override-anchor-2.data} (100%) diff --git a/lib/yaml/composer.py b/lib/yaml/composer.py index 6d15cb40..a034c5f7 100644 --- a/lib/yaml/composer.py +++ b/lib/yaml/composer.py @@ -70,11 +70,6 @@ def compose_node(self, parent, index): return self.anchors[anchor] event = self.peek_event() anchor = event.anchor - if anchor is not None: - if anchor in self.anchors: - raise ComposerError("found duplicate anchor %r; first occurrence" - % anchor, self.anchors[anchor].start_mark, - "second occurrence", event.start_mark) self.descend_resolver(parent, index) if self.check_event(ScalarEvent): node = self.compose_scalar_node(anchor) diff --git a/tests/data/duplicate-anchor-1.loader-error b/tests/data/duplicate-anchor-1.loader-error deleted file mode 100644 index 906cf29d..00000000 --- a/tests/data/duplicate-anchor-1.loader-error +++ /dev/null @@ -1,3 +0,0 @@ -- &foo bar -- &bar bar -- &foo bar diff --git a/tests/data/override-anchor-1.code b/tests/data/override-anchor-1.code new file mode 100644 index 00000000..ea275187 --- /dev/null +++ b/tests/data/override-anchor-1.code @@ -0,0 +1 @@ +(['bar', 'bar', 'baz', 'baz']) diff --git a/tests/data/override-anchor-1.data b/tests/data/override-anchor-1.data new file mode 100644 index 00000000..a97f71e6 --- /dev/null +++ b/tests/data/override-anchor-1.data @@ -0,0 +1,4 @@ +- &foo bar +- *foo +- &foo baz +- *foo diff --git a/tests/data/override-anchor-2.code b/tests/data/override-anchor-2.code new file mode 100644 index 00000000..12bae17c --- /dev/null +++ b/tests/data/override-anchor-2.code @@ -0,0 +1 @@ +[1, 2, 3, 4] diff --git a/tests/data/duplicate-anchor-2.loader-error b/tests/data/override-anchor-2.data similarity index 100% rename from tests/data/duplicate-anchor-2.loader-error rename to tests/data/override-anchor-2.data From bd96e4000cff5a53bee565969871fbe4c8142a1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tina=20M=C3=BCller?= Date: Thu, 23 Sep 2021 19:31:05 +0200 Subject: [PATCH 2/3] Make allowing repeated anchors optional Add option reuse_anchors as requested by @ingydotnet --- lib/yaml/__init__.py | 22 +++++++++++----------- lib/yaml/composer.py | 8 +++++++- lib/yaml/loader.py | 20 ++++++++++---------- tests/lib/canonical.py | 4 ++-- tests/lib/test_constructor.py | 2 +- 5 files changed, 31 insertions(+), 25 deletions(-) diff --git a/lib/yaml/__init__.py b/lib/yaml/__init__.py index 8c71105b..9ec86101 100644 --- a/lib/yaml/__init__.py +++ b/lib/yaml/__init__.py @@ -26,22 +26,22 @@ def warnings(settings=None): return {} #------------------------------------------------------------------------------ -def scan(stream, Loader=Loader): +def scan(stream, Loader=Loader, reuse_anchors=False): """ Scan a YAML stream and produce scanning tokens. """ - loader = Loader(stream) + loader = Loader(stream, reuse_anchors=reuse_anchors) try: while loader.check_token(): yield loader.get_token() finally: loader.dispose() -def parse(stream, Loader=Loader): +def parse(stream, Loader=Loader, reuse_anchors=False): """ Parse a YAML stream and produce parsing events. """ - loader = Loader(stream) + loader = Loader(stream, reuse_anchors=reuse_anchors) try: while loader.check_event(): yield loader.get_event() @@ -53,41 +53,41 @@ def compose(stream, Loader=Loader): Parse the first YAML document in a stream and produce the corresponding representation tree. """ - loader = Loader(stream) + loader = Loader(stream, reuse_anchors=reuse_anchors) try: return loader.get_single_node() finally: loader.dispose() -def compose_all(stream, Loader=Loader): +def compose_all(stream, Loader=Loader, reuse_anchors=False): """ Parse all YAML documents in a stream and produce corresponding representation trees. """ - loader = Loader(stream) + loader = Loader(stream, reuse_anchors=reuse_anchors) try: while loader.check_node(): yield loader.get_node() finally: loader.dispose() -def load(stream, Loader): +def load(stream, Loader, reuse_anchors=False): """ Parse the first YAML document in a stream and produce the corresponding Python object. """ - loader = Loader(stream) + loader = Loader(stream, reuse_anchors=reuse_anchors) try: return loader.get_single_data() finally: loader.dispose() -def load_all(stream, Loader): +def load_all(stream, Loader, reuse_anchors=False): """ Parse all YAML documents in a stream and produce corresponding Python objects. """ - loader = Loader(stream) + loader = Loader(stream, reuse_anchors=reuse_anchors) try: while loader.check_data(): yield loader.get_data() diff --git a/lib/yaml/composer.py b/lib/yaml/composer.py index a034c5f7..b6bb1a12 100644 --- a/lib/yaml/composer.py +++ b/lib/yaml/composer.py @@ -10,8 +10,9 @@ class ComposerError(MarkedYAMLError): class Composer: - def __init__(self): + def __init__(self, reuse_anchors=False): self.anchors = {} + self.reuse_anchors=reuse_anchors def check_node(self): # Drop the STREAM-START event. @@ -70,6 +71,11 @@ def compose_node(self, parent, index): return self.anchors[anchor] event = self.peek_event() anchor = event.anchor + if anchor is not None: + if anchor in self.anchors and not self.reuse_anchors: + raise ComposerError("found duplicate anchor %r; first occurrence" + % anchor, self.anchors[anchor].start_mark, + "second occurrence", event.start_mark) self.descend_resolver(parent, index) if self.check_event(ScalarEvent): node = self.compose_scalar_node(anchor) diff --git a/lib/yaml/loader.py b/lib/yaml/loader.py index e90c1122..7af14def 100644 --- a/lib/yaml/loader.py +++ b/lib/yaml/loader.py @@ -10,41 +10,41 @@ class BaseLoader(Reader, Scanner, Parser, Composer, BaseConstructor, BaseResolver): - def __init__(self, stream): + def __init__(self, stream, reuse_anchors=False): Reader.__init__(self, stream) Scanner.__init__(self) Parser.__init__(self) - Composer.__init__(self) + Composer.__init__(self, reuse_anchors=reuse_anchors) BaseConstructor.__init__(self) BaseResolver.__init__(self) class FullLoader(Reader, Scanner, Parser, Composer, FullConstructor, Resolver): - def __init__(self, stream): + def __init__(self, stream, reuse_anchors=False): Reader.__init__(self, stream) Scanner.__init__(self) Parser.__init__(self) - Composer.__init__(self) + Composer.__init__(self, reuse_anchors=reuse_anchors) FullConstructor.__init__(self) Resolver.__init__(self) class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver): - def __init__(self, stream): + def __init__(self, stream, reuse_anchors=False): Reader.__init__(self, stream) Scanner.__init__(self) Parser.__init__(self) - Composer.__init__(self) + Composer.__init__(self, reuse_anchors=reuse_anchors) SafeConstructor.__init__(self) Resolver.__init__(self) class Loader(Reader, Scanner, Parser, Composer, Constructor, Resolver): - def __init__(self, stream): + def __init__(self, stream, reuse_anchors=False): Reader.__init__(self, stream) Scanner.__init__(self) Parser.__init__(self) - Composer.__init__(self) + Composer.__init__(self, reuse_anchors=reuse_anchors) Constructor.__init__(self) Resolver.__init__(self) @@ -54,10 +54,10 @@ def __init__(self, stream): # to ensure backwards compatibility. class UnsafeLoader(Reader, Scanner, Parser, Composer, Constructor, Resolver): - def __init__(self, stream): + def __init__(self, stream, reuse_anchors=False): Reader.__init__(self, stream) Scanner.__init__(self) Parser.__init__(self) - Composer.__init__(self) + Composer.__init__(self, reuse_anchors=reuse_anchors) Constructor.__init__(self) Resolver.__init__(self) diff --git a/tests/lib/canonical.py b/tests/lib/canonical.py index a8b4e3a7..d60607cc 100644 --- a/tests/lib/canonical.py +++ b/tests/lib/canonical.py @@ -318,12 +318,12 @@ def peek_event(self): class CanonicalLoader(CanonicalScanner, CanonicalParser, yaml.composer.Composer, yaml.constructor.Constructor, yaml.resolver.Resolver): - def __init__(self, stream): + def __init__(self, stream, reuse_anchors=False): if hasattr(stream, 'read'): stream = stream.read() CanonicalScanner.__init__(self, stream) CanonicalParser.__init__(self) - yaml.composer.Composer.__init__(self) + yaml.composer.Composer.__init__(self, reuse_anchors=reuse_anchors) yaml.constructor.Constructor.__init__(self) yaml.resolver.Resolver.__init__(self) diff --git a/tests/lib/test_constructor.py b/tests/lib/test_constructor.py index 0783a21b..f7d19956 100644 --- a/tests/lib/test_constructor.py +++ b/tests/lib/test_constructor.py @@ -258,7 +258,7 @@ def test_constructor_types(data_filename, code_filename, verbose=False): native2 = None try: with open(data_filename, 'rb') as file: - native1 = list(yaml.load_all(file, Loader=MyLoader)) + native1 = list(yaml.load_all(file, Loader=MyLoader, reuse_anchors=True)) if len(native1) == 1: native1 = native1[0] with open(code_filename, 'rb') as file: From 737ad62df0352a62c97e9fdbd0efe7860f7f1791 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tina=20M=C3=BCller?= Date: Thu, 23 Sep 2021 19:56:28 +0200 Subject: [PATCH 3/3] Also add reuse_anchors to libyaml backend --- lib/yaml/__init__.py | 2 +- lib/yaml/cyaml.py | 20 ++++++------- tests/data/duplicate-anchor-1.loader-error | 3 ++ ...e => override-anchor-1.reuse-anchors-code} | 0 ...a => override-anchor-1.reuse-anchors-data} | 0 ...e => override-anchor-2.reuse-anchors-code} | 0 ...a => override-anchor-2.reuse-anchors-data} | 0 tests/lib/test_constructor.py | 30 ++++++++++++++++++- tests/lib/test_yaml_ext.py | 24 +++++++-------- yaml/_yaml.pyx | 5 ++-- 10 files changed, 58 insertions(+), 26 deletions(-) create mode 100644 tests/data/duplicate-anchor-1.loader-error rename tests/data/{override-anchor-1.code => override-anchor-1.reuse-anchors-code} (100%) rename tests/data/{override-anchor-1.data => override-anchor-1.reuse-anchors-data} (100%) rename tests/data/{override-anchor-2.code => override-anchor-2.reuse-anchors-code} (100%) rename tests/data/{override-anchor-2.data => override-anchor-2.reuse-anchors-data} (100%) diff --git a/lib/yaml/__init__.py b/lib/yaml/__init__.py index 9ec86101..c8546def 100644 --- a/lib/yaml/__init__.py +++ b/lib/yaml/__init__.py @@ -48,7 +48,7 @@ def parse(stream, Loader=Loader, reuse_anchors=False): finally: loader.dispose() -def compose(stream, Loader=Loader): +def compose(stream, Loader=Loader, reuse_anchors=False): """ Parse the first YAML document in a stream and produce the corresponding representation tree. diff --git a/lib/yaml/cyaml.py b/lib/yaml/cyaml.py index 0c213458..fd28a1c0 100644 --- a/lib/yaml/cyaml.py +++ b/lib/yaml/cyaml.py @@ -15,36 +15,36 @@ class CBaseLoader(CParser, BaseConstructor, BaseResolver): - def __init__(self, stream): - CParser.__init__(self, stream) + def __init__(self, stream, reuse_anchors=False): + CParser.__init__(self, stream, reuse_anchors=reuse_anchors) BaseConstructor.__init__(self) BaseResolver.__init__(self) class CSafeLoader(CParser, SafeConstructor, Resolver): - def __init__(self, stream): - CParser.__init__(self, stream) + def __init__(self, stream, reuse_anchors=False): + CParser.__init__(self, stream, reuse_anchors=reuse_anchors) SafeConstructor.__init__(self) Resolver.__init__(self) class CFullLoader(CParser, FullConstructor, Resolver): - def __init__(self, stream): - CParser.__init__(self, stream) + def __init__(self, stream, reuse_anchors=False): + CParser.__init__(self, stream, reuse_anchors=reuse_anchors) FullConstructor.__init__(self) Resolver.__init__(self) class CUnsafeLoader(CParser, UnsafeConstructor, Resolver): - def __init__(self, stream): - CParser.__init__(self, stream) + def __init__(self, stream, reuse_anchors=False): + CParser.__init__(self, stream, reuse_anchors=reuse_anchors) UnsafeConstructor.__init__(self) Resolver.__init__(self) class CLoader(CParser, Constructor, Resolver): - def __init__(self, stream): - CParser.__init__(self, stream) + def __init__(self, stream, reuse_anchors=False): + CParser.__init__(self, stream, reuse_anchors=reuse_anchors) Constructor.__init__(self) Resolver.__init__(self) diff --git a/tests/data/duplicate-anchor-1.loader-error b/tests/data/duplicate-anchor-1.loader-error new file mode 100644 index 00000000..906cf29d --- /dev/null +++ b/tests/data/duplicate-anchor-1.loader-error @@ -0,0 +1,3 @@ +- &foo bar +- &bar bar +- &foo bar diff --git a/tests/data/override-anchor-1.code b/tests/data/override-anchor-1.reuse-anchors-code similarity index 100% rename from tests/data/override-anchor-1.code rename to tests/data/override-anchor-1.reuse-anchors-code diff --git a/tests/data/override-anchor-1.data b/tests/data/override-anchor-1.reuse-anchors-data similarity index 100% rename from tests/data/override-anchor-1.data rename to tests/data/override-anchor-1.reuse-anchors-data diff --git a/tests/data/override-anchor-2.code b/tests/data/override-anchor-2.reuse-anchors-code similarity index 100% rename from tests/data/override-anchor-2.code rename to tests/data/override-anchor-2.reuse-anchors-code diff --git a/tests/data/override-anchor-2.data b/tests/data/override-anchor-2.reuse-anchors-data similarity index 100% rename from tests/data/override-anchor-2.data rename to tests/data/override-anchor-2.reuse-anchors-data diff --git a/tests/lib/test_constructor.py b/tests/lib/test_constructor.py index f7d19956..968db71f 100644 --- a/tests/lib/test_constructor.py +++ b/tests/lib/test_constructor.py @@ -258,7 +258,7 @@ def test_constructor_types(data_filename, code_filename, verbose=False): native2 = None try: with open(data_filename, 'rb') as file: - native1 = list(yaml.load_all(file, Loader=MyLoader, reuse_anchors=True)) + native1 = list(yaml.load_all(file, Loader=MyLoader)) if len(native1) == 1: native1 = native1[0] with open(code_filename, 'rb') as file: @@ -296,6 +296,34 @@ def test_subclass_blacklist_types(data_filename, verbose=False): test_subclass_blacklist_types.unittest = ['.subclass_blacklist'] +def test_reuse_anchors(data_filename, code_filename, verbose=False): + try: + with open(data_filename, 'rb') as file: + native1 = list(yaml.load_all(file, Loader=yaml.SafeLoader, reuse_anchors=True)) + if len(native1) == 1: + native1 = native1[0] + with open(code_filename, 'rb') as file: + native2 = _load_code(file.read()) + try: + if native1 == native2: + return + except TypeError: + pass + if verbose: + print("SERIALIZED NATIVE1:") + print(_serialize_value(native1)) + print("SERIALIZED NATIVE2:") + print(_serialize_value(native2)) + assert _serialize_value(native1) == _serialize_value(native2), (native1, native2) + finally: + if verbose: + print("NATIVE1:") + pprint.pprint(native1) + print("NATIVE2:") + pprint.pprint(native2) + +test_reuse_anchors.unittest = ['.reuse-anchors-data', '.reuse-anchors-code'] + if __name__ == '__main__': import sys, test_constructor sys.modules['test_constructor'] = sys.modules['__main__'] diff --git a/tests/lib/test_yaml_ext.py b/tests/lib/test_yaml_ext.py index e1c4bb81..2a2916f1 100644 --- a/tests/lib/test_yaml_ext.py +++ b/tests/lib/test_yaml_ext.py @@ -18,28 +18,28 @@ def new_parse(stream, Loader=yaml.CLoader): return old_parse(stream, Loader) old_compose = yaml.compose -def new_compose(stream, Loader=yaml.CLoader): - return old_compose(stream, Loader) +def new_compose(stream, Loader=yaml.CLoader, reuse_anchors=False): + return old_compose(stream, Loader, reuse_anchors=reuse_anchors) old_compose_all = yaml.compose_all -def new_compose_all(stream, Loader=yaml.CLoader): - return old_compose_all(stream, Loader) +def new_compose_all(stream, Loader=yaml.CLoader, reuse_anchors=False): + return old_compose_all(stream, Loader, reuse_anchors=reuse_anchors) old_load = yaml.load -def new_load(stream, Loader=yaml.CLoader): - return old_load(stream, Loader) +def new_load(stream, Loader=yaml.CLoader, reuse_anchors=False): + return old_load(stream, Loader, reuse_anchors=reuse_anchors) old_load_all = yaml.load_all -def new_load_all(stream, Loader=yaml.CLoader): - return old_load_all(stream, Loader) +def new_load_all(stream, Loader=yaml.CLoader, reuse_anchors=False): + return old_load_all(stream, Loader, reuse_anchors=reuse_anchors) old_safe_load = yaml.safe_load -def new_safe_load(stream): - return old_load(stream, yaml.CSafeLoader) +def new_safe_load(stream, reuse_anchors=False): + return old_load(stream, yaml.CSafeLoader, reuse_anchors=reuse_anchors) old_safe_load_all = yaml.safe_load_all -def new_safe_load_all(stream): - return old_load_all(stream, yaml.CSafeLoader) +def new_safe_load_all(stream, reuse_anchors=False): + return old_load_all(stream, yaml.CSafeLoader, reuse_anchors=reuse_anchors) old_emit = yaml.emit def new_emit(events, stream=None, Dumper=yaml.CDumper, **kwds): diff --git a/yaml/_yaml.pyx b/yaml/_yaml.pyx index e3e93e2c..01e98146 100644 --- a/yaml/_yaml.pyx +++ b/yaml/_yaml.pyx @@ -256,7 +256,8 @@ cdef class CParser: cdef int stream_cache_pos cdef int unicode_source - def __init__(self, stream): + def __init__(self, stream, reuse_anchors=False): + self.reuse_anchors=reuse_anchors cdef is_readable if yaml_parser_initialize(&self.parser) == 0: raise MemoryError @@ -714,7 +715,7 @@ cdef class CParser: and self.parsed_event.data.mapping_start.anchor != NULL: anchor = PyUnicode_FromYamlString(self.parsed_event.data.mapping_start.anchor) if anchor is not None: - if anchor in self.anchors: + if anchor in self.anchors and not self.reuse_anchors: mark = Mark(self.stream_name, self.parsed_event.start_mark.index, self.parsed_event.start_mark.line,