Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add an optional extended parser subclass (YAMLAnchorReplayingFactory) able to inline anchors #502

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions release-notes/CREDITS-2.x
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,9 @@ Heiko Boettger (@HeikoBoettger)

* Contributed #482: (yaml) Allow passing `ParserImpl` by a subclass or overwrite the events
(2.18.0)
* Contributed #502: (yaml) Add an optional extended parser subclass (`YAMLAnchorReplayingFactory`)
able to inline anchors
(2.19.0)

Burdyug Pavel (@Pavel38l)

Expand Down
4 changes: 3 additions & 1 deletion release-notes/VERSION-2.x
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ Active Maintainers:

2.19.0 (not yet released)

-
#502: Add an optional extended parser subclass (`YAMLAnchorReplayingFactory`)
able to inline anchors
(contributed by Heiko B)

2.18.2 (27-Nov-2024)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package com.fasterxml.jackson.dataformat.yaml;

import java.io.CharArrayReader;
import java.io.InputStream;
import java.io.IOException;
import java.io.Reader;

import com.fasterxml.jackson.core.JsonEncoding;
import com.fasterxml.jackson.core.ObjectCodec;
import com.fasterxml.jackson.core.io.IOContext;

/**
 * {@link YAMLFactory} variant whose parser-creation hooks return
 * {@link YAMLAnchorReplayingParser} instances rather than plain {@link YAMLParser}s.
 * Besides the {@code _createParser} overloads, only {@link #copy()} and
 * {@code readResolve()} are overridden so that copies keep this concrete type.
 *
 * @since 2.19
 */
public class YAMLAnchorReplayingFactory extends YAMLFactory {
    private static final long serialVersionUID = 1L;

    /** Creates a factory with default settings and no codec. */
    public YAMLAnchorReplayingFactory() {
        super();
    }

    /**
     * @param oc codec handed to the parent factory constructor
     */
    public YAMLAnchorReplayingFactory(ObjectCodec oc) {
        super(oc);
    }

    /**
     * Copy-style constructor delegating to the matching parent constructor.
     *
     * @param src factory passed to the parent constructor as the source
     * @param oc codec passed to the parent constructor
     */
    public YAMLAnchorReplayingFactory(YAMLFactory src, ObjectCodec oc) {
        super(src, oc);
    }

    /**
     * @param b builder passed to the parent constructor
     */
    protected YAMLAnchorReplayingFactory(YAMLFactoryBuilder b) {
        super(b);
    }

    /** Returns a new factory of this same type, built from this one with a {@code null} codec. */
    @Override
    public YAMLAnchorReplayingFactory copy() {
        _checkInvalidCopy(YAMLAnchorReplayingFactory.class);
        return new YAMLAnchorReplayingFactory(this, (ObjectCodec) null);
    }

    /** Serialization hook: substitutes a fresh factory of this type that keeps the current codec. */
    @Override
    protected Object readResolve() {
        return new YAMLAnchorReplayingFactory(this, _objectCodec);
    }

    @Override
    protected YAMLParser _createParser(InputStream input, IOContext ctxt) throws IOException {
        Reader source = _createReader(input, (JsonEncoding) null, ctxt);
        return _newReplayingParser(ctxt, source);
    }

    @Override
    protected YAMLParser _createParser(Reader r, IOContext ctxt) throws IOException {
        return _newReplayingParser(ctxt, r);
    }

    @Override
    protected YAMLParser _createParser(char[] data, int offset, int len, IOContext ctxt,
            boolean recyclable) throws IOException {
        Reader source = new CharArrayReader(data, offset, len);
        return _newReplayingParser(ctxt, source);
    }

    @Override
    protected YAMLParser _createParser(byte[] data, int offset, int len, IOContext ctxt) throws IOException {
        Reader source = _createReader(data, offset, len, (JsonEncoding) null, ctxt);
        return _newReplayingParser(ctxt, source);
    }

    /**
     * Single construction point shared by all {@code _createParser} overloads:
     * builds the replaying parser from this factory's current feature flags,
     * loader options and codec.
     */
    private YAMLAnchorReplayingParser _newReplayingParser(IOContext ctxt, Reader source) {
        return new YAMLAnchorReplayingParser(ctxt, _parserFeatures, _yamlParserFeatures,
                _loaderOptions, _objectCodec, source);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
package com.fasterxml.jackson.dataformat.yaml;

import java.io.Reader;
import java.io.IOException;

import java.util.*;

import org.yaml.snakeyaml.LoaderOptions;
import org.yaml.snakeyaml.events.*;

import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.core.ObjectCodec;
import com.fasterxml.jackson.core.exc.StreamConstraintsException;
import com.fasterxml.jackson.core.io.IOContext;

/**
* A parser that remembers the events of anchored parts in yaml and repeats them
* to inline these parts when an alias is found instead of only returning an alias.
*<p>
* Note: this overrides {@code getEvent()} since the base `super.nextToken()` manages too much state and
* it seems to be much simpler to re-emit the events.
*
* @since 2.19
*/
public class YAMLAnchorReplayingParser extends YAMLParser
{
/**
 * Recording state for one anchored node: the anchor name, the events captured
 * for it so far, and how many of its collections are still open.
 */
private static final class AnchorContext {
    /** Name of the anchor being recorded. */
    final String anchor;
    /** Events captured for this anchor, in emission order. */
    final List<Event> events;
    /** Count of still-open collections; starts at 1 for the anchored node itself. */
    int depth;

    AnchorContext(String anchor) {
        this.anchor = anchor;
        this.events = new ArrayList<>();
        this.depth = 1;
    }
}

/**
* Maximum number of events that may be held in one event list: this bound is
* checked both for the events recorded for a single anchor and for the queue
* of events waiting to be replayed. Exceeding it raises a
* {@link StreamConstraintsException}.
*/
public static final int MAX_EVENTS = 9999;

/**
* Maximum number of anchored collections that may be open (still being
* recorded) at the same time, i.e. the nesting limit of the anchor stack.
* Exceeding it raises a {@link StreamConstraintsException}.
*/
public static final int MAX_ANCHORS = 9999;
HeikoBoettger-KarlStorz marked this conversation as resolved.
Show resolved Hide resolved

/**
* Maximum number of merges ({@code <<}) that may be pending at the same time,
* i.e. the size limit of the merge stack. Exceeding it raises a
* {@link StreamConstraintsException}.
*/
public static final int MAX_MERGES = 9999;

/**
* Maximum number of anchor definitions kept for later replay. Exceeding it
* raises a {@link StreamConstraintsException}.
*/
public static final int MAX_REFS = 9999;

/**
* Remembers the collection depth at which each merge ({@code <<}) was started,
* so that the mapping end event closing the merged mapping can be recognized
* and dropped from the event stream.
*/
private final ArrayDeque<Integer> mergeStack = new ArrayDeque<>();

/**
* Stack of anchored collections that are currently being recorded; the top
* entry is the innermost one.
*/
private final ArrayDeque<AnchorContext> tokenStack = new ArrayDeque<>();

/**
* Maps each anchor name to the events recorded for its most recent definition
* (a later definition of the same anchor replaces the earlier one).
*/
private final Map<String, List<Event>> referencedObjects = new HashMap<>();

/**
* Queue of events that have been inserted while processing an alias and are
* still waiting to be returned.
*/
private final ArrayDeque<Event> refEvents = new ArrayDeque<>();

/**
* Keeps track of the global depth of nested collections: incremented on each
* collection start event and decremented on each collection end event.
*/
private int globalDepth = 0;

/**
 * Creates the parser; every argument is forwarded unchanged to the
 * {@link YAMLParser} constructor of the same shape.
 *
 * @param ctxt I/O context
 * @param parserFeatures parser feature flags
 * @param formatFeatures YAML format feature flags
 * @param loaderOptions SnakeYAML loader options
 * @param codec object codec
 * @param reader source of the YAML content
 */
public YAMLAnchorReplayingParser(
        IOContext ctxt,
        int parserFeatures,
        int formatFeatures,
        LoaderOptions loaderOptions,
        ObjectCodec codec,
        Reader reader)
{
    super(ctxt, parserFeatures, formatFeatures, loaderOptions, codec, reader);
}

/**
 * Stores the completed recording of an anchor so that later aliases can replay it
 * and, if another anchored collection is still being recorded, appends the
 * recorded events to that enclosing recording as well.
 *
 * @param context the finished anchor recording
 * @throws StreamConstraintsException if storing a new anchor would exceed
 *     {@link #MAX_REFS}, or if appending to the enclosing recording would exceed
 *     {@link #MAX_EVENTS}
 */
private void finishContext(AnchorContext context) throws StreamConstraintsException {
    // Re-defining an already known anchor replaces its entry and does not grow the
    // map, so the limit only applies when a new anchor name is about to be added.
    boolean addsNewAnchor = !referencedObjects.containsKey(context.anchor);
    if (addsNewAnchor && referencedObjects.size() + 1 > MAX_REFS) {
        throw new StreamConstraintsException("too many references in the document");
    }
    referencedObjects.put(context.anchor, context.events);

    if (!tokenStack.isEmpty()) {
        // The nested anchor's events were kept out of the enclosing recording while
        // it was open; hand them over now so the outer anchor replays them too.
        List<Event> enclosing = tokenStack.peek().events;
        if (enclosing.size() + context.events.size() > MAX_EVENTS) {
            throw new StreamConstraintsException("too many events to replay");
        }
        enclosing.addAll(context.events);
    }
}

/**
 * Adjusts the global collection depth for the given event: +1 for a collection
 * start, -1 for a collection end, unchanged for anything else (including
 * {@code null}).
 *
 * @param event the event to account for, may be {@code null}
 * @return the same {@code event} instance, to allow call chaining
 */
protected Event trackDepth(Event event) {
    final int delta;
    if (event instanceof CollectionStartEvent) {
        delta = 1;
    } else {
        delta = (event instanceof CollectionEndEvent) ? -1 : 0;
    }
    globalDepth += delta;
    return event;
}

/**
 * Drops the mapping end event that closes a merged mapping.
 * <p>
 * A mapping end is suppressed when a merge is pending and the depth recorded
 * for that merge is greater than the current global depth; the merge is then
 * removed from the merge stack. Every other event passes through untouched.
 *
 * @param event the event to inspect
 * @return {@code event}, or {@code null} if it has been suppressed
 */
protected Event filterEvent(Event event) {
    boolean candidate = (event instanceof MappingEndEvent) && !mergeStack.isEmpty();
    if (!candidate) {
        return event;
    }
    int mergeDepth = mergeStack.peek();
    if (mergeDepth <= globalDepth) {
        return event;
    }
    mergeStack.pop();
    return null;
}

/**
 * Returns the next event of the stream with aliases inlined and merge keys
 * ({@code <<}) flattened.
 * <ul>
 *  <li>Events still queued from an earlier alias are returned first.</li>
 *  <li>An alias event is replaced by the events recorded for its anchor.</li>
 *  <li>An anchored node starts a recording (collections) or is stored directly
 *      (other nodes).</li>
 *  <li>A scalar {@code <<} followed by a mapping start is dropped together with
 *      that mapping start; the matching mapping end is dropped later by
 *      {@link #filterEvent(Event)}.</li>
 *  <li>Any other event is appended to the innermost open anchor recording, if any.</li>
 * </ul>
 *
 * @return the next event, or {@code null} if the underlying parser returns {@code null}
 * @throws JsonParseException if an alias refers to an unknown anchor, or if
 *     {@code <<} is not followed by a mapping
 * @throws StreamConstraintsException if one of the {@code MAX_*} limits is exceeded
 * @throws IOException if the underlying parser fails
 */
@Override
protected Event getEvent() throws IOException {
    // Drain events queued by a previous alias before reading new input.
    while (!refEvents.isEmpty()) {
        Event replayed = filterEvent(trackDepth(refEvents.removeFirst()));
        if (replayed != null) {
            return replayed;
        }
    }

    // Read from the underlying parser, skipping events suppressed by the filter.
    Event event = null;
    while (event == null) {
        event = trackDepth(super.getEvent());
        if (event == null) {
            return null;
        }
        event = filterEvent(event);
    }

    if (event instanceof AliasEvent) {
        AliasEvent alias = (AliasEvent) event;
        List<Event> events = referencedObjects.get(alias.getAnchor());
        if (events == null) {
            throw new JsonParseException("invalid alias " + alias.getAnchor());
        }
        if (refEvents.size() + events.size() > MAX_EVENTS) {
            throw new StreamConstraintsException("too many events to replay");
        }
        refEvents.addAll(events);
        // The first replayed event must be depth-tracked like the remaining ones
        // (which are tracked when drained above). Otherwise the replayed collection's
        // end event decrements a depth its start never incremented, and a pending
        // merge would then swallow the wrong mapping end.
        // A recording always begins with the anchored node itself, never with a
        // mapping end, so filterEvent() is not needed for this first event.
        // NOTE(review): events returned from the replay queue are not appended to an
        // enclosing anchor recording, so an anchor whose content contains an alias
        // appears to be recorded without the aliased part - confirm and handle separately.
        return trackDepth(refEvents.removeFirst());
    }

    if (event instanceof NodeEvent) {
        String anchor = ((NodeEvent) event).getAnchor();
        if (anchor != null) {
            AnchorContext context = new AnchorContext(anchor);
            context.events.add(event);
            if (event instanceof CollectionStartEvent) {
                // Collection: keep recording until its matching end event arrives.
                if (tokenStack.size() + 1 > MAX_ANCHORS) {
                    throw new StreamConstraintsException("too many anchors in the document");
                }
                tokenStack.push(context);
            } else {
                // Single-event node: the recording is already complete.
                finishContext(context);
            }
            return event;
        }
    }

    if (event instanceof ScalarEvent) {
        ScalarEvent scalarEvent = (ScalarEvent) event;
        // NOTE(review): the merge key is recognized by its text only, so a quoted "<<"
        // or a "<<" in value position is treated as a merge key as well - confirm
        // whether that is intended.
        if ("<<".equals(scalarEvent.getValue())) {
            // The merged value must be a mapping (inline, or an alias that replays one).
            Event next = getEvent();
            if (next instanceof MappingStartEvent) {
                if (mergeStack.size() + 1 > MAX_MERGES) {
                    throw new StreamConstraintsException("too many merges in the document");
                }
                // globalDepth now is the depth inside the merged mapping; its end event
                // brings the depth below this value and is dropped by filterEvent().
                mergeStack.push(globalDepth);
                // Skip both the "<<" key and the mapping start.
                return getEvent();
            }
            throw new JsonParseException("found field '<<' but value isn't a map");
        }
    }

    if (!tokenStack.isEmpty()) {
        AnchorContext context = tokenStack.peek();
        if (context.events.size() + 1 > MAX_EVENTS) {
            throw new StreamConstraintsException("too many events to replay");
        }
        context.events.add(event);
        if (event instanceof CollectionStartEvent) {
            ++context.depth;
        } else if (event instanceof CollectionEndEvent) {
            --context.depth;
            if (context.depth == 0) {
                // The anchored collection is closed: publish its recording.
                tokenStack.pop();
                finishContext(context);
            }
        }
    }
    return event;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -580,13 +580,16 @@ public JsonToken nextToken() throws IOException
/**
* Since the parserImpl cannot be replaced allow subclasses to at least be able to
* influence the events being consumed.
*
*<p>
* A particular use case is working around the lack of anchor and alias support to
* emit additional events.
*<p>
* NOTE: since 2.18, declared to throw {@link IOException} to allow sub-classes
* to do so.
*
* @since 2.18
*/
protected Event getEvent() {
protected Event getEvent() throws IOException {
return _yamlParser.getEvent();
}

Expand Down
Loading