From d8454110baa3f4052dbb61323cbd2aa651be9f11 Mon Sep 17 00:00:00 2001 From: Ry Biesemeyer Date: Tue, 24 May 2022 07:48:47 -0700 Subject: [PATCH] Field Reference: handle special characters (#14044) * add failing tests for Event.new with field that look like field references * fix: correctly handle FieldReference-special characters in field names. Keys passed to most methods of `ConvertedMap`, based on `IdentityHashMap` depend on identity and not equivalence, and therefore rely on the keys being _interned_ strings. In order to avoid hitting the JVM's global String intern pool (which can have performance problems), operations to normalize a string to its interned counterpart have traditionally relied on the behaviour of `FieldReference#from` returning a likely-cached `FieldReference`, that had an interned `key` and an empty `path`. This is problematic on two points. First, when `ConvertedMap` was given data with keys that _were_ valid string field references representing a nested field (such as `[host][geo][location]`), the implementation of `ConvertedMap#put` effectively silently discarded the path components because it assumed them to be empty, and only the key was kept (`location`). Second, when `ConvertedMap` was given a map whose keys contained what the field reference parser considered special characters but _were NOT_ valid field references, the resulting `FieldReference.IllegalSyntaxException` caused the operation to abort. Instead of using the `FieldReference` cache, which sits on top of objects whose `key` and `path`-components are known to have been interned, we introduce an internment helper on our `ConvertedMap` that is also backed by the global string intern pool, and ensure that our field references are primed through this pool. In addition to fixing the `ConvertedMap#newFromMap` functionality, this has three net effects: - Our ConvertedMap operations still use strings from the global intern pool - We have a new, smaller cache of individual field names, improving lookup performance - Our FieldReference cache no longer is flooded with fragments and therefore is more likely to remain performant NOTE: this does NOT create isolated intern pools, as doing so would require a careful audit of the possible code-paths to `ConvertedMap#putInterned`. The new cache is limited to 10k strings, and when more are used only the FIRST 10k strings will be primed into the cache, leaving the remainder to always hit the global String intern pool. NOTE: by fixing this bug, we alow events to be created whose fields _CANNOT_ be referenced with the existing FieldReference implementation. Resolves: https://github.com/elastic/logstash/issues/13606 Resolves: https://github.com/elastic/logstash/issues/11608 * field_reference: support escape sequences Adds a `config.field_reference.escape_style` option and a companion command-line flag `--field-reference-escape-style` allowing a user to opt into one of two proposed escape-sequence implementations for field reference parsing: - `PERCENT`: URI-style `%`+`HH` hexadecimal encoding of UTF-8 bytes - `AMPERSAND`: HTML-style `&#`+`DD`+`;` encoding of decimal Unicode code-points The default is `NONE`, which does _not_ proccess escape sequences. With this setting a user effectively cannot reference a field whose name contains FieldReference-reserved characters. | ESCAPE STYLE | `[` | `]` | | ------------ | ------- | ------- | | `NONE` | _N/A_ | _N/A_ | | `PERCENT` | `%5B` | `%5D` | | `AMPERSAND` | `[` | `]` | * fixup: no need to double-escape HTML-ish escape sequences in docs * Apply suggestions from code review Co-authored-by: Karol Bucek * field-reference: load escape style in runner * docs: sentences over semiciolons * field-reference: faster shortcut for PERCENT escape mode * field-reference: escape mode control downcase * field_reference: more s/experimental/technical preview/ * field_reference: still more s/experimental/technical preview/ Co-authored-by: Karol Bucek --- docs/static/field-reference.asciidoc | 12 + docs/static/settings-file.asciidoc | 17 +- logstash-core/lib/logstash/environment.rb | 1 + logstash-core/lib/logstash/runner.rb | 13 + logstash-core/locales/en.yml | 29 ++ logstash-core/spec/logstash/event_spec.rb | 14 + .../main/java/org/logstash/ConvertedMap.java | 49 ++- .../java/org/logstash/FieldReference.java | 67 +++- .../java/org/logstash/util/EscapeHandler.java | 74 ++++ .../src/test/java/org/logstash/EventTest.java | 15 + .../java/org/logstash/FieldReferenceTest.java | 355 +++++++++++------- .../ext/JrubyEventExtLibraryTest.java | 20 +- 12 files changed, 504 insertions(+), 162 deletions(-) create mode 100644 logstash-core/src/main/java/org/logstash/util/EscapeHandler.java diff --git a/docs/static/field-reference.asciidoc b/docs/static/field-reference.asciidoc index 0b295572289..830c0912e28 100644 --- a/docs/static/field-reference.asciidoc +++ b/docs/static/field-reference.asciidoc @@ -5,6 +5,7 @@ It is often useful to be able to refer to a field or collection of fields by nam you can use the Logstash field reference syntax. The syntax to access a field specifies the entire path to the field, with each fragment wrapped in square brackets. +When a field name contains square brackets, they must be properly <>. _Field References_ can be expressed literally within <> statements in your pipeline configurations, as string arguments to your pipeline plugins, or within sprintf statements that will be used by your pipeline plugins: @@ -133,3 +134,14 @@ embeddedFieldReference ; An _Embedded Field Reference_ is a _Field Reference_ that is itself wrapped in square brackets (`[` and `]`), and can be a component of a _Composite Field Reference_. + +[float] +[[formal-grammar-escape-sequences]] +=== Escape Sequences + +For {ls} to reference a field whose name contains a character that has special meaning in the field reference grammar, the character must be escaped. +Logstash can be globally configured to use one of two field reference escape modes: + + - `none` (default): no escape sequence processing is done. Fields containing literal square brackets cannot be referenced by the Event API. + - `percent`: URI-style percent encoding of UTF-8 bytes. The left square bracket (`[`) is expressed as `%5B`, and the right square bracket (`]`) is expressed as `%5D`. + - `ampersand`: HTML-style ampersand encoding (`&#` + decimal unicode codepoint + `;`). The left square bracket (`[`) is expressed as `[`, and the right square bracket (`]`) is expressed as `]`. diff --git a/docs/static/settings-file.asciidoc b/docs/static/settings-file.asciidoc index 2c4146fc96d..b4770e92771 100644 --- a/docs/static/settings-file.asciidoc +++ b/docs/static/settings-file.asciidoc @@ -150,7 +150,7 @@ Values other than `disabled` are currently considered BETA, and may produce unin | `config.string` | A string that contains the pipeline configuration to use for the main pipeline. Use the same syntax as the config file. -| None +| _N/A_ | `config.test_and_exit` | When set to `true`, checks that the configuration is valid and then exits. Note that grok patterns are not checked for @@ -178,9 +178,22 @@ Values other than `disabled` are currently considered BETA, and may produce unin | When set to `true`, quoted strings will process the following escape sequences: `\n` becomes a literal newline (ASCII 10). `\r` becomes a literal carriage return (ASCII 13). `\t` becomes a literal tab (ASCII 9). `\\` becomes a literal backslash `\`. `\"` becomes a literal double quotation mark. `\'` becomes a literal quotation mark. | `false` +| `config.field_reference.escape_style` +a| Provides a way to reference fields that contain <> `[` and `]`. + +NOTE: This feature is in *technical preview* and may change in the future. + +Current options are: + +* `percent`: URI-style `%`{plus}`HH` hexadecimal encoding of UTF-8 bytes (`[` -> `%5B`; `]` -> `%5D`) +* `ampersand`: HTML-style `&#`{plus}`DD`{plus}`;` encoding of decimal Unicode code-points (`[` -> `[`; `]` -> `]`) +* `none`: field names containing special characters _cannot_ be referenced. + +| `none` + | `modules` | When configured, `modules` must be in the nested YAML structure described above this table. -| None +| _N/A_ | `queue.type` | The internal queuing model to use for event buffering. Specify `memory` for legacy in-memory based queuing, or `persisted` for disk-based ACKed queueing (<>). diff --git a/logstash-core/lib/logstash/environment.rb b/logstash-core/lib/logstash/environment.rb index 75760925e35..4c03ff22460 100644 --- a/logstash-core/lib/logstash/environment.rb +++ b/logstash-core/lib/logstash/environment.rb @@ -50,6 +50,7 @@ module Environment Setting::Boolean.new("config.reload.automatic", false), Setting::TimeValue.new("config.reload.interval", "3s"), # in seconds Setting::Boolean.new("config.support_escapes", false), + Setting::String.new("config.field_reference.escape_style", "none", true, %w(none percent ampersand)), Setting::Boolean.new("metric.collect", true), Setting::String.new("pipeline.id", "main"), Setting::Boolean.new("pipeline.system", false), diff --git a/logstash-core/lib/logstash/runner.rb b/logstash-core/lib/logstash/runner.rb index cd6b4949232..14541c728a0 100644 --- a/logstash-core/lib/logstash/runner.rb +++ b/logstash-core/lib/logstash/runner.rb @@ -86,6 +86,11 @@ class LogStash::Runner < Clamp::StrictCommand :default => LogStash::SETTINGS.get_default("config.string"), :attribute_name => "config.string" + option ["--field-reference-escape-style"], "STYLE", + I18n.t("logstash.runner.flag.field-reference-escape-style"), + :default => LogStash::SETTINGS.get_default("config.field_reference.escape_style"), + :attribute_name => "config.field_reference.escape_style" + # Module settings option ["--modules"], "MODULES", I18n.t("logstash.runner.flag.modules"), @@ -324,6 +329,14 @@ def execute validate_settings! or return 1 @dispatcher.fire(:before_bootstrap_checks) + field_reference_escape_style_setting = settings.get_setting('config.field_reference.escape_style') + if field_reference_escape_style_setting.set? + logger.warn(I18n.t("logstash.settings.technical_preview.set", canonical_name: field_reference_escape_style_setting.name)) + end + field_reference_escape_style = field_reference_escape_style_setting.value + logger.debug("Setting global FieldReference escape style: #{field_reference_escape_style}") + org.logstash.FieldReference::set_escape_style(field_reference_escape_style) + return start_shell(setting("interactive"), binding) if setting("interactive") module_parser = LogStash::Modules::CLIParser.new(setting("modules_list"), setting("modules_variable_list")) diff --git a/logstash-core/locales/en.yml b/logstash-core/locales/en.yml index fb728f87570..2f8170ed9c6 100644 --- a/logstash-core/locales/en.yml +++ b/logstash-core/locales/en.yml @@ -219,6 +219,31 @@ en: "%{default_output}" If you wish to use both defaults, please use the empty string for the '-e' flag. + field-reference-escape-style: |+ + Use the given STYLE when parsing field + references. This allows you to reference fields + whose name includes characters that are + meaningful in a field reference including square + brackets (`[` and `]`). + + This feature is in TECHNICAL PREVIEW, and + implementations are subject to change. + + Available escape styles are: + - `none`: escape sequences in field references + are not processed, which means fields that + contain special characters cannot be + referenced. + - `percent`: characters may be encoded with + URI-style percent notation represeting UTF-8 + bytes (`[` is `%5B`; `]` is `%5D`). + Unlike URI-encoding, literal percent characters + do not need to be escaped unless followed by a + sequence of 2 capital hexadecimal characters. + - `ampersand`: characters may be encoded with + HTML-style ampersand-hash encoding notation + representing decimal unicode codepoints + (`[` is `[`; `]` is `]`). modules: |+ Load Logstash modules. Modules can be defined using multiple instances @@ -431,4 +456,8 @@ en: ambiguous: >- Both `%{canonical_name}` and its deprecated alias `%{deprecated_alias}` have been set. Please only set `%{canonical_name}` + technical_preview: + set: >- + The setting `%{canonical_name}` is in TECHNICAL PREVIEW and its implementation + is subject to change in a future release of Logstash diff --git a/logstash-core/spec/logstash/event_spec.rb b/logstash-core/spec/logstash/event_spec.rb index b130a1c22a6..53f32837aa3 100644 --- a/logstash-core/spec/logstash/event_spec.rb +++ b/logstash-core/spec/logstash/event_spec.rb @@ -192,6 +192,15 @@ expect { e.set('[', 'value') }.to raise_exception(::RuntimeError) end end + + context 'with map value whose keys have FieldReference-special characters' do + let(:event) { LogStash::Event.new } + let(:value) { {"this[field]" => "okay"} } + it 'sets the value correctly' do + event.set('[some][field]', value.dup) + expect(event.get('[some][field]')).to eq(value) + end + end end context "timestamp" do @@ -351,6 +360,11 @@ expect(e.timestamp.to_iso8601).to eq("2016-05-28T23:02:05.350Z") end + it 'accepts maps whose keys contain FieldReference-special characters' do + e = LogStash::Event.new({"nested" => {"i][egal" => "okay"}, "n[0]" => "bene"}) + expect(e.get('nested')).to eq({"i][egal" => "okay"}) + expect(e.to_hash).to include("n[0]" => "bene") + end end context "method missing exception messages" do diff --git a/logstash-core/src/main/java/org/logstash/ConvertedMap.java b/logstash-core/src/main/java/org/logstash/ConvertedMap.java index a89855dfcd7..c9f0da606e9 100644 --- a/logstash-core/src/main/java/org/logstash/ConvertedMap.java +++ b/logstash-core/src/main/java/org/logstash/ConvertedMap.java @@ -24,6 +24,7 @@ import java.util.HashMap; import java.util.IdentityHashMap; import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import org.jruby.RubyHash; import org.jruby.RubyString; import org.jruby.runtime.ThreadContext; @@ -35,15 +36,51 @@ * as key.

*

The {@code put} method will work with any {@link String} key but is only intended for use in * situations where {@link ConvertedMap#putInterned(String, Object)} would require manually - * interning the {@link String} key. This is due to the fact that we use our internal - * {@link FieldReference} cache to get an interned version of the given key instead of JDKs - * {@link String#intern()}, which is faster since it works from a much smaller and hotter cache - * in {@link FieldReference#CACHE} than using String interning directly.

+ * interning the {@link String} key. + * This is due to the fact that it is based on {@link IdentityHashMap}, and we rely on the String + * intern pool to ensure identity match of equivalent strings. + * For performance, we keep a global cache of strings that have been interned for use with {@link ConvertedMap}, + * and encourage interning through {@link ConvertedMap#internStringForUseAsKey(String)} to avoid + * the performance pentalty of the global string intern pool. */ public final class ConvertedMap extends IdentityHashMap { private static final long serialVersionUID = 1L; + private static final ConcurrentHashMap KEY_CACHE = new ConcurrentHashMap<>(100, 0.2F, 16); + + /** + * Returns an equivalent interned string, possibly avoiding the + * global intern pool. + * + * @param candidate the candidate {@link String} + * @return an interned string from the global String intern pool + */ + static String internStringForUseAsKey(final String candidate) { + // TODO: replace with LRU cache and/or isolated intern pool + final String cached = KEY_CACHE.get(candidate); + if (cached != null) { return cached; } + + final String interned = candidate.intern(); + if (KEY_CACHE.size() <= 10_000 ) { + KEY_CACHE.put(interned, interned); + } + return interned; + } + + /** + * Ensures that the provided {@code String[]} contains only + * instances that have been {@link ConvertedMap::internStringForUseAsKey}, + * possibly replacing entries with equivalent interned strings. + * + * @param candidates an array of non-null strings + */ + static void internStringsForUseAsKeys(final String[] candidates) { + for (int i = 0; i < candidates.length; i++) { + candidates[i] = internStringForUseAsKey(candidates[i]); + } + } + private static final RubyHash.VisitorWithState RUBY_HASH_VISITOR = new RubyHash.VisitorWithState() { @Override @@ -91,7 +128,7 @@ public static ConvertedMap newFromRubyHash(final ThreadContext context, final Ru @Override public Object put(final String key, final Object value) { - return super.put(FieldReference.from(key).getKey(), value); + return super.put(internStringForUseAsKey(key), value); } /** @@ -118,6 +155,6 @@ public Object unconvert() { * @return Interned String */ private static String convertKey(final RubyString key) { - return FieldReference.from(key).getKey(); + return internStringForUseAsKey(key.asJavaString()); } } diff --git a/logstash-core/src/main/java/org/logstash/FieldReference.java b/logstash-core/src/main/java/org/logstash/FieldReference.java index c0b86d4fb2a..ff121413a79 100644 --- a/logstash-core/src/main/java/org/logstash/FieldReference.java +++ b/logstash-core/src/main/java/org/logstash/FieldReference.java @@ -27,7 +27,10 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +import java.util.stream.Collectors; + import org.jruby.RubyString; +import org.logstash.util.EscapeHandler; /** * Represents a reference to another field of the event {@link Event} @@ -44,19 +47,41 @@ public static class IllegalSyntaxException extends RuntimeException { } } + private static EscapeHandler ESCAPE_HANDLER = EscapeHandler.NONE; + + public static void setEscapeStyle(final String escapeStyleSpec) { + final EscapeHandler newEscapeHandler; + switch(escapeStyleSpec) { + case "none": + newEscapeHandler = EscapeHandler.NONE; + break; + case "percent": + newEscapeHandler = EscapeHandler.PERCENT; + break; + case "ampersand": + newEscapeHandler = EscapeHandler.AMPERSAND; + break; + default: + throw new IllegalArgumentException(String.format("Invalid escape style: `%s`", escapeStyleSpec)); + } + ESCAPE_HANDLER = newEscapeHandler; + CACHE.clear(); + RUBY_CACHE.clear(); + } + /** * This type indicates that the referenced that is the metadata of an {@link Event} found in - * {@link Event#metadata}. + * {@link Event#getMetadata()}. */ public static final int META_PARENT = 0; /** - * This type indicates that the referenced data must be looked up from {@link Event#metadata}. + * This type indicates that the referenced data must be looked up from {@link Event#getMetadata()}. */ public static final int META_CHILD = 1; /** - * This type indicates that the referenced data must be looked up from {@link Event#data}. + * This type indicates that the referenced data must be looked up from {@link Event#getData()}. */ private static final int DATA_CHILD = -1; @@ -72,15 +97,6 @@ public static class IllegalSyntaxException extends RuntimeException { */ private static final StrictTokenizer TOKENIZER = new StrictTokenizer(); - /** - * Unique {@link FieldReference} pointing at the timestamp field in a {@link Event}. - */ - public static final FieldReference TIMESTAMP_REFERENCE = - deduplicate(new FieldReference(EMPTY_STRING_ARRAY, Event.TIMESTAMP, DATA_CHILD)); - - private static final FieldReference METADATA_PARENT_REFERENCE = - new FieldReference(EMPTY_STRING_ARRAY, Event.METADATA, META_PARENT); - /** * Cache of all existing {@link FieldReference} by their {@link RubyString} source. */ @@ -93,6 +109,11 @@ public static class IllegalSyntaxException extends RuntimeException { private static final Map CACHE = new ConcurrentHashMap<>(64, 0.2F, 1); + /** + * Unique {@link FieldReference} pointing at the timestamp field in a {@link Event}. + */ + public static final FieldReference TIMESTAMP_REFERENCE = FieldReference.from(Event.TIMESTAMP); + private final String[] path; private final String key; @@ -106,9 +127,10 @@ public static class IllegalSyntaxException extends RuntimeException { private final int type; private FieldReference(final String[] path, final String key, final int type) { - this.key = key; - this.type = type; + this.key = ConvertedMap.internStringForUseAsKey(key); + ConvertedMap.internStringsForUseAsKeys(path); this.path = path; + this.type = type; hash = calculateHash(this.key, this.path, this.type); } @@ -219,12 +241,14 @@ private static FieldReference parseToCache(final String reference) { } private static FieldReference parse(final CharSequence reference) { - final List path = TOKENIZER.tokenize(reference); + final List path = TOKENIZER.tokenize(reference).stream() + .map(ESCAPE_HANDLER::unescape) + .collect(Collectors.toList()); - final String key = path.remove(path.size() - 1).intern(); + final String key = path.remove(path.size() - 1); final boolean empty = path.isEmpty(); if (empty && key.equals(Event.METADATA)) { - return METADATA_PARENT_REFERENCE; + return new FieldReference(EMPTY_STRING_ARRAY, key, META_PARENT); } else if (!empty && path.get(0).equals(Event.METADATA)) { return new FieldReference( path.subList(1, path.size()).toArray(EMPTY_STRING_ARRAY), key, META_CHILD @@ -240,7 +264,11 @@ private static FieldReference parse(final CharSequence reference) { **/ private static class StrictTokenizer { - public List tokenize(CharSequence reference) { + /** + * @param reference a sequence of characters representing a reference to a field + * @return a list of string path fragments. + */ + public List tokenize(final CharSequence reference) { ArrayList path = new ArrayList<>(); final int length = reference.length(); @@ -286,7 +314,7 @@ public List tokenize(CharSequence reference) { if (splitPoint < i) { // if we have something to add, add it. - path.add(reference.subSequence(splitPoint, i).toString().intern()); + path.add(reference.subSequence(splitPoint, i).toString()); } depth--; @@ -310,6 +338,7 @@ public List tokenize(CharSequence reference) { // further processing path.add(reference.toString()); return path; + } else if (depth > 0) { // when we hit the end-of-input while still in an open bracket, we have an invalid field reference potentiallyAmbiguousSyntaxDetected = true; diff --git a/logstash-core/src/main/java/org/logstash/util/EscapeHandler.java b/logstash-core/src/main/java/org/logstash/util/EscapeHandler.java new file mode 100644 index 00000000000..21d185a6815 --- /dev/null +++ b/logstash-core/src/main/java/org/logstash/util/EscapeHandler.java @@ -0,0 +1,74 @@ +package org.logstash.util; + +import org.apache.commons.codec.DecoderException; +import org.apache.commons.codec.binary.Hex; + +import java.net.URLDecoder; +import java.nio.charset.StandardCharsets; +import java.util.regex.Pattern; + +public interface EscapeHandler { + String unescape(String escaped); + String escape(String unescaped); + + EscapeHandler NONE = new EscapeHandler() { + @Override + public String unescape(final String escaped) { + return escaped; + } + + @Override + public String escape(final String unescaped) { + return unescaped; + } + }; + + EscapeHandler PERCENT = new EscapeHandler() { + private final Pattern ESCAPE_REQUIRED_PERCENT_LITERAL = Pattern.compile("%(?=[0-9A-F]{2})"); + + @Override + public String escape(final String unescaped) { + // When a percent-literal is followed by a pair of hex digits, we must escape it. + return ESCAPE_REQUIRED_PERCENT_LITERAL.matcher(unescaped).replaceAll("%25") + .replace("[", "%5B") + .replace("]", "%5D"); + } + + private final Pattern PERCENT_ENCODED_SEQUENCE = Pattern.compile("%[0-9A-F]{2}"); + private final Pattern UNESCAPED_PERCENT_LITERAL = Pattern.compile("%(?![0-9A-F]{2})"); + + public String unescape(String escaped) { + if (!escaped.contains("%")) { return escaped; } + if (!PERCENT_ENCODED_SEQUENCE.matcher(escaped).find()) { return escaped; } + + // In order to support unescaped percent-literals without implementing + // our own percent-decoder, we need to detect them and escape them before + // handing off to java's URLDecoder. + escaped = UNESCAPED_PERCENT_LITERAL.matcher(escaped).replaceAll("%25"); + + return URLDecoder.decode(escaped, StandardCharsets.UTF_8); + } + }; + + EscapeHandler AMPERSAND = new EscapeHandler() { + private final Pattern AMPERSAND_ENCODED_SEQUENCE = Pattern.compile("&#([0-9]{2,});"); + + @Override + public String escape(final String unescaped) { + return unescaped.replaceAll(AMPERSAND_ENCODED_SEQUENCE.pattern(), "&#$1;") + .replace("[", "[") + .replace("]", "]"); + } + + @Override + public String unescape(final String escaped) { + if (!escaped.contains("&")) { return escaped; } + + return AMPERSAND_ENCODED_SEQUENCE.matcher(escaped).replaceAll(matchResult -> { + final int codePoint = Integer.parseInt(matchResult.group(1)); + final char[] chars = Character.toChars(codePoint); + return String.copyValueOf(chars); + }); + } + }; +} diff --git a/logstash-core/src/test/java/org/logstash/EventTest.java b/logstash-core/src/test/java/org/logstash/EventTest.java index 203bb44971b..9d7348d07e1 100644 --- a/logstash-core/src/test/java/org/logstash/EventTest.java +++ b/logstash-core/src/test/java/org/logstash/EventTest.java @@ -156,6 +156,21 @@ public void testSimpleStringFieldToJson() throws Exception { assertJsonEquals("{\"@timestamp\":\"" + e.getTimestamp().toString() + "\",\"foo\":\"bar\",\"@version\":\"1\"}", e.toJson()); } + @Test + public void testFieldThatIsNotAValidNestedFieldReference() throws Exception { + Map data = new HashMap<>(); + data.put("this[key]", "value"); + final Event e = new Event(data); + assertJsonEquals("{\"@timestamp\":\"" + e.getTimestamp().toString() + "\",\"this[key]\":\"value\",\"@version\":\"1\"}", e.toJson()); + } + @Test + public void testFieldThatLooksLikeAValidNestedFieldReference() throws Exception { + Map data = new HashMap<>(); + data.put("[deeply][nested][field]", "value"); + final Event e = new Event(data); + assertJsonEquals("{\"@timestamp\":\"" + e.getTimestamp().toString() + "\",\"[deeply][nested][field]\":\"value\",\"@version\":\"1\"}", e.toJson()); + } + @Test public void testSimpleIntegerFieldToJson() throws Exception { Map data = new HashMap<>(); diff --git a/logstash-core/src/test/java/org/logstash/FieldReferenceTest.java b/logstash-core/src/test/java/org/logstash/FieldReferenceTest.java index fcc10cbe4ee..eda9d9627e7 100644 --- a/logstash-core/src/test/java/org/logstash/FieldReferenceTest.java +++ b/logstash-core/src/test/java/org/logstash/FieldReferenceTest.java @@ -27,6 +27,8 @@ import org.junit.After; import org.junit.Before; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Suite; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; @@ -34,161 +36,260 @@ import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; -public final class FieldReferenceTest extends RubyTestBase { - @Before - @After - public void clearInternalCaches() { - getInternalCache("CACHE").clear(); - getInternalCache("DEDUP").clear(); - getInternalCache("RUBY_CACHE").clear(); - } +@RunWith(Suite.class) +@Suite.SuiteClasses({ + FieldReferenceTest.EscapeNone.class, + FieldReferenceTest.EscapePercent.class, + FieldReferenceTest.EscapeAmpersand.class, +}) +public final class FieldReferenceTest { + private static abstract class Base extends RubyTestBase { + public abstract String getEscapeMode(); - @Test - public void deduplicatesTimestamp() throws Exception { - assertTrue(FieldReference.from("@timestamp") == FieldReference.from("[@timestamp]")); - } + @Before + public void overrideGlobalEscapeMode() { + FieldReference.setEscapeStyle(getEscapeMode()); + } + @After + public void restoreGlobalEscapeMode() { + // Default value for `config.field_reference.escape_style` + FieldReference.setEscapeStyle("none"); + } - @Test - public void testParseEmptyString() { - final FieldReference emptyReference = FieldReference.from(""); - assertNotNull(emptyReference); - assertEquals( - emptyReference, FieldReference.from(RubyUtil.RUBY.newString("")) - ); - } + @Before + @After + public void clearInternalCaches() { + getInternalCache("CACHE").clear(); + getInternalCache("DEDUP").clear(); + getInternalCache("RUBY_CACHE").clear(); + } - @Test - public void testCacheUpperBound() { - final Map cache = getInternalCache("CACHE"); - final int initial = cache.size(); - for (int i = 0; i < 10_001 - initial; ++i) { - FieldReference.from(String.format("[array][%d]", i)); + @Test + public void deduplicatesTimestamp() throws Exception { + assertTrue(FieldReference.from("@timestamp") == FieldReference.from("[@timestamp]")); } - assertThat(cache.size(), CoreMatchers.is(10_000)); - } - @Test - public void testRubyCacheUpperBound() { - final Map cache = getInternalCache("RUBY_CACHE"); - final int initial = cache.size(); - for (int i = 0; i < 10_050 - initial; ++i) { - final RubyString rubyString = RubyUtil.RUBY.newString(String.format("[array][%d]", i)); - FieldReference.from(rubyString); + @Test + public void testParseEmptyString() { + final FieldReference emptyReference = FieldReference.from(""); + assertNotNull(emptyReference); + assertEquals( + emptyReference, FieldReference.from(RubyUtil.RUBY.newString("")) + ); } - assertThat(cache.size(), CoreMatchers.is(10_000)); - } - @Test - public void testParseSingleBareField() throws Exception { - FieldReference f = FieldReference.from("foo"); - assertEquals(0, f.getPath().length); - assertEquals("foo", f.getKey()); - } + @Test + public void testCacheUpperBound() { + final Map cache = getInternalCache("CACHE"); + final int initial = cache.size(); + for (int i = 0; i < 10_001 - initial; ++i) { + FieldReference.from(String.format("[array][%d]", i)); + } + assertThat(cache.size(), CoreMatchers.is(10_000)); + } - @Test - public void testParseSingleFieldPath() throws Exception { - FieldReference f = FieldReference.from("[foo]"); - assertEquals(0, f.getPath().length); - assertEquals("foo", f.getKey()); - } + @Test + public void testRubyCacheUpperBound() { + final Map cache = getInternalCache("RUBY_CACHE"); + final int initial = cache.size(); + for (int i = 0; i < 10_050 - initial; ++i) { + final RubyString rubyString = RubyUtil.RUBY.newString(String.format("[array][%d]", i)); + FieldReference.from(rubyString); + } + assertThat(cache.size(), CoreMatchers.is(10_000)); + } - @Test - public void testParse2FieldsPath() throws Exception { - FieldReference f = FieldReference.from("[foo][bar]"); - assertArrayEquals(new String[]{"foo"}, f.getPath()); - assertEquals("bar", f.getKey()); - } + @Test + public void testParseSingleBareField() throws Exception { + FieldReference f = FieldReference.from("foo"); + assertEquals(0, f.getPath().length); + assertEquals("foo", f.getKey()); + } - @Test - public void testParse3FieldsPath() throws Exception { - FieldReference f = FieldReference.from("[foo][bar][baz]"); - assertArrayEquals(new String[]{"foo", "bar"}, f.getPath()); - assertEquals("baz", f.getKey()); - } + @Test + public void testParseSingleFieldPath() throws Exception { + FieldReference f = FieldReference.from("[foo]"); + assertEquals(0, f.getPath().length); + assertEquals("foo", f.getKey()); + } - @Test - public void testEmbeddedSingleReference() throws Exception { - FieldReference f = FieldReference.from("[[foo]][bar]"); - assertArrayEquals(new String[]{"foo"}, f.getPath()); - assertEquals("bar", f.getKey()); - } + @Test + public void testParse2FieldsPath() throws Exception { + FieldReference f = FieldReference.from("[foo][bar]"); + assertArrayEquals(new String[]{"foo"}, f.getPath()); + assertEquals("bar", f.getKey()); + } - @Test - public void testEmbeddedDeepReference() throws Exception { - FieldReference f = FieldReference.from("[[foo][bar]][baz]"); - assertArrayEquals(new String[]{"foo", "bar"}, f.getPath()); - assertEquals("baz", f.getKey()); - } + @Test + public void testParse3FieldsPath() throws Exception { + FieldReference f = FieldReference.from("[foo][bar][baz]"); + assertArrayEquals(new String[]{"foo", "bar"}, f.getPath()); + assertEquals("baz", f.getKey()); + } - @Test(expected=FieldReference.IllegalSyntaxException.class) - public void testParseInvalidEmbeddedDeepReference() throws Exception { - FieldReference f = FieldReference.from("[[foo][bar]nope][baz]"); - } + @Test + public void testEmbeddedSingleReference() throws Exception { + FieldReference f = FieldReference.from("[[foo]][bar]"); + assertArrayEquals(new String[]{"foo"}, f.getPath()); + assertEquals("bar", f.getKey()); + } - @Test(expected=FieldReference.IllegalSyntaxException.class) - public void testParseInvalidEmbeddedDeepReference2() throws Exception { - FieldReference f = FieldReference.from("[nope[foo][bar]][baz]"); - } + @Test + public void testEmbeddedDeepReference() throws Exception { + FieldReference f = FieldReference.from("[[foo][bar]][baz]"); + assertArrayEquals(new String[]{"foo", "bar"}, f.getPath()); + assertEquals("baz", f.getKey()); + } - @Test(expected=FieldReference.IllegalSyntaxException.class) - public void testParseInvalidNoCloseBracket() throws Exception { - FieldReference.from("[foo][bar][baz"); - } + @Test(expected = FieldReference.IllegalSyntaxException.class) + public void testParseInvalidEmbeddedDeepReference() throws Exception { + FieldReference f = FieldReference.from("[[foo][bar]nope][baz]"); + } - @Test(expected=FieldReference.IllegalSyntaxException.class) - public void testParseInvalidNoInitialOpenBracket() throws Exception { - FieldReference.from("foo[bar][baz]"); - } + @Test(expected = FieldReference.IllegalSyntaxException.class) + public void testParseInvalidEmbeddedDeepReference2() throws Exception { + FieldReference f = FieldReference.from("[nope[foo][bar]][baz]"); + } - @Test(expected=FieldReference.IllegalSyntaxException.class) - public void testParseInvalidMissingMiddleBracket() throws Exception { - FieldReference.from("[foo]bar[baz]"); - } + @Test(expected = FieldReference.IllegalSyntaxException.class) + public void testParseInvalidNoCloseBracket() throws Exception { + FieldReference.from("[foo][bar][baz"); + } - @Test(expected=FieldReference.IllegalSyntaxException.class) - public void testParseInvalidOnlyOpenBracket() throws Exception { - FieldReference.from("["); - } + @Test(expected = FieldReference.IllegalSyntaxException.class) + public void testParseInvalidNoInitialOpenBracket() throws Exception { + FieldReference.from("foo[bar][baz]"); + } - @Test(expected=FieldReference.IllegalSyntaxException.class) - public void testParseInvalidOnlyCloseBracket() throws Exception { - FieldReference.from("]"); - } + @Test(expected = FieldReference.IllegalSyntaxException.class) + public void testParseInvalidMissingMiddleBracket() throws Exception { + FieldReference.from("[foo]bar[baz]"); + } - @Test(expected=FieldReference.IllegalSyntaxException.class) - public void testParseInvalidLotsOfOpenBrackets() throws Exception { - FieldReference.from("[[[[[[[[[[[]"); - } + @Test(expected = FieldReference.IllegalSyntaxException.class) + public void testParseInvalidOnlyOpenBracket() throws Exception { + FieldReference.from("["); + } - @Test(expected=FieldReference.IllegalSyntaxException.class) - public void testParseInvalidDoubleCloseBrackets() throws Exception { - FieldReference.from("[foo]][bar]"); - } + @Test(expected = FieldReference.IllegalSyntaxException.class) + public void testParseInvalidOnlyCloseBracket() throws Exception { + FieldReference.from("]"); + } - @Test(expected=FieldReference.IllegalSyntaxException.class) - public void testParseNestingSquareBrackets() throws Exception { - FieldReference.from("[this[is]terrible]"); + @Test(expected = FieldReference.IllegalSyntaxException.class) + public void testParseInvalidLotsOfOpenBrackets() throws Exception { + FieldReference.from("[[[[[[[[[[[]"); + } + + @Test(expected = FieldReference.IllegalSyntaxException.class) + public void testParseInvalidDoubleCloseBrackets() throws Exception { + FieldReference.from("[foo]][bar]"); + } + + @Test(expected = FieldReference.IllegalSyntaxException.class) + public void testParseNestingSquareBrackets() throws Exception { + FieldReference.from("[this[is]terrible]"); + } + + @Test(expected = FieldReference.IllegalSyntaxException.class) + public void testParseChainedNestingSquareBrackets() throws Exception { + FieldReference.from("[this[is]terrible][and][it[should-not[work]]]"); + } + + @Test(expected = FieldReference.IllegalSyntaxException.class) + public void testParseLiteralSquareBrackets() throws Exception { + FieldReference.from("this[index]"); + } + + @SuppressWarnings("unchecked") + private Map getInternalCache(final String fieldName) { + final Field cacheField; + try { + cacheField = FieldReference.class.getDeclaredField(fieldName); + cacheField.setAccessible(true); + return (Map) cacheField.get(null); + } catch (NoSuchFieldException | IllegalAccessException e) { + throw new RuntimeException(e); + } + } } - @Test(expected=FieldReference.IllegalSyntaxException.class) - public void testParseChainedNestingSquareBrackets() throws Exception { - FieldReference.from("[this[is]terrible][and][it[should-not[work]]]"); + public static class EscapeNone extends Base { + + @Override + public String getEscapeMode() { + return "none"; + } } - @Test(expected=FieldReference.IllegalSyntaxException.class) - public void testParseLiteralSquareBrackets() throws Exception { - FieldReference.from("this[index]"); + public static class EscapePercent extends Base { + + @Override + public String getEscapeMode() { + return "percent"; + } + + @Test + public void testReadFieldWithSquareBracketLiteralsInPath() { + final FieldReference fr = FieldReference.from("[foo][bar%5Bbingo%5D][okay]"); + assertEquals(2, fr.getPath().length); + assertEquals("foo", fr.getPath()[0]); + assertEquals("bar[bingo]", fr.getPath()[1]); + assertEquals("okay", fr.getKey()); + } + + @Test + public void testReadFieldWithSquareBracketLiteralsInKey() { + final FieldReference fr = FieldReference.from("[foo][okay][bar%5Bbingo%5D]"); + assertEquals(2, fr.getPath().length); + assertEquals("foo", fr.getPath()[0]); + assertEquals("okay", fr.getPath()[1]); + assertEquals("bar[bingo]", fr.getKey()); + } + + @Test + public void testReadFieldWithPercentLiteralInKey() { + final FieldReference fr = FieldReference.from("[foo][bar][95%]"); + assertEquals(2, fr.getPath().length); + assertEquals("foo", fr.getPath()[0]); + assertEquals("bar", fr.getPath()[1]); + assertEquals("95%", fr.getKey()); + } } - @SuppressWarnings("unchecked") - private Map getInternalCache(final String fieldName) { - final Field cacheField; - try { - cacheField = FieldReference.class.getDeclaredField(fieldName); - cacheField.setAccessible(true); - return (Map) cacheField.get(null); - } catch (NoSuchFieldException | IllegalAccessException e) { - throw new RuntimeException(e); + public static class EscapeAmpersand extends Base { + + @Override + public String getEscapeMode() { + return "ampersand"; + } + + + @Test + public void testReadFieldWithSquareBracketLiteralsInPath() { + final FieldReference fr = FieldReference.from("[foo][bar[bingo]][okay]"); + assertEquals(2, fr.getPath().length); + assertEquals("foo", fr.getPath()[0]); + assertEquals("bar[bingo]", fr.getPath()[1]); + assertEquals("okay", fr.getKey()); + } + + @Test + public void testReadFieldWithSquareBracketLiteralsInKey() { + final FieldReference fr = FieldReference.from("[foo][okay][bar[bingo]]"); + assertEquals(2, fr.getPath().length); + assertEquals("foo", fr.getPath()[0]); + assertEquals("okay", fr.getPath()[1]); + assertEquals("bar[bingo]", fr.getKey()); + } + + @Test + public void testReadFieldWithAmpersandLiteralInKey() { + final FieldReference fr = FieldReference.from("[foo][bar][this&that]"); + assertEquals(2, fr.getPath().length); + assertEquals("foo", fr.getPath()[0]); + assertEquals("bar", fr.getPath()[1]); + assertEquals("this&that", fr.getKey()); } } } diff --git a/logstash-core/src/test/java/org/logstash/ext/JrubyEventExtLibraryTest.java b/logstash-core/src/test/java/org/logstash/ext/JrubyEventExtLibraryTest.java index 538d8695715..2bf627c9eb0 100644 --- a/logstash-core/src/test/java/org/logstash/ext/JrubyEventExtLibraryTest.java +++ b/logstash-core/src/test/java/org/logstash/ext/JrubyEventExtLibraryTest.java @@ -27,10 +27,12 @@ import java.util.Map; import org.assertj.core.api.Assertions; import org.hamcrest.CoreMatchers; +import org.jruby.RubyBoolean; import org.jruby.RubyHash; import org.jruby.RubyString; import org.jruby.exceptions.RuntimeError; import org.jruby.javasupport.JavaUtil; +import org.jruby.runtime.Block; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; import org.junit.Assert; @@ -98,19 +100,21 @@ public void correctlyRaiseRubyRuntimeErrorWhenGivenInvalidFieldReferences() { } @Test - public void correctlyRaiseRubyRuntimeErrorWhenGivenInvalidFieldReferencesInMap() { + public void correctlySetsValueWhenGivenMapWithKeysThatHaveFieldReferenceSpecialCharacters() { final ThreadContext context = RubyUtil.RUBY.getCurrentContext(); final JrubyEventExtLibrary.RubyEvent event = JrubyEventExtLibrary.RubyEvent.newRubyEvent(context.runtime); final RubyString key = rubyString("foo"); final RubyHash value = RubyHash.newHash(context.runtime, Collections.singletonMap(rubyString("il[[]]]legal"), rubyString("okay")), context.nil); - try { - event.ruby_set_field(context, key, value); - } catch (RuntimeError rubyRuntimeError) { - Assert.assertThat(rubyRuntimeError.getLocalizedMessage(), CoreMatchers.containsString("Invalid FieldReference")); - return; - } - Assert.fail("expected ruby RuntimeError was not thrown."); + + event.ruby_set_field(context, key, value); + IRubyObject retrievedValue = event.ruby_get_field(context, key); + Assert.assertThat(retrievedValue, CoreMatchers.equalTo(value)); + + RubyHash eventHash = (RubyHash) event.ruby_to_hash_with_metadata(context); + IRubyObject nestedValue = eventHash.dig(context, rubyString("foo"), rubyString("il[[]]]legal")); + Assert.assertFalse(nestedValue.isNil()); + Assert.assertEquals(rubyString("okay"), nestedValue); } private static RubyString rubyString(final String java) {