diff --git a/logstash-core/spec/logstash/event_spec.rb b/logstash-core/spec/logstash/event_spec.rb index b130a1c22a6..53f32837aa3 100644 --- a/logstash-core/spec/logstash/event_spec.rb +++ b/logstash-core/spec/logstash/event_spec.rb @@ -192,6 +192,15 @@ expect { e.set('[', 'value') }.to raise_exception(::RuntimeError) end end + + context 'with map value whose keys have FieldReference-special characters' do + let(:event) { LogStash::Event.new } + let(:value) { {"this[field]" => "okay"} } + it 'sets the value correctly' do + event.set('[some][field]', value.dup) + expect(event.get('[some][field]')).to eq(value) + end + end end context "timestamp" do @@ -351,6 +360,11 @@ expect(e.timestamp.to_iso8601).to eq("2016-05-28T23:02:05.350Z") end + it 'accepts maps whose keys contain FieldReference-special characters' do + e = LogStash::Event.new({"nested" => {"i][egal" => "okay"}, "n[0]" => "bene"}) + expect(e.get('nested')).to eq({"i][egal" => "okay"}) + expect(e.to_hash).to include("n[0]" => "bene") + end end context "method missing exception messages" do diff --git a/logstash-core/src/main/java/org/logstash/ConvertedMap.java b/logstash-core/src/main/java/org/logstash/ConvertedMap.java index a89855dfcd7..c9f0da606e9 100644 --- a/logstash-core/src/main/java/org/logstash/ConvertedMap.java +++ b/logstash-core/src/main/java/org/logstash/ConvertedMap.java @@ -24,6 +24,7 @@ import java.util.HashMap; import java.util.IdentityHashMap; import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import org.jruby.RubyHash; import org.jruby.RubyString; import org.jruby.runtime.ThreadContext; @@ -35,15 +36,51 @@ * as key.

*

The {@code put} method will work with any {@link String} key but is only intended for use in * situations where {@link ConvertedMap#putInterned(String, Object)} would require manually - * interning the {@link String} key. This is due to the fact that we use our internal - * {@link FieldReference} cache to get an interned version of the given key instead of JDKs - * {@link String#intern()}, which is faster since it works from a much smaller and hotter cache - * in {@link FieldReference#CACHE} than using String interning directly.

+ * interning the {@link String} key. + * This is due to the fact that it is based on {@link IdentityHashMap}, and we rely on the String + * intern pool to ensure identity match of equivalent strings. + * For performance, we keep a global cache of strings that have been interned for use with {@link ConvertedMap}, + * and encourage interning through {@link ConvertedMap#internStringForUseAsKey(String)} to avoid + * the performance pentalty of the global string intern pool. */ public final class ConvertedMap extends IdentityHashMap { private static final long serialVersionUID = 1L; + private static final ConcurrentHashMap KEY_CACHE = new ConcurrentHashMap<>(100, 0.2F, 16); + + /** + * Returns an equivalent interned string, possibly avoiding the + * global intern pool. + * + * @param candidate the candidate {@link String} + * @return an interned string from the global String intern pool + */ + static String internStringForUseAsKey(final String candidate) { + // TODO: replace with LRU cache and/or isolated intern pool + final String cached = KEY_CACHE.get(candidate); + if (cached != null) { return cached; } + + final String interned = candidate.intern(); + if (KEY_CACHE.size() <= 10_000 ) { + KEY_CACHE.put(interned, interned); + } + return interned; + } + + /** + * Ensures that the provided {@code String[]} contains only + * instances that have been {@link ConvertedMap::internStringForUseAsKey}, + * possibly replacing entries with equivalent interned strings. + * + * @param candidates an array of non-null strings + */ + static void internStringsForUseAsKeys(final String[] candidates) { + for (int i = 0; i < candidates.length; i++) { + candidates[i] = internStringForUseAsKey(candidates[i]); + } + } + private static final RubyHash.VisitorWithState RUBY_HASH_VISITOR = new RubyHash.VisitorWithState() { @Override @@ -91,7 +128,7 @@ public static ConvertedMap newFromRubyHash(final ThreadContext context, final Ru @Override public Object put(final String key, final Object value) { - return super.put(FieldReference.from(key).getKey(), value); + return super.put(internStringForUseAsKey(key), value); } /** @@ -118,6 +155,6 @@ public Object unconvert() { * @return Interned String */ private static String convertKey(final RubyString key) { - return FieldReference.from(key).getKey(); + return internStringForUseAsKey(key.asJavaString()); } } diff --git a/logstash-core/src/main/java/org/logstash/FieldReference.java b/logstash-core/src/main/java/org/logstash/FieldReference.java index c0b86d4fb2a..0822e2c26c3 100644 --- a/logstash-core/src/main/java/org/logstash/FieldReference.java +++ b/logstash-core/src/main/java/org/logstash/FieldReference.java @@ -27,6 +27,7 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; + import org.jruby.RubyString; /** @@ -75,11 +76,7 @@ public static class IllegalSyntaxException extends RuntimeException { /** * Unique {@link FieldReference} pointing at the timestamp field in a {@link Event}. */ - public static final FieldReference TIMESTAMP_REFERENCE = - deduplicate(new FieldReference(EMPTY_STRING_ARRAY, Event.TIMESTAMP, DATA_CHILD)); - - private static final FieldReference METADATA_PARENT_REFERENCE = - new FieldReference(EMPTY_STRING_ARRAY, Event.METADATA, META_PARENT); + public static final FieldReference TIMESTAMP_REFERENCE = FieldReference.from(Event.TIMESTAMP); /** * Cache of all existing {@link FieldReference} by their {@link RubyString} source. @@ -106,9 +103,10 @@ public static class IllegalSyntaxException extends RuntimeException { private final int type; private FieldReference(final String[] path, final String key, final int type) { - this.key = key; - this.type = type; + this.key = ConvertedMap.internStringForUseAsKey(key); + ConvertedMap.internStringsForUseAsKeys(path); this.path = path; + this.type = type; hash = calculateHash(this.key, this.path, this.type); } @@ -221,10 +219,10 @@ private static FieldReference parseToCache(final String reference) { private static FieldReference parse(final CharSequence reference) { final List path = TOKENIZER.tokenize(reference); - final String key = path.remove(path.size() - 1).intern(); + final String key = path.remove(path.size() - 1); final boolean empty = path.isEmpty(); if (empty && key.equals(Event.METADATA)) { - return METADATA_PARENT_REFERENCE; + return new FieldReference(EMPTY_STRING_ARRAY, key, META_PARENT); } else if (!empty && path.get(0).equals(Event.METADATA)) { return new FieldReference( path.subList(1, path.size()).toArray(EMPTY_STRING_ARRAY), key, META_CHILD @@ -240,7 +238,11 @@ private static FieldReference parse(final CharSequence reference) { **/ private static class StrictTokenizer { - public List tokenize(CharSequence reference) { + /** + * @param reference a sequence of characters representing a reference to a field + * @return a list of string path fragments. + */ + public List tokenize(final CharSequence reference) { ArrayList path = new ArrayList<>(); final int length = reference.length(); @@ -286,7 +288,7 @@ public List tokenize(CharSequence reference) { if (splitPoint < i) { // if we have something to add, add it. - path.add(reference.subSequence(splitPoint, i).toString().intern()); + path.add(reference.subSequence(splitPoint, i).toString()); } depth--; @@ -309,7 +311,9 @@ public List tokenize(CharSequence reference) { // if we saw no brackets, this is a top-level reference that can be emitted as-is without // further processing path.add(reference.toString()); + path.trimToSize(); return path; + } else if (depth > 0) { // when we hit the end-of-input while still in an open bracket, we have an invalid field reference potentiallyAmbiguousSyntaxDetected = true; diff --git a/logstash-core/src/test/java/org/logstash/ext/JrubyEventExtLibraryTest.java b/logstash-core/src/test/java/org/logstash/ext/JrubyEventExtLibraryTest.java index 538d8695715..2bf627c9eb0 100644 --- a/logstash-core/src/test/java/org/logstash/ext/JrubyEventExtLibraryTest.java +++ b/logstash-core/src/test/java/org/logstash/ext/JrubyEventExtLibraryTest.java @@ -27,10 +27,12 @@ import java.util.Map; import org.assertj.core.api.Assertions; import org.hamcrest.CoreMatchers; +import org.jruby.RubyBoolean; import org.jruby.RubyHash; import org.jruby.RubyString; import org.jruby.exceptions.RuntimeError; import org.jruby.javasupport.JavaUtil; +import org.jruby.runtime.Block; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; import org.junit.Assert; @@ -98,19 +100,21 @@ public void correctlyRaiseRubyRuntimeErrorWhenGivenInvalidFieldReferences() { } @Test - public void correctlyRaiseRubyRuntimeErrorWhenGivenInvalidFieldReferencesInMap() { + public void correctlySetsValueWhenGivenMapWithKeysThatHaveFieldReferenceSpecialCharacters() { final ThreadContext context = RubyUtil.RUBY.getCurrentContext(); final JrubyEventExtLibrary.RubyEvent event = JrubyEventExtLibrary.RubyEvent.newRubyEvent(context.runtime); final RubyString key = rubyString("foo"); final RubyHash value = RubyHash.newHash(context.runtime, Collections.singletonMap(rubyString("il[[]]]legal"), rubyString("okay")), context.nil); - try { - event.ruby_set_field(context, key, value); - } catch (RuntimeError rubyRuntimeError) { - Assert.assertThat(rubyRuntimeError.getLocalizedMessage(), CoreMatchers.containsString("Invalid FieldReference")); - return; - } - Assert.fail("expected ruby RuntimeError was not thrown."); + + event.ruby_set_field(context, key, value); + IRubyObject retrievedValue = event.ruby_get_field(context, key); + Assert.assertThat(retrievedValue, CoreMatchers.equalTo(value)); + + RubyHash eventHash = (RubyHash) event.ruby_to_hash_with_metadata(context); + IRubyObject nestedValue = eventHash.dig(context, rubyString("foo"), rubyString("il[[]]]legal")); + Assert.assertFalse(nestedValue.isNil()); + Assert.assertEquals(rubyString("okay"), nestedValue); } private static RubyString rubyString(final String java) {