diff --git a/logstash-core/spec/logstash/event_spec.rb b/logstash-core/spec/logstash/event_spec.rb
index b130a1c22a6..53f32837aa3 100644
--- a/logstash-core/spec/logstash/event_spec.rb
+++ b/logstash-core/spec/logstash/event_spec.rb
@@ -192,6 +192,15 @@
expect { e.set('[', 'value') }.to raise_exception(::RuntimeError)
end
end
+
+ context 'with map value whose keys have FieldReference-special characters' do
+ let(:event) { LogStash::Event.new }
+ let(:value) { {"this[field]" => "okay"} }
+ it 'sets the value correctly' do
+ event.set('[some][field]', value.dup)
+ expect(event.get('[some][field]')).to eq(value)
+ end
+ end
end
context "timestamp" do
@@ -351,6 +360,11 @@
expect(e.timestamp.to_iso8601).to eq("2016-05-28T23:02:05.350Z")
end
+ it 'accepts maps whose keys contain FieldReference-special characters' do
+ e = LogStash::Event.new({"nested" => {"i][egal" => "okay"}, "n[0]" => "bene"})
+ expect(e.get('nested')).to eq({"i][egal" => "okay"})
+ expect(e.to_hash).to include("n[0]" => "bene")
+ end
end
context "method missing exception messages" do
diff --git a/logstash-core/src/main/java/org/logstash/ConvertedMap.java b/logstash-core/src/main/java/org/logstash/ConvertedMap.java
index a89855dfcd7..c9f0da606e9 100644
--- a/logstash-core/src/main/java/org/logstash/ConvertedMap.java
+++ b/logstash-core/src/main/java/org/logstash/ConvertedMap.java
@@ -24,6 +24,7 @@
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
import org.jruby.RubyHash;
import org.jruby.RubyString;
import org.jruby.runtime.ThreadContext;
@@ -35,15 +36,51 @@
* as key.
* The {@code put} method will work with any {@link String} key but is only intended for use in
* situations where {@link ConvertedMap#putInterned(String, Object)} would require manually
- * interning the {@link String} key. This is due to the fact that we use our internal
- * {@link FieldReference} cache to get an interned version of the given key instead of JDKs
- * {@link String#intern()}, which is faster since it works from a much smaller and hotter cache
- * in {@link FieldReference#CACHE} than using String interning directly.
+ * interning the {@link String} key.
+ * This is due to the fact that it is based on {@link IdentityHashMap}, and we rely on the String
+ * intern pool to ensure identity match of equivalent strings.
+ * For performance, we keep a global cache of strings that have been interned for use with {@link ConvertedMap},
+ * and encourage interning through {@link ConvertedMap#internStringForUseAsKey(String)} to avoid
+ * the performance pentalty of the global string intern pool.
*/
public final class ConvertedMap extends IdentityHashMap {
private static final long serialVersionUID = 1L;
+ private static final ConcurrentHashMap KEY_CACHE = new ConcurrentHashMap<>(100, 0.2F, 16);
+
+ /**
+ * Returns an equivalent interned string, possibly avoiding the
+ * global intern pool.
+ *
+ * @param candidate the candidate {@link String}
+ * @return an interned string from the global String intern pool
+ */
+ static String internStringForUseAsKey(final String candidate) {
+ // TODO: replace with LRU cache and/or isolated intern pool
+ final String cached = KEY_CACHE.get(candidate);
+ if (cached != null) { return cached; }
+
+ final String interned = candidate.intern();
+ if (KEY_CACHE.size() <= 10_000 ) {
+ KEY_CACHE.put(interned, interned);
+ }
+ return interned;
+ }
+
+ /**
+ * Ensures that the provided {@code String[]} contains only
+ * instances that have been {@link ConvertedMap::internStringForUseAsKey},
+ * possibly replacing entries with equivalent interned strings.
+ *
+ * @param candidates an array of non-null strings
+ */
+ static void internStringsForUseAsKeys(final String[] candidates) {
+ for (int i = 0; i < candidates.length; i++) {
+ candidates[i] = internStringForUseAsKey(candidates[i]);
+ }
+ }
+
private static final RubyHash.VisitorWithState RUBY_HASH_VISITOR =
new RubyHash.VisitorWithState() {
@Override
@@ -91,7 +128,7 @@ public static ConvertedMap newFromRubyHash(final ThreadContext context, final Ru
@Override
public Object put(final String key, final Object value) {
- return super.put(FieldReference.from(key).getKey(), value);
+ return super.put(internStringForUseAsKey(key), value);
}
/**
@@ -118,6 +155,6 @@ public Object unconvert() {
* @return Interned String
*/
private static String convertKey(final RubyString key) {
- return FieldReference.from(key).getKey();
+ return internStringForUseAsKey(key.asJavaString());
}
}
diff --git a/logstash-core/src/main/java/org/logstash/FieldReference.java b/logstash-core/src/main/java/org/logstash/FieldReference.java
index c0b86d4fb2a..0822e2c26c3 100644
--- a/logstash-core/src/main/java/org/logstash/FieldReference.java
+++ b/logstash-core/src/main/java/org/logstash/FieldReference.java
@@ -27,6 +27,7 @@
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
+
import org.jruby.RubyString;
/**
@@ -75,11 +76,7 @@ public static class IllegalSyntaxException extends RuntimeException {
/**
* Unique {@link FieldReference} pointing at the timestamp field in a {@link Event}.
*/
- public static final FieldReference TIMESTAMP_REFERENCE =
- deduplicate(new FieldReference(EMPTY_STRING_ARRAY, Event.TIMESTAMP, DATA_CHILD));
-
- private static final FieldReference METADATA_PARENT_REFERENCE =
- new FieldReference(EMPTY_STRING_ARRAY, Event.METADATA, META_PARENT);
+ public static final FieldReference TIMESTAMP_REFERENCE = FieldReference.from(Event.TIMESTAMP);
/**
* Cache of all existing {@link FieldReference} by their {@link RubyString} source.
@@ -106,9 +103,10 @@ public static class IllegalSyntaxException extends RuntimeException {
private final int type;
private FieldReference(final String[] path, final String key, final int type) {
- this.key = key;
- this.type = type;
+ this.key = ConvertedMap.internStringForUseAsKey(key);
+ ConvertedMap.internStringsForUseAsKeys(path);
this.path = path;
+ this.type = type;
hash = calculateHash(this.key, this.path, this.type);
}
@@ -221,10 +219,10 @@ private static FieldReference parseToCache(final String reference) {
private static FieldReference parse(final CharSequence reference) {
final List path = TOKENIZER.tokenize(reference);
- final String key = path.remove(path.size() - 1).intern();
+ final String key = path.remove(path.size() - 1);
final boolean empty = path.isEmpty();
if (empty && key.equals(Event.METADATA)) {
- return METADATA_PARENT_REFERENCE;
+ return new FieldReference(EMPTY_STRING_ARRAY, key, META_PARENT);
} else if (!empty && path.get(0).equals(Event.METADATA)) {
return new FieldReference(
path.subList(1, path.size()).toArray(EMPTY_STRING_ARRAY), key, META_CHILD
@@ -240,7 +238,11 @@ private static FieldReference parse(final CharSequence reference) {
**/
private static class StrictTokenizer {
- public List tokenize(CharSequence reference) {
+ /**
+ * @param reference a sequence of characters representing a reference to a field
+ * @return a list of string path fragments.
+ */
+ public List tokenize(final CharSequence reference) {
ArrayList path = new ArrayList<>();
final int length = reference.length();
@@ -286,7 +288,7 @@ public List tokenize(CharSequence reference) {
if (splitPoint < i) {
// if we have something to add, add it.
- path.add(reference.subSequence(splitPoint, i).toString().intern());
+ path.add(reference.subSequence(splitPoint, i).toString());
}
depth--;
@@ -309,7 +311,9 @@ public List tokenize(CharSequence reference) {
// if we saw no brackets, this is a top-level reference that can be emitted as-is without
// further processing
path.add(reference.toString());
+ path.trimToSize();
return path;
+
} else if (depth > 0) {
// when we hit the end-of-input while still in an open bracket, we have an invalid field reference
potentiallyAmbiguousSyntaxDetected = true;
diff --git a/logstash-core/src/test/java/org/logstash/ext/JrubyEventExtLibraryTest.java b/logstash-core/src/test/java/org/logstash/ext/JrubyEventExtLibraryTest.java
index 538d8695715..2bf627c9eb0 100644
--- a/logstash-core/src/test/java/org/logstash/ext/JrubyEventExtLibraryTest.java
+++ b/logstash-core/src/test/java/org/logstash/ext/JrubyEventExtLibraryTest.java
@@ -27,10 +27,12 @@
import java.util.Map;
import org.assertj.core.api.Assertions;
import org.hamcrest.CoreMatchers;
+import org.jruby.RubyBoolean;
import org.jruby.RubyHash;
import org.jruby.RubyString;
import org.jruby.exceptions.RuntimeError;
import org.jruby.javasupport.JavaUtil;
+import org.jruby.runtime.Block;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
import org.junit.Assert;
@@ -98,19 +100,21 @@ public void correctlyRaiseRubyRuntimeErrorWhenGivenInvalidFieldReferences() {
}
@Test
- public void correctlyRaiseRubyRuntimeErrorWhenGivenInvalidFieldReferencesInMap() {
+ public void correctlySetsValueWhenGivenMapWithKeysThatHaveFieldReferenceSpecialCharacters() {
final ThreadContext context = RubyUtil.RUBY.getCurrentContext();
final JrubyEventExtLibrary.RubyEvent event =
JrubyEventExtLibrary.RubyEvent.newRubyEvent(context.runtime);
final RubyString key = rubyString("foo");
final RubyHash value = RubyHash.newHash(context.runtime, Collections.singletonMap(rubyString("il[[]]]legal"), rubyString("okay")), context.nil);
- try {
- event.ruby_set_field(context, key, value);
- } catch (RuntimeError rubyRuntimeError) {
- Assert.assertThat(rubyRuntimeError.getLocalizedMessage(), CoreMatchers.containsString("Invalid FieldReference"));
- return;
- }
- Assert.fail("expected ruby RuntimeError was not thrown.");
+
+ event.ruby_set_field(context, key, value);
+ IRubyObject retrievedValue = event.ruby_get_field(context, key);
+ Assert.assertThat(retrievedValue, CoreMatchers.equalTo(value));
+
+ RubyHash eventHash = (RubyHash) event.ruby_to_hash_with_metadata(context);
+ IRubyObject nestedValue = eventHash.dig(context, rubyString("foo"), rubyString("il[[]]]legal"));
+ Assert.assertFalse(nestedValue.isNil());
+ Assert.assertEquals(rubyString("okay"), nestedValue);
}
private static RubyString rubyString(final String java) {