Skip to content

Commit

Permalink
fix: correctly handle FieldReference-special characters in field names.
Browse files Browse the repository at this point in the history
Keys passed to most methods of `ConvertedMap`, based on `IdentityHashMap`
depend on identity and not equivalence, and therefore rely on the keys being
_interned_ strings. In order to avoid hitting the JVM's global String intern
pool (which can have performance problems), operations to normalize a string
to its interned counterpart have traditionally relied on the behaviour of
`FieldReference#from` returning a likely-cached `FieldReference`, that had
an interned `key` and an empty `path`.

This is problematic on two points.

First, when `ConvertedMap` was given data with keys that _were_ valid string
field references representing a nested field (such as `[host][geo][location]`),
the implementation of `ConvertedMap#put` effectively silently discarded the
path components because it assumed them to be empty, and only the key was
kept (`location`).

Second, when `ConvertedMap` was given a map whose keys contained what the
field reference parser considered special characters but _were NOT_
valid field references, the resulting `FieldReference.IllegalSyntaxException`
caused the operation to abort.

Instead of using the `FieldReference` cache, which sits on top of objects whose
`key` and `path`-components are known to have been interned, we introduce an
internment helper on our `ConvertedMap` that is also backed by the global string
intern pool, and ensure that our field references are primed through this pool.

In addition to fixing the `ConvertedMap#newFromMap` functionality, this has
three net effects:

 - Our ConvertedMap operations still use strings
   from the global intern pool
 - We have a new, smaller cache of individual field
   names, improving lookup performance
 - Our FieldReference cache no longer is flooded
   with fragments and therefore is more likely to
   remain performant

NOTE: this does NOT create isolated intern pools, as doing so would require
      a careful audit of the possible code-paths to `ConvertedMap#putInterned`.
      The new cache is limited to 10k strings, and when more are used only
      the FIRST 10k strings will be primed into the cache, leaving the
      remainder to always hit the global String intern pool.

NOTE: by fixing this bug, we alow events to be created whose fields _CANNOT_
      be referenced with the existing FieldReference implementation.

Resolves: elastic#13606
Resolves: elastic#11608
  • Loading branch information
yaauie committed Apr 25, 2022
1 parent 5d80ce0 commit 9f8be9e
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 25 deletions.
14 changes: 14 additions & 0 deletions logstash-core/spec/logstash/event_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,15 @@
expect { e.set('[', 'value') }.to raise_exception(::RuntimeError)
end
end

context 'with map value whose keys have FieldReference-special characters' do
let(:event) { LogStash::Event.new }
let(:value) { {"this[field]" => "okay"} }
it 'sets the value correctly' do
event.set('[some][field]', value.dup)
expect(event.get('[some][field]')).to eq(value)
end
end
end

context "timestamp" do
Expand Down Expand Up @@ -351,6 +360,11 @@
expect(e.timestamp.to_iso8601).to eq("2016-05-28T23:02:05.350Z")
end

it 'accepts maps whose keys contain FieldReference-special characters' do
e = LogStash::Event.new({"nested" => {"i][egal" => "okay"}, "n[0]" => "bene"})
expect(e.get('nested')).to eq({"i][egal" => "okay"})
expect(e.to_hash).to include("n[0]" => "bene")
end
end

context "method missing exception messages" do
Expand Down
49 changes: 43 additions & 6 deletions logstash-core/src/main/java/org/logstash/ConvertedMap.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.jruby.RubyHash;
import org.jruby.RubyString;
import org.jruby.runtime.ThreadContext;
Expand All @@ -35,15 +36,51 @@
* as key.</p>
* <p>The {@code put} method will work with any {@link String} key but is only intended for use in
* situations where {@link ConvertedMap#putInterned(String, Object)} would require manually
* interning the {@link String} key. This is due to the fact that we use our internal
* {@link FieldReference} cache to get an interned version of the given key instead of JDKs
* {@link String#intern()}, which is faster since it works from a much smaller and hotter cache
* in {@link FieldReference#CACHE} than using String interning directly.</p>
* interning the {@link String} key.
* This is due to the fact that it is based on {@link IdentityHashMap}, and we rely on the String
* intern pool to ensure identity match of equivalent strings.
* For performance, we keep a global cache of strings that have been interned for use with {@link ConvertedMap},
* and encourage interning through {@link ConvertedMap#internStringForUseAsKey(String)} to avoid
* the performance pentalty of the global string intern pool.
*/
public final class ConvertedMap extends IdentityHashMap<String, Object> {

private static final long serialVersionUID = 1L;

private static final ConcurrentHashMap<String,String> KEY_CACHE = new ConcurrentHashMap<>(100, 0.2F, 16);

/**
* Returns an equivalent interned string, possibly avoiding the
* global intern pool.
*
* @param candidate the candidate {@link String}
* @return an interned string from the global String intern pool
*/
static String internStringForUseAsKey(final String candidate) {
// TODO: replace with LRU cache and/or isolated intern pool
final String cached = KEY_CACHE.get(candidate);
if (cached != null) { return cached; }

final String interned = candidate.intern();
if (KEY_CACHE.size() <= 10_000 ) {
KEY_CACHE.put(interned, interned);
}
return interned;
}

/**
* Ensures that the provided {@code String[]} contains only
* instances that have been {@link ConvertedMap::internStringForUseAsKey},
* possibly replacing entries with equivalent interned strings.
*
* @param candidates an array of non-null strings
*/
static void internStringsForUseAsKeys(final String[] candidates) {
for (int i = 0; i < candidates.length; i++) {
candidates[i] = internStringForUseAsKey(candidates[i]);
}
}

private static final RubyHash.VisitorWithState<ConvertedMap> RUBY_HASH_VISITOR =
new RubyHash.VisitorWithState<ConvertedMap>() {
@Override
Expand Down Expand Up @@ -91,7 +128,7 @@ public static ConvertedMap newFromRubyHash(final ThreadContext context, final Ru

@Override
public Object put(final String key, final Object value) {
return super.put(FieldReference.from(key).getKey(), value);
return super.put(internStringForUseAsKey(key), value);
}

/**
Expand All @@ -118,6 +155,6 @@ public Object unconvert() {
* @return Interned String
*/
private static String convertKey(final RubyString key) {
return FieldReference.from(key).getKey();
return internStringForUseAsKey(key.asJavaString());
}
}
26 changes: 15 additions & 11 deletions logstash-core/src/main/java/org/logstash/FieldReference.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import java.util.Map;

import java.util.concurrent.ConcurrentHashMap;

import org.jruby.RubyString;

/**
Expand Down Expand Up @@ -75,11 +76,7 @@ public static class IllegalSyntaxException extends RuntimeException {
/**
* Unique {@link FieldReference} pointing at the timestamp field in a {@link Event}.
*/
public static final FieldReference TIMESTAMP_REFERENCE =
deduplicate(new FieldReference(EMPTY_STRING_ARRAY, Event.TIMESTAMP, DATA_CHILD));

private static final FieldReference METADATA_PARENT_REFERENCE =
new FieldReference(EMPTY_STRING_ARRAY, Event.METADATA, META_PARENT);
public static final FieldReference TIMESTAMP_REFERENCE = FieldReference.from(Event.TIMESTAMP);

/**
* Cache of all existing {@link FieldReference} by their {@link RubyString} source.
Expand All @@ -106,9 +103,10 @@ public static class IllegalSyntaxException extends RuntimeException {
private final int type;

private FieldReference(final String[] path, final String key, final int type) {
this.key = key;
this.type = type;
this.key = ConvertedMap.internStringForUseAsKey(key);
ConvertedMap.internStringsForUseAsKeys(path);
this.path = path;
this.type = type;
hash = calculateHash(this.key, this.path, this.type);
}

Expand Down Expand Up @@ -221,10 +219,10 @@ private static FieldReference parseToCache(final String reference) {
private static FieldReference parse(final CharSequence reference) {
final List<String> path = TOKENIZER.tokenize(reference);

final String key = path.remove(path.size() - 1).intern();
final String key = path.remove(path.size() - 1);
final boolean empty = path.isEmpty();
if (empty && key.equals(Event.METADATA)) {
return METADATA_PARENT_REFERENCE;
return new FieldReference(EMPTY_STRING_ARRAY, key, META_PARENT);
} else if (!empty && path.get(0).equals(Event.METADATA)) {
return new FieldReference(
path.subList(1, path.size()).toArray(EMPTY_STRING_ARRAY), key, META_CHILD
Expand All @@ -240,7 +238,11 @@ private static FieldReference parse(final CharSequence reference) {
**/
private static class StrictTokenizer {

public List<String> tokenize(CharSequence reference) {
/**
* @param reference a sequence of characters representing a reference to a field
* @return a list of string path fragments.
*/
public List<String> tokenize(final CharSequence reference) {
ArrayList<String> path = new ArrayList<>();
final int length = reference.length();

Expand Down Expand Up @@ -286,7 +288,7 @@ public List<String> tokenize(CharSequence reference) {

if (splitPoint < i) {
// if we have something to add, add it.
path.add(reference.subSequence(splitPoint, i).toString().intern());
path.add(reference.subSequence(splitPoint, i).toString());
}

depth--;
Expand All @@ -309,7 +311,9 @@ public List<String> tokenize(CharSequence reference) {
// if we saw no brackets, this is a top-level reference that can be emitted as-is without
// further processing
path.add(reference.toString());
path.trimToSize();
return path;

} else if (depth > 0) {
// when we hit the end-of-input while still in an open bracket, we have an invalid field reference
potentiallyAmbiguousSyntaxDetected = true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,12 @@
import java.util.Map;
import org.assertj.core.api.Assertions;
import org.hamcrest.CoreMatchers;
import org.jruby.RubyBoolean;
import org.jruby.RubyHash;
import org.jruby.RubyString;
import org.jruby.exceptions.RuntimeError;
import org.jruby.javasupport.JavaUtil;
import org.jruby.runtime.Block;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
import org.junit.Assert;
Expand Down Expand Up @@ -98,19 +100,21 @@ public void correctlyRaiseRubyRuntimeErrorWhenGivenInvalidFieldReferences() {
}

@Test
public void correctlyRaiseRubyRuntimeErrorWhenGivenInvalidFieldReferencesInMap() {
public void correctlySetsValueWhenGivenMapWithKeysThatHaveFieldReferenceSpecialCharacters() {
final ThreadContext context = RubyUtil.RUBY.getCurrentContext();
final JrubyEventExtLibrary.RubyEvent event =
JrubyEventExtLibrary.RubyEvent.newRubyEvent(context.runtime);
final RubyString key = rubyString("foo");
final RubyHash value = RubyHash.newHash(context.runtime, Collections.singletonMap(rubyString("il[[]]]legal"), rubyString("okay")), context.nil);
try {
event.ruby_set_field(context, key, value);
} catch (RuntimeError rubyRuntimeError) {
Assert.assertThat(rubyRuntimeError.getLocalizedMessage(), CoreMatchers.containsString("Invalid FieldReference"));
return;
}
Assert.fail("expected ruby RuntimeError was not thrown.");

event.ruby_set_field(context, key, value);
IRubyObject retrievedValue = event.ruby_get_field(context, key);
Assert.assertThat(retrievedValue, CoreMatchers.equalTo(value));

RubyHash eventHash = (RubyHash) event.ruby_to_hash_with_metadata(context);
IRubyObject nestedValue = eventHash.dig(context, rubyString("foo"), rubyString("il[[]]]legal"));
Assert.assertFalse(nestedValue.isNil());
Assert.assertEquals(rubyString("okay"), nestedValue);
}

private static RubyString rubyString(final String java) {
Expand Down

0 comments on commit 9f8be9e

Please sign in to comment.