Skip to content

Commit

Permalink
Tries improvements (#5736)
Browse files Browse the repository at this point in the history
* ArrayTrie handles full alphabet
* TreeTrie handles case sensitive
* improved trie selection from Index builders
* optimisations, cleanups and benchmarks.

Signed-off-by: Greg Wilkins <gregw@webtide.com>
  • Loading branch information
gregw authored Jan 5, 2021
1 parent 480767a commit 51120b1
Show file tree
Hide file tree
Showing 18 changed files with 1,958 additions and 1,026 deletions.
16 changes: 7 additions & 9 deletions jetty-http/src/main/java/org/eclipse/jetty/http/HttpField.java
Original file line number Diff line number Diff line change
Expand Up @@ -145,18 +145,16 @@ public boolean contains(String search)
return false;
if (_value == null)
return false;
if (search.equals(_value))
if (search.equalsIgnoreCase(_value))
return true;

search = StringUtil.asciiToLowerCase(search);

int state = 0;
int match = 0;
int param = 0;

for (int i = 0; i < _value.length(); i++)
{
char c = _value.charAt(i);
char c = StringUtil.asciiToLowerCase(_value.charAt(i));
switch (state)
{
case 0: // initial white space
Expand All @@ -181,7 +179,7 @@ public boolean contains(String search)
break;

default: // character
match = Character.toLowerCase(c) == search.charAt(0) ? 1 : -1;
match = c == StringUtil.asciiToLowerCase(search.charAt(0)) ? 1 : -1;
state = 1;
break;
}
Expand All @@ -206,7 +204,7 @@ public boolean contains(String search)
if (match > 0)
{
if (match < search.length())
match = Character.toLowerCase(c) == search.charAt(match) ? (match + 1) : -1;
match = c == StringUtil.asciiToLowerCase(search.charAt(match)) ? (match + 1) : -1;
else if (c != ' ' && c != '\t')
match = -1;
}
Expand All @@ -229,7 +227,7 @@ else if (c != ' ' && c != '\t')
if (match >= 0)
{
if (match < search.length())
match = Character.toLowerCase(c) == search.charAt(match) ? (match + 1) : -1;
match = c == StringUtil.asciiToLowerCase(search.charAt(match)) ? (match + 1) : -1;
else
match = -1;
}
Expand All @@ -240,7 +238,7 @@ else if (c != ' ' && c != '\t')
if (match >= 0)
{
if (match < search.length())
match = Character.toLowerCase(c) == search.charAt(match) ? (match + 1) : -1;
match = c == StringUtil.asciiToLowerCase(search.charAt(match)) ? (match + 1) : -1;
else
match = -1;
}
Expand Down Expand Up @@ -290,7 +288,7 @@ else if (c != ' ' && c != '\t')
if (param >= 0)
{
if (param < __zeroquality.length())
param = Character.toLowerCase(c) == __zeroquality.charAt(param) ? (param + 1) : -1;
param = c == __zeroquality.charAt(param) ? (param + 1) : -1;
else if (c != '0' && c != '.')
param = -1;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@

import java.nio.ByteBuffer;
import java.util.EnumSet;
import java.util.function.Function;

import org.eclipse.jetty.util.BufferUtil;
import org.eclipse.jetty.util.Index;
import org.eclipse.jetty.util.StringUtil;

/**
*
Expand Down Expand Up @@ -71,7 +73,7 @@ public String toString()
return _string;
}

private static EnumSet<HttpHeader> __known =
private static final EnumSet<HttpHeader> __known =
EnumSet.of(HttpHeader.CONNECTION,
HttpHeader.TRANSFER_ENCODING,
HttpHeader.CONTENT_ENCODING);
Expand All @@ -82,4 +84,89 @@ public static boolean hasKnownValues(HttpHeader header)
return false;
return __known.contains(header);
}

/**
* Parse an unquoted comma separated list of index keys.
* @param value A string list of index keys, separated with commas and possible white space
* @param found The function to call for all found index entries. If the function returns false parsing is halted.
* @return true if parsing completed normally and all found index items returned true from the found function.
*/
public static boolean parseCsvIndex(String value, Function<HttpHeaderValue, Boolean> found)
{
return parseCsvIndex(value, found, null);
}

/**
* Parse an unquoted comma separated list of index keys.
* @param value A string list of index keys, separated with commas and possible white space
* @param found The function to call for all found index entries. If the function returns false parsing is halted.
* @param unknown The function to call for foound unknown entries. If the function returns false parsing is halted.
* @return true if parsing completed normally and all found index items returned true from the found function.
*/
public static boolean parseCsvIndex(String value, Function<HttpHeaderValue, Boolean> found, Function<String, Boolean> unknown)
{
if (StringUtil.isBlank(value))
return true;
int next = 0;
parsing: while (next < value.length())
{
// Look for the best fit next token
HttpHeaderValue token = CACHE.getBest(value, next, value.length() - next);

// if a token is found
if (token != null)
{
// check that it is only followed by whatspace, EOL and/or comma
int i = next + token.toString().length();
loop: while (true)
{
if (i >= value.length())
return found.apply(token);
switch (value.charAt(i))
{
case ',':
if (!found.apply(token))
return false;
next = i + 1;
continue parsing;
case ' ':
break;
default:
break loop;
}
i++;
}
}

// Token was not correctly matched
if (' ' == value.charAt(next))
{
next++;
continue;
}

int comma = value.indexOf(',', next);
if (comma == next)
{
next++;
continue;
}
else if (comma > next)
{
if (unknown == null)
{
next = comma + 1;
continue;
}
String v = value.substring(next, comma).trim();
if (StringUtil.isBlank(v) || unknown.apply(v))
{
next = comma + 1;
continue;
}
}
return false;
}
return true;
}
}
13 changes: 2 additions & 11 deletions jetty-http/src/main/java/org/eclipse/jetty/http/HttpParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ public class HttpParser
.with(new HttpField(HttpHeader.ACCEPT_ENCODING, "gzip"))
.with(new HttpField(HttpHeader.ACCEPT_ENCODING, "gzip, deflate"))
.with(new HttpField(HttpHeader.ACCEPT_ENCODING, "gzip, deflate, br"))
.with(new HttpField(HttpHeader.ACCEPT_ENCODING, "gzip,deflate,sdch"))
.with(new HttpField(HttpHeader.ACCEPT_LANGUAGE, "en-US,enq=0.5"))
.with(new HttpField(HttpHeader.ACCEPT_LANGUAGE, "en-GB,en-USq=0.8,enq=0.6"))
.with(new HttpField(HttpHeader.ACCEPT_LANGUAGE, "en-AU,enq=0.9,it-ITq=0.8,itq=0.7,en-GBq=0.6,en-USq=0.5"))
Expand Down Expand Up @@ -1058,19 +1057,11 @@ else if (_endOfContent == EndOfContent.CHUNKED_CONTENT)
if (addToFieldCache && _header != null && _valueString != null)
{
if (_fieldCache == null)
{
_fieldCache = (getHeaderCacheSize() > 0 && (_version != null && _version == HttpVersion.HTTP_1_1))
? new Index.Builder<HttpField>()
.caseSensitive(false)
.mutable()
.maxCapacity(getHeaderCacheSize())
.build()
: NO_CACHE;
}
_fieldCache = Index.buildCaseSensitiveMutableVisibleAsciiAlphabet(getHeaderCacheSize());

if (_field == null)
_field = new HttpField(_header, caseInsensitiveHeader(_headerString, _header.asString()), _valueString);
if (!_fieldCache.put(_field))
if (_field.getValue().length() < getHeaderCacheSize() && !_fieldCache.put(_field))
{
_fieldCache.clear();
_fieldCache.put(_field);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.nullValue;
import static org.junit.jupiter.api.Assertions.assertEquals;
Expand Down Expand Up @@ -2971,4 +2972,82 @@ public void onComplianceViolation(ComplianceViolation.Mode mode, ComplianceViola
_complianceViolation.add(violation);
}
}

@Test
public void testHttpHeaderValueParseCsv()
{
final List<HttpHeaderValue> list = new ArrayList<>();
final List<String> unknowns = new ArrayList<>();

assertTrue(HttpHeaderValue.parseCsvIndex("", list::add, unknowns::add));
assertThat(list, empty());
assertThat(unknowns, empty());

assertTrue(HttpHeaderValue.parseCsvIndex(" ", list::add, unknowns::add));
assertThat(list, empty());
assertThat(unknowns, empty());

assertTrue(HttpHeaderValue.parseCsvIndex(",", list::add, unknowns::add));
assertThat(list, empty());
assertThat(unknowns, empty());

assertTrue(HttpHeaderValue.parseCsvIndex(",,", list::add, unknowns::add));
assertThat(list, empty());
assertThat(unknowns, empty());

assertTrue(HttpHeaderValue.parseCsvIndex(" , , ", list::add, unknowns::add));
assertThat(list, empty());
assertThat(unknowns, empty());

list.clear();
assertTrue(HttpHeaderValue.parseCsvIndex("close", list::add));
assertThat(list, contains(HttpHeaderValue.CLOSE));

list.clear();
assertTrue(HttpHeaderValue.parseCsvIndex(" close ", list::add));
assertThat(list, contains(HttpHeaderValue.CLOSE));

list.clear();
assertTrue(HttpHeaderValue.parseCsvIndex(",close,", list::add));
assertThat(list, contains(HttpHeaderValue.CLOSE));

list.clear();
assertTrue(HttpHeaderValue.parseCsvIndex(" , close , ", list::add));
assertThat(list, contains(HttpHeaderValue.CLOSE));

list.clear();
assertTrue(HttpHeaderValue.parseCsvIndex(" close,GZIP, chunked , Keep-Alive ", list::add));
assertThat(list, contains(HttpHeaderValue.CLOSE, HttpHeaderValue.GZIP, HttpHeaderValue.CHUNKED, HttpHeaderValue.KEEP_ALIVE));

list.clear();
assertTrue(HttpHeaderValue.parseCsvIndex(" close,GZIP, chunked , Keep-Alive ", t ->
{
if (t.toString().startsWith("c"))
list.add(t);
return true;
}));
assertThat(list, contains(HttpHeaderValue.CLOSE, HttpHeaderValue.CHUNKED));

list.clear();
assertFalse(HttpHeaderValue.parseCsvIndex(" close,GZIP, chunked , Keep-Alive ", t ->
{
if (HttpHeaderValue.CHUNKED == t)
return false;
list.add(t);
return true;
}));
assertThat(list, contains(HttpHeaderValue.CLOSE, HttpHeaderValue.GZIP));

list.clear();
unknowns.clear();
assertTrue(HttpHeaderValue.parseCsvIndex("closed,close, unknown , bytes", list::add, unknowns::add));
assertThat(list, contains(HttpHeaderValue.CLOSE, HttpHeaderValue.BYTES));
assertThat(unknowns, contains("closed", "unknown"));

list.clear();
unknowns.clear();
assertFalse(HttpHeaderValue.parseCsvIndex("close, unknown , bytes", list::add, s -> false));
assertThat(list, contains(HttpHeaderValue.CLOSE));
assertThat(unknowns, empty());
}
}
Loading

0 comments on commit 51120b1

Please sign in to comment.