From c0b06dd18ac947349170c11999027f1d121193d4 Mon Sep 17 00:00:00 2001
From: Johannes Rudolph
Date: Tue, 30 Oct 2018 13:52:29 +0100
Subject: [PATCH] Use TreeMap instead of HashMap for JsObject key/value pairs, fixes #277
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The problem is that with String's hashCode implementation it is too simple to
create synthetic collisions. This allows an attacker to create an object with
keys that all collide which leads to a performance drop for the HashMap just
for creating the map in the first place.

See https://github.com/scala/bug/issues/11203 for more information about the
underlying HashMap issue.

For the time being, it seems safer to use a TreeMap which uses String ordering.

Benchmarks suggest that using a TreeMap is only ~6% slower for reasonably sized
JSON objects up to 100 keys.

Benchmark for non-colliding keys:

Benchmark                         (_size)  (parser)   Mode  Cnt        Score       Error  Units
ExtractFieldsBenchmark.readSpray        1   HashMap  thrpt    5  1195832.262 ± 64366.605  ops/s
ExtractFieldsBenchmark.readSpray        1   TreeMap  thrpt    5  1342009.641 ± 17307.555  ops/s
ExtractFieldsBenchmark.readSpray       10   HashMap  thrpt    5   237173.327 ± 70341.742  ops/s
ExtractFieldsBenchmark.readSpray       10   TreeMap  thrpt    5   233510.618 ± 69638.750  ops/s
ExtractFieldsBenchmark.readSpray      100   HashMap  thrpt    5    23202.016 ±  1514.763  ops/s
ExtractFieldsBenchmark.readSpray      100   TreeMap  thrpt    5    21899.072 ±   823.225  ops/s
ExtractFieldsBenchmark.readSpray     1000   HashMap  thrpt    5     2073.754 ±    66.093  ops/s
ExtractFieldsBenchmark.readSpray     1000   TreeMap  thrpt    5     1793.329 ±    43.603  ops/s
ExtractFieldsBenchmark.readSpray    10000   HashMap  thrpt    5      208.160 ±     7.466  ops/s
ExtractFieldsBenchmark.readSpray    10000   TreeMap  thrpt    5      160.349 ±     5.809  ops/s
---
 src/main/scala/spray/json/JsValue.scala       |  7 +++--
 src/main/scala/spray/json/JsonParser.scala    |  7 +++--
 .../scala/spray/json/HashCodeCollider.scala   | 26 ++++++++++++++++
 .../scala/spray/json/JsonParserSpec.scala     | 30 +++++++++++++++++++
 4
files changed, 64 insertions(+), 6 deletions(-) create mode 100644 src/test/scala/spray/json/HashCodeCollider.scala diff --git a/src/main/scala/spray/json/JsValue.scala b/src/main/scala/spray/json/JsValue.scala index 7cd8cd83..9ed94da5 100644 --- a/src/main/scala/spray/json/JsValue.scala +++ b/src/main/scala/spray/json/JsValue.scala @@ -19,6 +19,7 @@ package spray.json import collection.immutable +import scala.collection.immutable.TreeMap /** * The general type of a JSON AST node. @@ -53,10 +54,10 @@ case class JsObject(fields: Map[String, JsValue]) extends JsValue { def getFields(fieldNames: String*): immutable.Seq[JsValue] = fieldNames.toIterator.flatMap(fields.get).toList } object JsObject { - val empty = JsObject(Map.empty[String, JsValue]) - def apply(members: JsField*) = new JsObject(Map(members: _*)) + val empty = JsObject(TreeMap.empty[String, JsValue]) + def apply(members: JsField*): JsObject = new JsObject(TreeMap(members: _*)) @deprecated("Use JsObject(JsValue*) instead", "1.3.0") - def apply(members: List[JsField]) = new JsObject(Map(members: _*)) + def apply(members: List[JsField]): JsObject = apply(members: _*) } /** diff --git a/src/main/scala/spray/json/JsonParser.scala b/src/main/scala/spray/json/JsonParser.scala index 71c4c119..fb94b9c3 100644 --- a/src/main/scala/spray/json/JsonParser.scala +++ b/src/main/scala/spray/json/JsonParser.scala @@ -18,9 +18,11 @@ package spray.json import scala.annotation.{switch, tailrec} import java.lang.{StringBuilder => JStringBuilder} -import java.nio.{CharBuffer, ByteBuffer} +import java.nio.{ByteBuffer, CharBuffer} import java.nio.charset.Charset +import scala.collection.immutable.TreeMap + /** * Fast, no-dependency parser for JSON as defined by http://tools.ietf.org/html/rfc4627. 
*/ @@ -86,8 +88,7 @@ class JsonParser(input: ParserInput) { val nextMap = map.updated(key, jsValue) if (ws(',')) members(nextMap) else nextMap } - var map = Map.empty[String, JsValue] - map = members(map) + val map = members(TreeMap.empty[String, JsValue]) require('}') JsObject(map) } else { diff --git a/src/test/scala/spray/json/HashCodeCollider.scala b/src/test/scala/spray/json/HashCodeCollider.scala new file mode 100644 index 00000000..57388b94 --- /dev/null +++ b/src/test/scala/spray/json/HashCodeCollider.scala @@ -0,0 +1,26 @@ +package spray.json + +/** + * Helper that creates strings that all share the same hashCode == 0. + * + * Adapted from MIT-licensed code by Andriy Plokhotnyuk + * at https://github.com/plokhotnyuk/jsoniter-scala/blob/26b5ecdd4f8c2ab7e97bd8106cefdda4c1e701ce/jsoniter-scala-benchmark/src/main/scala/com/github/plokhotnyuk/jsoniter_scala/macros/HashCodeCollider.scala#L6. + */ +object HashCodeCollider { + val visibleChars = (33 until 127).filterNot(c => c == '\\' || c == '"') + def asciiChars: Iterator[Int] = visibleChars.toIterator + def asciiCharsAndHash(previousHash: Int): Iterator[(Int, Int)] = visibleChars.toIterator.map(c => c -> (previousHash + c) * 31) + + /** Creates an iterator of Strings that all have hashCode == 0 */ + def zeroHashCodeIterator(): Iterator[String] = + for { + (i0, h0) <- asciiCharsAndHash(0) + (i1, h1) <- asciiCharsAndHash(h0) + (i2, h2) <- asciiCharsAndHash(h1) if (((h2 + 32) * 923521) ^ ((h2 + 127) * 923521)) < 0 + (i3, h3) <- asciiCharsAndHash(h2) if (((h3 + 32) * 29791) ^ ((h3 + 127) * 29791)) < 0 + (i4, h4) <- asciiCharsAndHash(h3) if (((h4 + 32) * 961) ^ ((h4 + 127) * 961)) < 0 + (i5, h5) <- asciiCharsAndHash(h4) if (((h5 + 32) * 31) ^ ((h5 + 127) * 31)) < 0 + (i6, h6) <- asciiCharsAndHash(h5) if ((h6 + 32) ^ (h6 + 127)) < 0 + (i7, h7) <- asciiCharsAndHash(h6) if h6 + i7 == 0 + } yield new String(Array(i0, i1, i2, i3, i4, i5, i6, i7).map(_.toChar)) +} diff --git 
a/src/test/scala/spray/json/JsonParserSpec.scala b/src/test/scala/spray/json/JsonParserSpec.scala index e5645a4b..36c9726d 100644 --- a/src/test/scala/spray/json/JsonParserSpec.scala +++ b/src/test/scala/spray/json/JsonParserSpec.scala @@ -84,6 +84,36 @@ class JsonParserSpec extends Specification { ) list.map(_.asInstanceOf[JsObject].fields("questions").asInstanceOf[JsArray].elements.size) === List.fill(20)(100) } + "not show bad performance characteristics when object keys' hashCodes collide" in { + val numKeys = 10000 + val value = "null" + + val regularKeys = Iterator.from(1).map(i => s"key_$i").take(numKeys) + val collidingKeys = HashCodeCollider.zeroHashCodeIterator().take(numKeys) + + def createJson(keys: Iterator[String]): String = keys.mkString("""{"""", s"""":$value,"""", s"""":$value}""") + + def nanoBench(block: => Unit): Long = { + // great microbenchmark (the comment must be kept, otherwise it's not true) + val f = block _ + + // warmup + (1 to 10).foreach(_ => f()) + + val start = System.nanoTime() + f() + val end = System.nanoTime() + end - start + } + + val regularJson = createJson(regularKeys) + val collidingJson = createJson(collidingKeys) + + val regularTime = nanoBench { JsonParser(regularJson) } + val collidingTime = nanoBench { JsonParser(collidingJson) } + + collidingTime / regularTime must be < 2L // speed must be in same order of magnitude + } "produce proper error messages" in { def errorMessage(input: String) =