Skip to content

Commit

Permalink
Use TreeMap instead of HashMap for JsObject key/value pairs, fixes sp…
Browse files Browse the repository at this point in the history
…ray#277

The problem is that with String's hashCode implementation it is too simple to
create synthetic collisions. This allows an attacker to create an object with
keys that all collide which leads to a performance drop for the HashMap
just for creating the map in the first place. See scala/bug#11203
for more information about the underlying HashMap issue.

For the time being, it seems safer to use a TreeMap which uses String ordering.
Benchmarks suggest that using a TreeMap is only ~6% slower for reasonably sized JSON objects
up to 100 keys.

Benchmark for non-colliding keys:

Benchmark                         (_size)  (parser)   Mode  Cnt        Score       Error  Units
ExtractFieldsBenchmark.readSpray        1   HashMap  thrpt    5  1195832.262 ± 64366.605  ops/s
ExtractFieldsBenchmark.readSpray        1   TreeMap  thrpt    5  1342009.641 ± 17307.555  ops/s
ExtractFieldsBenchmark.readSpray       10   HashMap  thrpt    5   237173.327 ± 70341.742  ops/s
ExtractFieldsBenchmark.readSpray       10   TreeMap  thrpt    5   233510.618 ± 69638.750  ops/s
ExtractFieldsBenchmark.readSpray      100   HashMap  thrpt    5    23202.016 ±  1514.763  ops/s
ExtractFieldsBenchmark.readSpray      100   TreeMap  thrpt    5    21899.072 ±   823.225  ops/s
ExtractFieldsBenchmark.readSpray     1000   HashMap  thrpt    5     2073.754 ±    66.093  ops/s
ExtractFieldsBenchmark.readSpray     1000   TreeMap  thrpt    5     1793.329 ±    43.603  ops/s
ExtractFieldsBenchmark.readSpray    10000   HashMap  thrpt    5      208.160 ±     7.466  ops/s
ExtractFieldsBenchmark.readSpray    10000   TreeMap  thrpt    5      160.349 ±     5.809  ops/s
  • Loading branch information
jrudolph committed Oct 30, 2018
1 parent caee7a0 commit 1a05a62
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 13 deletions.
7 changes: 4 additions & 3 deletions src/main/scala/spray/json/JsValue.scala
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package spray.json

import collection.immutable
import scala.collection.immutable.TreeMap

/**
* The general type of a JSON AST node.
Expand Down Expand Up @@ -53,10 +54,10 @@ case class JsObject(fields: Map[String, JsValue]) extends JsValue {
def getFields(fieldNames: String*): immutable.Seq[JsValue] = fieldNames.toIterator.flatMap(fields.get).toList
}
object JsObject {
val empty = JsObject(Map.empty[String, JsValue])
def apply(members: JsField*) = new JsObject(Map(members: _*))
val empty = JsObject(TreeMap.empty[String, JsValue])
def apply(members: JsField*): JsObject = new JsObject(TreeMap(members: _*))
@deprecated("Use JsObject(JsValue*) instead", "1.3.0")
def apply(members: List[JsField]) = new JsObject(Map(members: _*))
def apply(members: List[JsField]): JsObject = apply(members: _*)
}

/**
Expand Down
7 changes: 4 additions & 3 deletions src/main/scala/spray/json/JsonParser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@ package spray.json

import scala.annotation.{switch, tailrec}
import java.lang.{StringBuilder => JStringBuilder}
import java.nio.{CharBuffer, ByteBuffer}
import java.nio.{ByteBuffer, CharBuffer}
import java.nio.charset.Charset

import scala.collection.immutable.TreeMap

/**
* Fast, no-dependency parser for JSON as defined by http://tools.ietf.org/html/rfc4627.
*/
Expand Down Expand Up @@ -86,8 +88,7 @@ class JsonParser(input: ParserInput) {
val nextMap = map.updated(key, jsValue)
if (ws(',')) members(nextMap) else nextMap
}
var map = Map.empty[String, JsValue]
map = members(map)
val map = members(TreeMap.empty[String, JsValue])
require('}')
JsObject(map)
} else {
Expand Down
26 changes: 26 additions & 0 deletions src/test/scala/spray/json/HashCodeCollider.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package spray.json

/**
* Helper that creates strings that all share the same hashCode == 0.
*
* Adapted from MIT-licensed code by Andriy Plokhotnyuk
* at https://github.com/plokhotnyuk/jsoniter-scala/blob/26b5ecdd4f8c2ab7e97bd8106cefdda4c1e701ce/jsoniter-scala-benchmark/src/main/scala/com/github/plokhotnyuk/jsoniter_scala/macros/HashCodeCollider.scala#L6.
*/
object HashCodeCollider {
val visibleChars = (33 until 127).filterNot(c => c == '\\' || c == '"')
def asciiChars: Iterator[Int] = visibleChars.toIterator
def asciiCharsAndHash(previousHash: Int): Iterator[(Int, Int)] = visibleChars.toIterator.map(c => c -> (previousHash + c) * 31)

/** Creates an iterator of Strings that all have hashCode == 0 */
def zeroHashCodeIterator(): Iterator[String] =
for {
(i0, h0) <- asciiCharsAndHash(0)
(i1, h1) <- asciiCharsAndHash(h0)
(i2, h2) <- asciiCharsAndHash(h1) if (((h2 + 32) * 923521) ^ ((h2 + 127) * 923521)) < 0
(i3, h3) <- asciiCharsAndHash(h2) if (((h3 + 32) * 29791) ^ ((h3 + 127) * 29791)) < 0
(i4, h4) <- asciiCharsAndHash(h3) if (((h4 + 32) * 961) ^ ((h4 + 127) * 961)) < 0
(i5, h5) <- asciiCharsAndHash(h4) if (((h5 + 32) * 31) ^ ((h5 + 127) * 31)) < 0
(i6, h6) <- asciiCharsAndHash(h5) if ((h6 + 32) ^ (h6 + 127)) < 0
(i7, h7) <- asciiCharsAndHash(h6) if h6 + i7 == 0
} yield new String(Array(i0, i1, i2, i3, i4, i5, i6, i7).map(_.toChar))
}
30 changes: 30 additions & 0 deletions src/test/scala/spray/json/JsonParserSpec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,36 @@ class JsonParserSpec extends Specification {
)
list.map(_.asInstanceOf[JsObject].fields("questions").asInstanceOf[JsArray].elements.size) === List.fill(20)(100)
}
"not show bad performance characteristics when object keys' hashCodes collide" in {
val numKeys = 10000
val value = "null"

val regularKeys = Iterator.from(1).map(i => s"key_$i").take(numKeys)
val collidingKeys = HashCodeCollider.zeroHashCodeIterator().take(numKeys)

def createJson(keys: Iterator[String]): String = keys.mkString("""{"""", s"""":$value,"""", s"""":$value}""")

def nanoBench(block: => Unit): Long = {
// great microbenchmark (the comment must be kept, otherwise it's not true)
val f = block _

// warmup
(1 to 10).foreach(_ => f())

val start = System.nanoTime()
f()
val end = System.nanoTime()
end - start
}

val regularJson = createJson(regularKeys)
val collidingJson = createJson(collidingKeys)

val regularTime = nanoBench { JsonParser(regularJson) }
val collidingTime = nanoBench { JsonParser(collidingJson) }

collidingTime / regularTime must be < 2L // speed must be in same order of magnitude
}

"produce proper error messages" in {
def errorMessage(input: String) =
Expand Down
19 changes: 12 additions & 7 deletions src/test/scala/spray/json/PrettyPrinterSpec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class PrettyPrinterSpec extends Specification {

"The PrettyPrinter" should {
"print a more complicated JsObject nicely aligned" in {
val JsObject(fields) = JsonParser {
val js = JsonParser {
"""{
| "Boolean no": false,
| "Boolean yes":true,
Expand All @@ -40,7 +40,12 @@ class PrettyPrinterSpec extends Specification {
| "zero": 0
|}""".stripMargin
}
PrettyPrinter(JsObject(ListMap(fields.toSeq.sortBy(_._1):_*))) mustEqual {
def fixedFieldOrder(js: JsValue): JsValue = js match {
case JsObject(fields) => JsObject(ListMap(fields.toSeq.sortBy(_._1).map { case (k, v) => (k, fixedFieldOrder(v)) }:_*))
case x => x
}

PrettyPrinter(fixedFieldOrder(js)) mustEqual {
"""{
| "Boolean no": false,
| "Boolean yes": true,
Expand All @@ -50,17 +55,17 @@ class PrettyPrinterSpec extends Specification {
| "number": -0.000012323424,
| "simpleKey": "some value",
| "sub object": {
| "sub key": 26.5,
| "a": "b",
| "array": [1, 2, {
| "yes": 1,
| "no": 0
| }, ["a", "b", null], false]
| "no": 0,
| "yes": 1
| }, ["a", "b", null], false],
| "sub key": 26.5
| },
| "zero": 0
|}""".stripMargin
}
}
}

}

0 comments on commit 1a05a62

Please sign in to comment.