From 70c81c4cd532e86f1baca84f8a54a54f495c511e Mon Sep 17 00:00:00 2001 From: Charles Blake Date: Tue, 31 Mar 2020 09:25:35 -0400 Subject: [PATCH 01/29] Unwind just the "pseudorandom probing" (whole hash-code-keyed variable stride double hashing) part of recent sets & tables changes (which has still been causing bugs over a month later (e.g., two days ago https://github.com/nim-lang/Nim/issues/13794) as well as still having several "figure this out" implementation question comments in them (see just diffs of this PR). This topic has been discussed in many places: https://github.com/nim-lang/Nim/issues/13393 https://github.com/nim-lang/Nim/pull/13418 https://github.com/nim-lang/Nim/pull/13440 https://github.com/nim-lang/Nim/issues/13794 Alternative/non-mandatory stronger integer hashes (or vice-versa opt-in identity hashes) are a better solution that is more general (no illusion of one hard-coded sequence solving all problems) while retaining the virtues of linear probing such as cache obliviousness and age-less tables under delete-heavy workloads (still untested after a month of this change). The only real solution for truly adversarial keys is a hash keyed off of data unobservable to attackers. That all fits better with a few families of user-pluggable/define-switchable hashes which can be provided in a separate PR more about `hashes.nim`. This PR carefully preserves the better (but still hard coded!) probing of the `intsets` and other recent fixes like `move` annotations, hash order invariant tests, `intsets.missingOrExcl` fixing, and the move of `rightSize` into `hashcommon.nim`. --- lib/pure/collections/hashcommon.nim | 80 +++++----------------- lib/pure/collections/intsets.nim | 8 +-- lib/pure/collections/setimpl.nim | 28 +++++--- lib/pure/collections/sets.nim | 14 ++-- lib/pure/collections/sharedtables.nim | 4 +- lib/pure/collections/tableimpl.nim | 42 +++++++----- lib/pure/collections/tables.nim | 96 ++++++++------------------- tests/arc/trepr.nim | 2 +- 8 files changed, 99 insertions(+), 175 deletions(-) diff --git a/lib/pure/collections/hashcommon.nim b/lib/pure/collections/hashcommon.nim index bfc09ec3cc92e..851b6cae94e47 100644 --- a/lib/pure/collections/hashcommon.nim +++ b/lib/pure/collections/hashcommon.nim @@ -18,87 +18,43 @@ when not defined(nimHasDefault): var v: T v -const freeMarker = 0 -const deletedMarker = -1 - -type UHash = uint - # hcode for real keys cannot be zero. hcode==0 signifies an empty slot. These # two procs retain clarity of that encoding without the space cost of an enum. -proc isFilledAndValid(hcode: Hash): bool {.inline.} = - result = hcode != 0 and hcode != deletedMarker - # performance: we could use bit magic if needed +proc isEmpty(hcode: Hash): bool {.inline.} = + result = hcode == 0 proc isFilled(hcode: Hash): bool {.inline.} = result = hcode != 0 - -proc translateBits(a: UHash, numBitsMask: int): UHash {.inline.} = - result = (a shr numBitsMask) or (a shl (UHash.sizeof * 8 - numBitsMask)) - -proc nextTry(h, maxHash: Hash, perturb: var UHash): Hash {.inline.} = - # FACTOR between hashcommon.nextTry, intsets.nextTry - # an optimization would be to use `(h + 1) and maxHash` for a few iterations - # and then switch to the formula below, to get "best of both worlds": good - # cache locality, except when a collision cluster is detected (ie, large number - # of iterations). 
- const PERTURB_SHIFT = 5 # consider tying this to `numBitsMask = fastLog2(t.dataLen)` - result = cast[Hash]((5*cast[uint](h) + 1 + perturb) and cast[uint](maxHash)) - perturb = perturb shr PERTURB_SHIFT +proc nextTry(h, maxHash: Hash): Hash {.inline.} = + result = (h + 1) and maxHash proc mustRehash[T](t: T): bool {.inline.} = - # FACTOR between hashcommon.mustRehash, intsets.mustRehash - let counter2 = t.counter + t.countDeleted - let length = t.dataLen - assert(length > counter2) - result = (length * 2 < counter2 * 3) or (length - counter2 < 4) # synchronize with `rightSize` + assert(t.data.len > t.counter) + result = (t.data.len * 2 < t.counter * 3) or (t.data.len - t.counter < 4) proc rightSize*(count: Natural): int {.inline.} = - ## Return the value of `initialSize` to support `count` items. + ## Return the value of ``initialSize`` to support ``count`` items. ## ## If more items are expected to be added, simply add that ## expected extra amount to the parameter before calling this. - ## - ## Internally, we want `mustRehash(t) == false` for t that was just resized. + # # Make sure to synchronize with `mustRehash` result = nextPowerOfTwo(count * 3 div 2 + 4) -template getPerturb(t: typed, hc: Hash): UHash = - # we can't use `fastLog2(dataLen(t))` because importing `bitops` would cause codegen errors - # so we use a practical value of half the bit width (eg 64 / 2 = 32 on 64bit machines) - let numBitsMask = sizeof(Hash) * 4 # ie, sizeof(Hash) * 8 / 2 - # this makes a major difference for cases like #13393; it causes the bits - # that were masked out in 1st position so they'll be masked in instead, and - # influence the recursion in nextTry earlier rather than later. - translateBits(cast[uint](hc), numBitsMask) - template rawGetKnownHCImpl() {.dirty.} = if t.dataLen == 0: return -1 var h: Hash = hc and maxHash(t) # start with real hash value - var perturb = t.getPerturb(hc) - var deletedIndex = -1 - while true: - if isFilledAndValid(t.data[h].hcode): - # Compare hc THEN key with boolean short circuit. This makes the common case - # zero ==key's for missing (e.g.inserts) and exactly one ==key for present. - # It does slow down succeeding lookups by one extra Hash cmp&and..usually - # just a few clock cycles, generally worth it for any non-integer-like A. - # performance: we optimize this: depending on type(key), skip hc comparison - if t.data[h].hcode == hc and t.data[h].key == key: - return h - h = nextTry(h, maxHash(t), perturb) - elif t.data[h].hcode == deletedMarker: - if deletedIndex == -1: - deletedIndex = h - h = nextTry(h, maxHash(t), perturb) - else: - break - if deletedIndex == -1: - result = -1 - h # < 0 => MISSING; insert idx = -1 - result - else: - # we prefer returning a (in fact the 1st found) deleted index - result = -1 - deletedIndex + while isFilled(t.data[h].hcode): + # Compare hc THEN key with boolean short circuit. This makes the common case + # zero ==key's for missing (e.g.inserts) and exactly one ==key for present. + # It does slow down succeeding lookups by one extra Hash cmp&and..usually + # just a few clock cycles, generally worth it for any non-integer-like A. + if t.data[h].hcode == hc and t.data[h].key == key: + return h + h = nextTry(h, maxHash(t)) + result = -1 - h # < 0 => MISSING; insert idx = -1 - result proc rawGetKnownHC[X, A](t: X, key: A, hc: Hash): int {.inline.} = rawGetKnownHCImpl() @@ -107,8 +63,6 @@ template genHashImpl(key, hc: typed) = hc = hash(key) if hc == 0: # This almost never taken branch should be very predictable. 
hc = 314159265 # Value doesn't matter; Any non-zero favorite is fine. - elif hc == deletedMarker: - hc = 214159261 template genHash(key: typed): Hash = var res: Hash diff --git a/lib/pure/collections/intsets.nim b/lib/pure/collections/intsets.nim index 1967a01498d0d..eae3fa4471266 100644 --- a/lib/pure/collections/intsets.nim +++ b/lib/pure/collections/intsets.nim @@ -46,20 +46,16 @@ type IntSet* = object ## An efficient set of `int` implemented as a sparse bit set. elems: int # only valid for small numbers counter, max: int - countDeleted: int head: PTrunk data: TrunkSeq a: array[0..33, int] # profiling shows that 34 elements are enough proc mustRehash[T](t: T): bool {.inline.} = - # FACTOR between hashcommon.mustRehash, intsets.mustRehash - let counter2 = t.counter + t.countDeleted let length = t.max + 1 - assert length > counter2 - result = (length * 2 < counter2 * 3) or (length - counter2 < 4) + assert length > t.counter + result = (length * 2 < t.counter * 3) or (length - t.counter < 4) proc nextTry(h, maxHash: Hash, perturb: var Hash): Hash {.inline.} = - # FACTOR between hashcommon.nextTry, intsets.nextTry const PERTURB_SHIFT = 5 var perturb2 = cast[uint](perturb) shr PERTURB_SHIFT perturb = cast[Hash](perturb2) diff --git a/lib/pure/collections/setimpl.nim b/lib/pure/collections/setimpl.nim index 9e68cb0722b4a..d798cbcb3d6b0 100644 --- a/lib/pure/collections/setimpl.nim +++ b/lib/pure/collections/setimpl.nim @@ -35,11 +35,10 @@ proc rawInsert[A](s: var HashSet[A], data: var KeyValuePairSeq[A], key: A, proc enlarge[A](s: var HashSet[A]) = var n: KeyValuePairSeq[A] - newSeq(n, s.counter.rightSize) + newSeq(n, len(s.data) * growthFactor) swap(s.data, n) # n is now old seq - s.countDeleted = 0 for i in countup(0, high(n)): - if isFilledAndValid(n[i].hcode): + if isFilled(n[i].hcode): var j = -1 - rawGetKnownHC(s, n[i].key, n[i].hcode) rawInsert(s, s.data, n[i].key, n[i].hcode, j) @@ -69,6 +68,11 @@ template containsOrInclImpl() {.dirty.} = rawInsert(s, s.data, key, hc, -1 - index) inc(s.counter) +template doWhile(a, b) = + while true: + b + if not a: break + proc exclImpl[A](s: var HashSet[A], key: A): bool {.inline.} = var hc: Hash var i = rawGet(s, key, hc) @@ -78,9 +82,17 @@ proc exclImpl[A](s: var HashSet[A], key: A): bool {.inline.} = if i >= 0: result = false dec(s.counter) - inc(s.countDeleted) - s.data[i].hcode = deletedMarker - s.data[i].key = default(type(s.data[i].key)) + while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1 + var j = i # The correctness of this depends on (h+1) in nextTry, + var r = j # though may be adaptable to other simple sequences. 
+ s.data[i].hcode = 0 # mark current EMPTY + s.data[i].key = default(type(s.data[i].key)) + doWhile((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)): + i = (i + 1) and msk # increment mod table size + if isEmpty(s.data[i].hcode): # end of collision cluster; So all done + return + r = s.data[i].hcode and msk # "home" location of key@i + s.data[j] = move(s.data[i]) # data[i] will be marked EMPTY next loop template dollarImpl() {.dirty.} = result = "{" @@ -113,7 +125,7 @@ proc enlarge[A](s: var OrderedSet[A]) = swap(s.data, n) while h >= 0: var nxt = n[h].next - if isFilled(n[h].hcode): # should be isFilledAndValid once tombstones are used + if isFilled(n[h].hcode): var j = -1 - rawGetKnownHC(s, n[h].key, n[h].hcode) rawInsert(s, s.data, n[h].key, n[h].hcode, j) h = nxt @@ -131,7 +143,7 @@ proc exclImpl[A](s: var OrderedSet[A], key: A): bool {.inline.} = result = true while h >= 0: var nxt = n[h].next - if isFilled(n[h].hcode): # should be isFilledAndValid once tombstones are used + if isFilled(n[h].hcode): if n[h].hcode == hc and n[h].key == key: dec s.counter result = false diff --git a/lib/pure/collections/sets.nim b/lib/pure/collections/sets.nim index caa25dbb31e25..8c04a05e3c852 100644 --- a/lib/pure/collections/sets.nim +++ b/lib/pure/collections/sets.nim @@ -68,7 +68,6 @@ type ## before calling other procs on it. data: KeyValuePairSeq[A] counter: int - countDeleted: int type OrderedKeyValuePair[A] = tuple[ @@ -81,7 +80,6 @@ type ## <#initOrderedSet,int>`_ before calling other procs on it. data: OrderedKeyValuePairSeq[A] counter, first, last: int - countDeleted: int const defaultInitialSize* = 64 @@ -249,7 +247,7 @@ iterator items*[A](s: HashSet[A]): A = ## echo b ## # --> {(a: 1, b: 3), (a: 0, b: 4)} for h in 0 .. high(s.data): - if isFilledAndValid(s.data[h].hcode): yield s.data[h].key + if isFilled(s.data[h].hcode): yield s.data[h].key proc containsOrIncl*[A](s: var HashSet[A], key: A): bool = ## Includes `key` in the set `s` and tells if `key` was already in `s`. @@ -341,7 +339,7 @@ proc pop*[A](s: var HashSet[A]): A = doAssertRaises(KeyError, echo s.pop) for h in 0 .. high(s.data): - if isFilledAndValid(s.data[h].hcode): + if isFilled(s.data[h].hcode): result = s.data[h].key excl(s, result) return result @@ -574,8 +572,7 @@ proc map*[A, B](data: HashSet[A], op: proc (x: A): B {.closure.}): HashSet[B] = proc hash*[A](s: HashSet[A]): Hash = ## Hashing of HashSet. for h in 0 .. 
high(s.data): - if isFilledAndValid(s.data[h].hcode): - result = result xor s.data[h].hcode + result = result xor s.data[h].hcode result = !$result proc `$`*[A](s: HashSet[A]): string = @@ -593,6 +590,7 @@ proc `$`*[A](s: HashSet[A]): string = ## # --> {no, esc'aping, is " provided} dollarImpl() + proc initSet*[A](initialSize = defaultInitialSize): HashSet[A] {.deprecated: "Deprecated since v0.20, use 'initHashSet'".} = initHashSet[A](initialSize) @@ -624,7 +622,7 @@ template forAllOrderedPairs(yieldStmt: untyped) {.dirty.} = var idx = 0 while h >= 0: var nxt = s.data[h].next - if isFilledAndValid(s.data[h].hcode): + if isFilled(s.data[h].hcode): yieldStmt inc(idx) h = nxt @@ -858,7 +856,7 @@ proc `==`*[A](s, t: OrderedSet[A]): bool = while h >= 0 and g >= 0: var nxh = s.data[h].next var nxg = t.data[g].next - if isFilledAndValid(s.data[h].hcode) and isFilledAndValid(t.data[g].hcode): + if isFilled(s.data[h].hcode) and isFilled(t.data[g].hcode): if s.data[h].key == t.data[g].key: inc compared else: diff --git a/lib/pure/collections/sharedtables.nim b/lib/pure/collections/sharedtables.nim index 27ac5e84f7e3b..2a1c0543f3b3b 100644 --- a/lib/pure/collections/sharedtables.nim +++ b/lib/pure/collections/sharedtables.nim @@ -25,7 +25,6 @@ type SharedTable*[A, B] = object ## generic hash SharedTable data: KeyValuePairSeq[A, B] counter, dataLen: int - countDeleted: int lock: Lock template maxHash(t): untyped = t.dataLen-1 @@ -50,10 +49,9 @@ proc enlarge[A, B](t: var SharedTable[A, B]) = for i in 0..= 0: dec(t.counter) - inc(t.countDeleted) - t.data[i].hcode = deletedMarker - t.data[i].key = default(type(t.data[i].key)) - t.data[i].val = default(type(t.data[i].val)) - # mustRehash + enlarge not needed because counter+countDeleted doesn't change + block outer: + while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1 + var j = i # The correctness of this depends on (h+1) in nextTry, + var r = j # though may be adaptable to other simple sequences. + t.data[i].hcode = 0 # mark current EMPTY + t.data[i].key = default(type(t.data[i].key)) + t.data[i].val = default(type(t.data[i].val)) + while true: + i = (i + 1) and msk # increment mod table size + if isEmpty(t.data[i].hcode): # end of collision cluster; So all done + break outer + r = t.data[i].hcode and msk # "home" location of key@i + if not ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)): + break + when defined(js): + t.data[j] = t.data[i] + else: + t.data[j] = move(t.data[i]) # data[j] will be marked EMPTY next loop template delImpl() {.dirty.} = var hc: Hash @@ -108,7 +115,6 @@ template clearImpl() {.dirty.} = t.counter = 0 template ctAnd(a, b): bool = - # pending https://github.com/nim-lang/Nim/issues/13502 when a: when b: true else: false @@ -126,7 +132,7 @@ template initImpl(result: typed, size: int) = result.last = -1 template insertImpl() = # for CountTable - checkIfInitialized() + if t.dataLen == 0: initImpl(t, defaultInitialSize) if mustRehash(t): enlarge(t) ctRawInsert(t, t.data, key, val) inc(t.counter) diff --git a/lib/pure/collections/tables.nim b/lib/pure/collections/tables.nim index b79e396fd176f..131e0ad80360b 100644 --- a/lib/pure/collections/tables.nim +++ b/lib/pure/collections/tables.nim @@ -233,7 +233,6 @@ type ## For creating an empty Table, use `initTable proc<#initTable,int>`_. data: KeyValuePairSeq[A, B] counter: int - countDeleted: int TableRef*[A, B] = ref Table[A, B] ## Ref version of `Table<#Table>`_. 
## ## For creating a new empty TableRef, use `newTable proc @@ -266,16 +265,14 @@ template get(t, key): untyped = proc enlarge[A, B](t: var Table[A, B]) = var n: KeyValuePairSeq[A, B] - newSeq(n, t.counter.rightSize) + newSeq(n, len(t.data) * growthFactor) swap(t.data, n) - t.countDeleted = 0 for i in countup(0, high(n)): let eh = n[i].hcode - if isFilledAndValid(eh): + if isFilled(eh): var j: Hash = eh and maxHash(t) - var perturb = t.getPerturb(eh) while isFilled(t.data[j].hcode): - j = nextTry(j, maxHash(t), perturb) + j = nextTry(j, maxHash(t)) when defined(js): rawInsert(t, t.data, n[i].key, n[i].val, eh, j) else: @@ -662,7 +659,7 @@ iterator pairs*[A, B](t: Table[A, B]): (A, B) = ## # value: [1, 5, 7, 9] let L = len(t) for h in 0 .. high(t.data): - if isFilledAndValid(t.data[h].hcode): + if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val) assert(len(t) == L, "the length of the table changed while iterating over it") @@ -684,7 +681,7 @@ iterator mpairs*[A, B](t: var Table[A, B]): (A, var B) = let L = len(t) for h in 0 .. high(t.data): - if isFilledAndValid(t.data[h].hcode): + if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val) assert(len(t) == L, "the length of the table changed while iterating over it") @@ -705,7 +702,7 @@ iterator keys*[A, B](t: Table[A, B]): A = let L = len(t) for h in 0 .. high(t.data): - if isFilledAndValid(t.data[h].hcode): + if isFilled(t.data[h].hcode): yield t.data[h].key assert(len(t) == L, "the length of the table changed while iterating over it") @@ -726,7 +723,7 @@ iterator values*[A, B](t: Table[A, B]): B = let L = len(t) for h in 0 .. high(t.data): - if isFilledAndValid(t.data[h].hcode): + if isFilled(t.data[h].hcode): yield t.data[h].val assert(len(t) == L, "the length of the table changed while iterating over it") @@ -748,27 +745,10 @@ iterator mvalues*[A, B](t: var Table[A, B]): var B = let L = len(t) for h in 0 .. high(t.data): - if isFilledAndValid(t.data[h].hcode): + if isFilled(t.data[h].hcode): yield t.data[h].val assert(len(t) == L, "the length of the table changed while iterating over it") -template hasKeyOrPutCache(cache, h): bool = - # using `IntSet` would be an option worth considering to avoid quadratic - # behavior in case user misuses Table with lots of duplicate keys; but it - # has overhead in the common case of small number of duplicates. - # However: when lots of duplicates are used, all operations would be slow - # anyway because the full `hash(key)` is identical for these, which makes - # `nextTry` follow the exact same path for each key, resulting in large - # collision clusters. Alternatives could involve modifying the hash/retrieval - # based on duplicate key count. - var ret = false - for hi in cache: - if hi == h: - ret = true - break - if not ret: cache.add h - ret - iterator allValues*[A, B](t: Table[A, B]; key: A): B = ## Iterates over any value in the table ``t`` that belongs to the given ``key``. 
## @@ -782,20 +762,13 @@ iterator allValues*[A, B](t: Table[A, B]; key: A): B = for i in 1..3: a.add('z', 10*i) doAssert toSeq(a.pairs).sorted == @[('a', 3), ('b', 5), ('z', 10), ('z', 20), ('z', 30)] doAssert sorted(toSeq(a.allValues('z'))) == @[10, 20, 30] - - let hc = genHash(key) - var h: Hash = hc and high(t.data) + var h: Hash = genHash(key) and high(t.data) let L = len(t) - var perturb = t.getPerturb(hc) - - var num = 0 - var cache: seq[Hash] - while isFilled(t.data[h].hcode): # `isFilledAndValid` would be incorrect, see test for `allValues` - if t.data[h].hcode == hc and t.data[h].key == key: - if not hasKeyOrPutCache(cache, h): - yield t.data[h].val - assert(len(t) == L, "the length of the table changed while iterating over it") - h = nextTry(h, high(t.data), perturb) + while isFilled(t.data[h].hcode): + if t.data[h].key == key: + yield t.data[h].val + assert(len(t) == L, "the length of the table changed while iterating over it") + h = nextTry(h, high(t.data)) @@ -1118,7 +1091,7 @@ iterator pairs*[A, B](t: TableRef[A, B]): (A, B) = ## # value: [1, 5, 7, 9] let L = len(t) for h in 0 .. high(t.data): - if isFilledAndValid(t.data[h].hcode): + if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val) assert(len(t) == L, "the length of the table changed while iterating over it") @@ -1140,7 +1113,7 @@ iterator mpairs*[A, B](t: TableRef[A, B]): (A, var B) = let L = len(t) for h in 0 .. high(t.data): - if isFilledAndValid(t.data[h].hcode): + if isFilled(t.data[h].hcode): yield (t.data[h].key, t.data[h].val) assert(len(t) == L, "the length of the table changed while iterating over it") @@ -1161,7 +1134,7 @@ iterator keys*[A, B](t: TableRef[A, B]): A = let L = len(t) for h in 0 .. high(t.data): - if isFilledAndValid(t.data[h].hcode): + if isFilled(t.data[h].hcode): yield t.data[h].key assert(len(t) == L, "the length of the table changed while iterating over it") @@ -1182,7 +1155,7 @@ iterator values*[A, B](t: TableRef[A, B]): B = let L = len(t) for h in 0 .. high(t.data): - if isFilledAndValid(t.data[h].hcode): + if isFilled(t.data[h].hcode): yield t.data[h].val assert(len(t) == L, "the length of the table changed while iterating over it") @@ -1203,7 +1176,7 @@ iterator mvalues*[A, B](t: TableRef[A, B]): var B = let L = len(t) for h in 0 .. high(t.data): - if isFilledAndValid(t.data[h].hcode): + if isFilled(t.data[h].hcode): yield t.data[h].val assert(len(t) == L, "the length of the table changed while iterating over it") @@ -1229,8 +1202,6 @@ type ## <#initOrderedTable,int>`_. data: OrderedKeyValuePairSeq[A, B] counter, first, last: int - countDeleted: int - OrderedTableRef*[A, B] = ref OrderedTable[A, B] ## Ref version of ## `OrderedTable<#OrderedTable>`_. 
## @@ -1252,7 +1223,7 @@ proc rawGet[A, B](t: OrderedTable[A, B], key: A, hc: var Hash): int = proc rawInsert[A, B](t: var OrderedTable[A, B], data: var OrderedKeyValuePairSeq[A, B], key: A, val: B, hc: Hash, h: Hash) = - rawInsertImpl(t) + rawInsertImpl() data[h].next = -1 if t.first < 0: t.first = h if t.last >= 0: data[t.last].next = h @@ -1260,20 +1231,18 @@ proc rawInsert[A, B](t: var OrderedTable[A, B], proc enlarge[A, B](t: var OrderedTable[A, B]) = var n: OrderedKeyValuePairSeq[A, B] - newSeq(n, t.counter.rightSize) + newSeq(n, len(t.data) * growthFactor) var h = t.first t.first = -1 t.last = -1 swap(t.data, n) - t.countDeleted = 0 while h >= 0: var nxt = n[h].next let eh = n[h].hcode - if isFilledAndValid(eh): + if isFilled(eh): var j: Hash = eh and maxHash(t) - var perturb = t.getPerturb(eh) while isFilled(t.data[j].hcode): - j = nextTry(j, maxHash(t), perturb) + j = nextTry(j, maxHash(t)) rawInsert(t, t.data, move n[h].key, move n[h].val, n[h].hcode, j) h = nxt @@ -1282,10 +1251,6 @@ template forAllOrderedPairs(yieldStmt: untyped) {.dirty.} = var h = t.first while h >= 0: var nxt = t.data[h].next - # For OrderedTable/OrderedTableRef, isFilled is ok because `del` is O(n) - # and doesn't create tombsones, but if it does start using tombstones, - # carefully replace `isFilled` by `isFilledAndValid` as appropriate for these - # table types only, ditto with `OrderedSet`. if isFilled(t.data[h].hcode): yieldStmt h = nxt @@ -2229,7 +2194,6 @@ type ## <#initCountTable,int>`_. data: seq[tuple[key: A, val: int]] counter: int - countDeleted: int isSorted: bool CountTableRef*[A] = ref CountTable[A] ## Ref version of ## `CountTable<#CountTable>`_. @@ -2242,10 +2206,8 @@ type proc ctRawInsert[A](t: CountTable[A], data: var seq[tuple[key: A, val: int]], key: A, val: int) = - let hc = hash(key) - var perturb = t.getPerturb(hc) - var h: Hash = hc and high(data) - while data[h].val != 0: h = nextTry(h, high(data), perturb) # TODO: handle deletedMarker + var h: Hash = hash(key) and high(data) + while data[h].val != 0: h = nextTry(h, high(data)) data[h].key = key data[h].val = val @@ -2272,12 +2234,10 @@ proc remove[A](t: var CountTable[A], key: A) = proc rawGet[A](t: CountTable[A], key: A): int = if t.data.len == 0: return -1 - let hc = hash(key) - var perturb = t.getPerturb(hc) - var h: Hash = hc and high(t.data) # start with real hash value - while t.data[h].val != 0: # TODO: may need to handle t.data[h].hcode == deletedMarker? + var h: Hash = hash(key) and high(t.data) # start with real hash value + while t.data[h].val != 0: if t.data[h].key == key: return h - h = nextTry(h, high(t.data), perturb) + h = nextTry(h, high(t.data)) result = -1 - h # < 0 => MISSING; insert idx = -1 - result template ctget(t, key, default: untyped): untyped = diff --git a/tests/arc/trepr.nim b/tests/arc/trepr.nim index 391622ff6d04f..7a92112eda254 100644 --- a/tests/arc/trepr.nim +++ b/tests/arc/trepr.nim @@ -1,7 +1,7 @@ discard """ cmd: "nim c --gc:arc $file" nimout: '''(a: true, n: doAssert) -Table[system.string, trepr.MyType](data: @[], counter: 0, countDeleted: 0) +Table[system.string, trepr.MyType](data: @[], counter: 0) nil ''' """ From 0433f4bec04eff183426be60a4361dd932ff5ebe Mon Sep 17 00:00:00 2001 From: Charles Blake Date: Tue, 31 Mar 2020 10:10:16 -0400 Subject: [PATCH 02/29] Fix `data.len` -> `dataLen` problem. 
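A standalone sketch (not code in this patch) of the "synchronize with
`rightSize`" / "synchronize with `mustRehash`" comments above: restating the
two formulas and asserting that a table sized by `rightSize(n)` is never
immediately due for growing once it holds `n` items.

    import math                    # nextPowerOfTwo

    proc rightSize(count: Natural): int =
      nextPowerOfTwo(count * 3 div 2 + 4)

    proc mustRehash(length, counter: int): bool =
      assert length > counter
      (length * 2 < counter * 3) or (length - counter < 4)

    for n in 0 .. 10_000:
      doAssert not mustRehash(rightSize(n), n)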
--- lib/pure/collections/hashcommon.nim | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/pure/collections/hashcommon.nim b/lib/pure/collections/hashcommon.nim index 851b6cae94e47..e998145e72028 100644 --- a/lib/pure/collections/hashcommon.nim +++ b/lib/pure/collections/hashcommon.nim @@ -30,8 +30,8 @@ proc nextTry(h, maxHash: Hash): Hash {.inline.} = result = (h + 1) and maxHash proc mustRehash[T](t: T): bool {.inline.} = - assert(t.data.len > t.counter) - result = (t.data.len * 2 < t.counter * 3) or (t.data.len - t.counter < 4) + assert(t.dataLen > t.counter) + result = (t.dataLen * 2 < t.counter * 3) or (t.dataLen - t.counter < 4) proc rightSize*(count: Natural): int {.inline.} = ## Return the value of ``initialSize`` to support ``count`` items. From 5a43e2c4d259310b4dde0640042fb4c77e1553ed Mon Sep 17 00:00:00 2001 From: Charles Blake Date: Tue, 31 Mar 2020 14:16:54 -0400 Subject: [PATCH 03/29] This is an alternate resolution to https://github.com/nim-lang/Nim/issues/13393 (which arguably could be resolved outside the stdlib). Add version1 of Wang Yi's hash specialized to 8 byte integers. This gives simple help to users having trouble with overly colliding hash(key)s. I.e., A) `import hashes; proc hash(x: myInt): Hash = hashWangYi1(int(x))` in the instantiation context of a `HashSet` or `Table` or B) more globally, compile with `nim c -d:hashWangYi1`. No hash can be all things to all use cases, but this one is A) vetted to scramble well by the SMHasher test suite (a necessarily limited but far more thorough test than prior proposals here), B) only a few ALU ops on many common CPUs, and C) possesses an easy via "grade school multi-digit multiplication" fall back for weaker deployment contexts. Some people might want to stampede ahead unbridled, but my view is that a good plan is to A) include this in the stdlib for a release or three to let people try it on various key sets nim-core could realistically never access/test (maybe mentioning it in the changelog so people actually try it out), B) have them report problems (if any), C) if all seems good, make the stdlib more novice friendly by adding `hashIdentity(x)=x` and changing the default `hash() = hashWangYi1` with some `when defined` rearranging so users can `-d:hashIdentity` if they want the old behavior back. This plan is compatible with any number of competing integer hashes if people want to add them. I would strongly recommend they all *at least* pass the SMHasher suite since the idea here is to become more friendly to novices who do not generally understand hashing failure modes. --- lib/pure/hashes.nim | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim index d799fb0620c10..1fa0627e3d7a1 100644 --- a/lib/pure/hashes.nim +++ b/lib/pure/hashes.nim @@ -70,6 +70,38 @@ proc `!$`*(h: Hash): Hash {.inline.} = res = res + res shl 15 result = cast[Hash](res) +proc hiXorLo(A, B: uint64): uint64 {.inline.} = + # Xor of high & low 8B of full 16B product + when defined(gcc) or defined(llvm_gcc) or defined(clang): + {.emit: """__uint128_t r = A; r *= B; return (r >> 64) ^ r; """.} + elif defined(windows) and not defined(tcc): + {.emit: """A = _umul128(A, B, &B); return A ^ B;""".} + else: # Fall back for weaker platforms/compilers, e.g. Nim VM, etc. 
+ let + ha = A shr 32 + hb = B shr 32 + la = A and 0xFFFFFFFF'u64 + lb = B and 0xFFFFFFFF'u64 + rh = ha * hb + rm0 = ha * lb + rm1 = hb * la + rl = la * lb + t = rl + (rm0 shl 32) + var c = if t < rl: 1'u64 else: 0'u64 + let lo = t + (rm1 shl 32) + c += (if lo < t: 1'u64 else: 0'u64) + let hi = rh + (rm0 shr 32) + (rm1 shr 32) + c + return hi xor lo + +proc hashWangYi1*(x: int64|uint64|Hash): Hash {.inline.} = + ## Wang Yi's hash_v1 for 8B int. https://github.com/rurban/smhasher has more + ## details. This passed all scrambling tests in Spring 2019 and is simple. + ## NOTE: It's ok to define ``proc(x: int16): Hash = hashWangYi1(Hash(x))``. + const P0 = 0xa0761d6478bd642f'u64 + const P1 = 0xe7037ed1a0b428db'u64 + const P5x8 = 0xeb44accab455d165'u64 xor 8'u64 + Hash(hiXorLo(hiXorLo(P0, uint64(x) xor P1), P5x8)) + proc hashData*(data: pointer, size: int): Hash = ## Hashes an array of bytes of size `size`. var h: Hash = 0 @@ -112,9 +144,14 @@ proc hash*[T: proc](x: T): Hash {.inline.} = else: result = hash(pointer(x)) -proc hash*[T: Ordinal](x: T): Hash {.inline.} = - ## Efficient hashing of integers. - cast[Hash](ord(x)) +when defined(hashWangYi1): + proc hash*[T: Ordinal](x: T): Hash {.inline.} = + ## Efficient hashing of integers. + hashWangYi1(uint64(ord(x))) +else: + proc hash*[T: Ordinal](x: T): Hash {.inline.} = + ## Efficient hashing of integers. + cast[Hash](ord(x)) proc hash*(x: float): Hash {.inline.} = ## Efficient hashing of floats. From 4b0c41ad58e5d9cd5b7eee0e78995a1a342ee378 Mon Sep 17 00:00:00 2001 From: Charles Blake Date: Wed, 1 Apr 2020 07:34:08 -0400 Subject: [PATCH 04/29] Re-organize to work around `when nimvm` limitations; Add some tests; Add a changelog.md entry. --- changelog.md | 3 +++ lib/pure/hashes.nim | 56 +++++++++++++++++++++++----------------- tests/stdlib/thashes.nim | 14 ++++++++++ 3 files changed, 50 insertions(+), 23 deletions(-) diff --git a/changelog.md b/changelog.md index 78c707bf41261..174082f0640e1 100644 --- a/changelog.md +++ b/changelog.md @@ -10,6 +10,9 @@ "undefined reference to `__builtin_saddll_overflow`" compile your programs with `-d:nimEmulateOverflowChecks`. +- The default hash for `Ordinal` has changed to something more bit-scrambling. + `import hashes; proc hash(x: myInt): Hash = hashIdentity(x)` recovers the old + one in an instantiation context while `-d:nimV1hash` recovers it globally. ### Breaking changes in the standard library diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim index 1fa0627e3d7a1..68d5649d51e8e 100644 --- a/lib/pure/hashes.nim +++ b/lib/pure/hashes.nim @@ -70,28 +70,34 @@ proc `!$`*(h: Hash): Hash {.inline.} = res = res + res shl 15 result = cast[Hash](res) +proc hiXorLoFallback(A, B: uint64): uint64 {.inline.} = + let # Fall back for weaker platforms/compilers, e.g. Nim VM, etc. + ha = A shr 32 + hb = B shr 32 + la = A and 0xFFFFFFFF'u64 + lb = B and 0xFFFFFFFF'u64 + rh = ha * hb + rm0 = ha * lb + rm1 = hb * la + rl = la * lb + t = rl + (rm0 shl 32) + var c = if t < rl: 1'u64 else: 0'u64 + let lo = t + (rm1 shl 32) + c += (if lo < t: 1'u64 else: 0'u64) + let hi = rh + (rm0 shr 32) + (rm1 shr 32) + c + return hi xor lo + proc hiXorLo(A, B: uint64): uint64 {.inline.} = # Xor of high & low 8B of full 16B product - when defined(gcc) or defined(llvm_gcc) or defined(clang): - {.emit: """__uint128_t r = A; r *= B; return (r >> 64) ^ r; """.} - elif defined(windows) and not defined(tcc): - {.emit: """A = _umul128(A, B, &B); return A ^ B;""".} - else: # Fall back for weaker platforms/compilers, e.g. Nim VM, etc. 
- let - ha = A shr 32 - hb = B shr 32 - la = A and 0xFFFFFFFF'u64 - lb = B and 0xFFFFFFFF'u64 - rh = ha * hb - rm0 = ha * lb - rm1 = hb * la - rl = la * lb - t = rl + (rm0 shl 32) - var c = if t < rl: 1'u64 else: 0'u64 - let lo = t + (rm1 shl 32) - c += (if lo < t: 1'u64 else: 0'u64) - let hi = rh + (rm0 shr 32) + (rm1 shr 32) + c - return hi xor lo + when nimvm: + result = hiXorLoFallback(A, B) # `result =` is necessary here. + else: + when defined(gcc) or defined(llvm_gcc) or defined(clang): + {.emit: """__uint128_t r = A; r *= B; return (r >> 64) ^ r;""".} + elif defined(windows) and not defined(tcc): + {.emit: """A = _umul128(A, B, &B); return A ^ B;""".} + else: + result = hiXorLoFallback(A, B: uint64) proc hashWangYi1*(x: int64|uint64|Hash): Hash {.inline.} = ## Wang Yi's hash_v1 for 8B int. https://github.com/rurban/smhasher has more @@ -144,14 +150,18 @@ proc hash*[T: proc](x: T): Hash {.inline.} = else: result = hash(pointer(x)) -when defined(hashWangYi1): +proc hashIdentity*[T: Ordinal](x: T): Hash {.inline.} = + ## The identity hash. I.e. ``hashIdentity(x) = x``. + cast[Hash](ord(x)) + +when defined(nimV1hash): proc hash*[T: Ordinal](x: T): Hash {.inline.} = ## Efficient hashing of integers. - hashWangYi1(uint64(ord(x))) + hashIdentity(x) else: proc hash*[T: Ordinal](x: T): Hash {.inline.} = ## Efficient hashing of integers. - cast[Hash](ord(x)) + hashWangYi1(uint64(ord(x))) proc hash*(x: float): Hash {.inline.} = ## Efficient hashing of floats. diff --git a/tests/stdlib/thashes.nim b/tests/stdlib/thashes.nim index 2cd1e4e479994..112c927237153 100644 --- a/tests/stdlib/thashes.nim +++ b/tests/stdlib/thashes.nim @@ -15,3 +15,17 @@ suite "hashes": test "0.0 and -0.0 should have the same hash value": var dummy = 0.0 check hash(dummy) == hash(-dummy) + + test "VM and runtime should make the same hash value (hashIdentity)": + const hi123 = hashIdentity(123) + check hashIdentity(123) == hi123 + + test "VM and runtime should make the same hash value (hashWangYi1)": + const wy123 = hashWangYi1(123) + check hashWangYi1(123) == wy123 + + test "hashIdentity value incorrect at 456": + check hashIdentity(456) == 456 + + test "hashWangYi1 value incorrect at 456": + check hashWangYi1(456) == -6421749900419628582 From a7cada90daf32b994558eab546a8a10638700d24 Mon Sep 17 00:00:00 2001 From: Charles Blake Date: Wed, 1 Apr 2020 07:44:02 -0400 Subject: [PATCH 05/29] Add less than 64-bit CPU when fork. --- lib/pure/hashes.nim | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim index 68d5649d51e8e..1ffffae91fb4e 100644 --- a/lib/pure/hashes.nim +++ b/lib/pure/hashes.nim @@ -92,7 +92,9 @@ proc hiXorLo(A, B: uint64): uint64 {.inline.} = when nimvm: result = hiXorLoFallback(A, B) # `result =` is necessary here. else: - when defined(gcc) or defined(llvm_gcc) or defined(clang): + when int.sizeof < 8: + result = hiXorLoFallback(A, B: uint64) + elif defined(gcc) or defined(llvm_gcc) or defined(clang): {.emit: """__uint128_t r = A; r *= B; return (r >> 64) ^ r;""".} elif defined(windows) and not defined(tcc): {.emit: """A = _umul128(A, B, &B); return A ^ B;""".} From 4ed8e1aa23af5fcf8fc917f3bd04d097e12beb6b Mon Sep 17 00:00:00 2001 From: Charles Blake Date: Wed, 1 Apr 2020 07:50:58 -0400 Subject: [PATCH 06/29] Fix decl instead of call typo. 
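The broken call sites pasted the formal parameter list `(A, B: uint64)` where
the plain argument list `(A, B)` was intended. Since both of those sites are
the portable fallback path, here is a standalone sketch (the body is copied
from hashes.nim above, not new code in this patch) checking that path against
products which fit in 64 bits, where the high half must be zero:

    proc hiXorLoFallback(A, B: uint64): uint64 =
      let
        ha = A shr 32
        hb = B shr 32
        la = A and 0xFFFFFFFF'u64
        lb = B and 0xFFFFFFFF'u64
        rh = ha * hb
        rm0 = ha * lb
        rm1 = hb * la
        rl = la * lb
        t = rl + (rm0 shl 32)
      var c = if t < rl: 1'u64 else: 0'u64
      let lo = t + (rm1 shl 32)
      c += (if lo < t: 1'u64 else: 0'u64)
      let hi = rh + (rm0 shr 32) + (rm1 shr 32) + c
      return hi xor lo

    # When a*b fits in 64 bits the high half is 0, so hi xor lo is just a*b.
    doAssert hiXorLoFallback(3'u64, 5'u64) == 15'u64
    doAssert hiXorLoFallback(1'u64 shl 32, 1'u64 shl 31) == 1'u64 shl 63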
--- lib/pure/hashes.nim | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim index 1ffffae91fb4e..1e5d58026a279 100644 --- a/lib/pure/hashes.nim +++ b/lib/pure/hashes.nim @@ -93,13 +93,13 @@ proc hiXorLo(A, B: uint64): uint64 {.inline.} = result = hiXorLoFallback(A, B) # `result =` is necessary here. else: when int.sizeof < 8: - result = hiXorLoFallback(A, B: uint64) + result = hiXorLoFallback(A, B) elif defined(gcc) or defined(llvm_gcc) or defined(clang): {.emit: """__uint128_t r = A; r *= B; return (r >> 64) ^ r;""".} elif defined(windows) and not defined(tcc): {.emit: """A = _umul128(A, B, &B); return A ^ B;""".} else: - result = hiXorLoFallback(A, B: uint64) + result = hiXorLoFallback(A, B) proc hashWangYi1*(x: int64|uint64|Hash): Hash {.inline.} = ## Wang Yi's hash_v1 for 8B int. https://github.com/rurban/smhasher has more From 875e7fb704acb55588e5f23124ce50d4a33e3ee2 Mon Sep 17 00:00:00 2001 From: Charles Blake Date: Wed, 1 Apr 2020 08:46:19 -0400 Subject: [PATCH 07/29] First attempt at fixing range error on 32-bit platforms; Still do the arithmetic in doubled up 64-bit, but truncate the hash to the lower 32-bits, but then still return `uint64` to be the same. So, type correct but truncated hash value. Update `thashes.nim` as well. --- lib/pure/hashes.nim | 4 ++-- tests/stdlib/thashes.nim | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim index 1e5d58026a279..a8bb9528ed026 100644 --- a/lib/pure/hashes.nim +++ b/lib/pure/hashes.nim @@ -92,8 +92,8 @@ proc hiXorLo(A, B: uint64): uint64 {.inline.} = when nimvm: result = hiXorLoFallback(A, B) # `result =` is necessary here. else: - when int.sizeof < 8: - result = hiXorLoFallback(A, B) + when Hash.sizeof < 8: + result = uint64(Hash(hiXorLoFallback(A, B))) elif defined(gcc) or defined(llvm_gcc) or defined(clang): {.emit: """__uint128_t r = A; r *= B; return (r >> 64) ^ r;""".} elif defined(windows) and not defined(tcc): diff --git a/tests/stdlib/thashes.nim b/tests/stdlib/thashes.nim index 112c927237153..259ced2aada6c 100644 --- a/tests/stdlib/thashes.nim +++ b/tests/stdlib/thashes.nim @@ -28,4 +28,7 @@ suite "hashes": check hashIdentity(456) == 456 test "hashWangYi1 value incorrect at 456": - check hashWangYi1(456) == -6421749900419628582 + when Hash.sizeof < 8: + check hashWangYi1(456) == 1293320666 + else: + check hashWangYi1(456) == -6421749900419628582 From b3510c3ee823da2df9b6ee5eb3aa6d967dd64616 Mon Sep 17 00:00:00 2001 From: Charles Blake Date: Wed, 1 Apr 2020 09:58:56 -0400 Subject: [PATCH 08/29] A second try at making 32-bit mode CI work. --- lib/pure/hashes.nim | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim index a8bb9528ed026..babb2227c351a 100644 --- a/lib/pure/hashes.nim +++ b/lib/pure/hashes.nim @@ -72,15 +72,15 @@ proc `!$`*(h: Hash): Hash {.inline.} = proc hiXorLoFallback(A, B: uint64): uint64 {.inline.} = let # Fall back for weaker platforms/compilers, e.g. Nim VM, etc. 
- ha = A shr 32 - hb = B shr 32 - la = A and 0xFFFFFFFF'u64 - lb = B and 0xFFFFFFFF'u64 - rh = ha * hb - rm0 = ha * lb - rm1 = hb * la - rl = la * lb - t = rl + (rm0 shl 32) + ha: uint64 = A shr 32 + hb: uint64 = B shr 32 + la: uint64 = A and 0xFFFFFFFF'u64 + lb: uint64 = B and 0xFFFFFFFF'u64 + rh: uint64 = ha * hb + rm0: uint64 = ha * lb + rm1: uint64 = hb * la + rl: uint64 = la * lb + t: uint64 = rl + (rm0 shl 32) var c = if t < rl: 1'u64 else: 0'u64 let lo = t + (rm1 shl 32) c += (if lo < t: 1'u64 else: 0'u64) @@ -93,7 +93,7 @@ proc hiXorLo(A, B: uint64): uint64 {.inline.} = result = hiXorLoFallback(A, B) # `result =` is necessary here. else: when Hash.sizeof < 8: - result = uint64(Hash(hiXorLoFallback(A, B))) + result = hiXorLoFallback(A, B) elif defined(gcc) or defined(llvm_gcc) or defined(clang): {.emit: """__uint128_t r = A; r *= B; return (r >> 64) ^ r;""".} elif defined(windows) and not defined(tcc): From 38fe03c8ddf7f47cdc928c473ccd37371b42baf6 Mon Sep 17 00:00:00 2001 From: Charles Blake Date: Thu, 2 Apr 2020 04:57:52 -0400 Subject: [PATCH 09/29] Use a more systematic identifier convention than Wang Yi's code. --- lib/pure/hashes.nim | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim index babb2227c351a..2dfefb56165f7 100644 --- a/lib/pure/hashes.nim +++ b/lib/pure/hashes.nim @@ -70,36 +70,36 @@ proc `!$`*(h: Hash): Hash {.inline.} = res = res + res shl 15 result = cast[Hash](res) -proc hiXorLoFallback(A, B: uint64): uint64 {.inline.} = - let # Fall back for weaker platforms/compilers, e.g. Nim VM, etc. - ha: uint64 = A shr 32 - hb: uint64 = B shr 32 - la: uint64 = A and 0xFFFFFFFF'u64 - lb: uint64 = B and 0xFFFFFFFF'u64 - rh: uint64 = ha * hb - rm0: uint64 = ha * lb - rm1: uint64 = hb * la - rl: uint64 = la * lb - t: uint64 = rl + (rm0 shl 32) - var c = if t < rl: 1'u64 else: 0'u64 - let lo = t + (rm1 shl 32) +proc hiXorLoFallback64(a, b: uint64): uint64 {.inline.} = + let # Fall back in 64-bit arithmetic + aH = a shr 32 + aL = a and 0xFFFFFFFF'u64 + bH = b shr 32 + bL = b and 0xFFFFFFFF'u64 + rHH = aH * bH + rHL = aH * bL + rLH = aL * bH + rLL = aL * bL + t = rLL + (rHL shl 32) + var c = if t < rLL: 1'u64 else: 0'u64 + let lo = t + (rLH shl 32) c += (if lo < t: 1'u64 else: 0'u64) - let hi = rh + (rm0 shr 32) + (rm1 shr 32) + c + let hi = rHH + (rHL shr 32) + (rLH shr 32) + c return hi xor lo -proc hiXorLo(A, B: uint64): uint64 {.inline.} = +proc hiXorLo(a, b: uint64): uint64 {.inline.} = # Xor of high & low 8B of full 16B product when nimvm: - result = hiXorLoFallback(A, B) # `result =` is necessary here. + result = hiXorLoFallback64(a, b) # `result =` is necessary here. else: when Hash.sizeof < 8: - result = hiXorLoFallback(A, B) + result = hiXorLoFallback64(a, b) elif defined(gcc) or defined(llvm_gcc) or defined(clang): - {.emit: """__uint128_t r = A; r *= B; return (r >> 64) ^ r;""".} + {.emit: """__uint128_t r = a; r *= b; return (r >> 64) ^ r;""".} elif defined(windows) and not defined(tcc): - {.emit: """A = _umul128(A, B, &B); return A ^ B;""".} + {.emit: """a = _umul128(a, b, &b); return a ^ b;""".} else: - result = hiXorLoFallback(A, B) + result = hiXorLoFallback64(a, b) proc hashWangYi1*(x: int64|uint64|Hash): Hash {.inline.} = ## Wang Yi's hash_v1 for 8B int. 
https://github.com/rurban/smhasher has more @@ -108,7 +108,7 @@ proc hashWangYi1*(x: int64|uint64|Hash): Hash {.inline.} = const P0 = 0xa0761d6478bd642f'u64 const P1 = 0xe7037ed1a0b428db'u64 const P5x8 = 0xeb44accab455d165'u64 xor 8'u64 - Hash(hiXorLo(hiXorLo(P0, uint64(x) xor P1), P5x8)) + cast[Hash](hiXorLo(hiXorLo(P0, uint64(x) xor P1), P5x8)) proc hashData*(data: pointer, size: int): Hash = ## Hashes an array of bytes of size `size`. From 1474ae46e83f08238dbc535201d06e139c2e83e0 Mon Sep 17 00:00:00 2001 From: Charles Blake Date: Thu, 2 Apr 2020 06:21:55 -0400 Subject: [PATCH 10/29] Fix test that was wrong for as long as `toHashSet` used `rightSize` (a very long time, I think). `$a`/`$b` depend on iteration order which varies with table range reduced hash order which varies with range for some `hash()`. With 3 elements, 3!=6 is small and we've just gotten lucky with past experimental `hash()` changes. An alternate fix here would be to not stringify but use the HashSet operators, but it is not clear that doesn't alter the "spirit" of the test. --- lib/pure/collections/sets.nim | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pure/collections/sets.nim b/lib/pure/collections/sets.nim index 8c04a05e3c852..b92dc2df78e21 100644 --- a/lib/pure/collections/sets.nim +++ b/lib/pure/collections/sets.nim @@ -1008,7 +1008,7 @@ when isMainModule and not defined(release): block toSeqAndString: var a = toHashSet([2, 7, 5]) - var b = initHashSet[int]() + var b = initHashSet[int](rightSize(3)) for x in [2, 7, 5]: b.incl(x) assert($a == $b) #echo a From b78e18d63e19dee0c9adbe08169f2da70b683783 Mon Sep 17 00:00:00 2001 From: Charles Blake Date: Thu, 2 Apr 2020 07:06:11 -0400 Subject: [PATCH 11/29] Fix another stringified test depending upon hash order. --- tests/collections/tcollections.nim | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/collections/tcollections.nim b/tests/collections/tcollections.nim index 5c95cee28f0ba..11c9aa58d4ff9 100644 --- a/tests/collections/tcollections.nim +++ b/tests/collections/tcollections.nim @@ -76,8 +76,11 @@ doAssert $(toOrderedSet(["1", "2", "3"])) == """{"1", "2", "3"}""" doAssert $(toOrderedSet(['1', '2', '3'])) == """{'1', '2', '3'}""" # Tests for tables -doAssert $({1: "1", 2: "2"}.toTable) == """{1: "1", 2: "2"}""" -doAssert $({"1": 1, "2": 2}.toTable) == """{"1": 1, "2": 2}""" +when defined(nimV1hash): + doAssert $({1: "1", 2: "2"}.toTable) == """{1: "1", 2: "2"}""" +else: + doAssert $({1: "1", 2: "2"}.toTable) == """{2: "2", 1: "1"}""" +doAssert $({"1": 1, "2": 2}.toTable) == """{"2": 2, "1": 1}""" # Tests for deques block: From 29424cc8adbb644208592e3435ffbe6b89b0d075 Mon Sep 17 00:00:00 2001 From: Charles Blake Date: Thu, 2 Apr 2020 07:29:24 -0400 Subject: [PATCH 12/29] Oops - revert the string-keyed test. 
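Only `hash` for `Ordinal` changes in this series; `hash(string)` is untouched,
so the string-keyed expectation being restored here should keep holding (a
sketch mirroring the test, not new code in this patch):

    import tables

    doAssert $({"1": 1, "2": 2}.toTable) == """{"1": 1, "2": 2}"""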
--- tests/collections/tcollections.nim | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/collections/tcollections.nim b/tests/collections/tcollections.nim index 11c9aa58d4ff9..d528affe4f0c2 100644 --- a/tests/collections/tcollections.nim +++ b/tests/collections/tcollections.nim @@ -80,7 +80,7 @@ when defined(nimV1hash): doAssert $({1: "1", 2: "2"}.toTable) == """{1: "1", 2: "2"}""" else: doAssert $({1: "1", 2: "2"}.toTable) == """{2: "2", 1: "1"}""" -doAssert $({"1": 1, "2": 2}.toTable) == """{"2": 2, "1": 1}""" +doAssert $({"1": 1, "2": 2}.toTable) == """{"1": 1, "2": 2}""" # Tests for deques block: From add55fea94d621ad510887047efea1b3d99be8f3 Mon Sep 17 00:00:00 2001 From: Charles Blake Date: Thu, 2 Apr 2020 07:52:44 -0400 Subject: [PATCH 13/29] Fix another stringify test depending on hash order. --- tests/collections/tcollections_to_string.nim | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/collections/tcollections_to_string.nim b/tests/collections/tcollections_to_string.nim index 8915e9fea02ea..e06d1a92c56cf 100644 --- a/tests/collections/tcollections_to_string.nim +++ b/tests/collections/tcollections_to_string.nim @@ -27,7 +27,10 @@ doAssert $(toOrderedSet(["1", "2", "3"])) == """{"1", "2", "3"}""" doAssert $(toOrderedSet(['1', '2', '3'])) == """{'1', '2', '3'}""" # Tests for tables -doAssert $({1: "1", 2: "2"}.toTable) == """{1: "1", 2: "2"}""" +when defined(nimV1hash): + doAssert $({1: "1", 2: "2"}.toTable) == """{1: "1", 2: "2"}""" +else: + doAssert $({1: "1", 2: "2"}.toTable) == """{2: "2", 1: "1"}""" doAssert $({"1": 1, "2": 2}.toTable) == """{"1": 1, "2": 2}""" # Tests for deques From 00a708e56374109efd11b3b81ab42a00ff4e162f Mon Sep 17 00:00:00 2001 From: Charles Blake Date: Thu, 2 Apr 2020 11:28:53 -0400 Subject: [PATCH 14/29] Add a better than always zero `defined(js)` branch. --- lib/pure/hashes.nim | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim index 2dfefb56165f7..d64ce06b0cf12 100644 --- a/lib/pure/hashes.nim +++ b/lib/pure/hashes.nim @@ -92,7 +92,16 @@ proc hiXorLo(a, b: uint64): uint64 {.inline.} = when nimvm: result = hiXorLoFallback64(a, b) # `result =` is necessary here. else: - when Hash.sizeof < 8: + when defined(js): + # When backend uses `BigInt` for `uint64` we can drop convert to `Number` + # and match other backends. Low 32-bits seem pretty random-ish for now. + asm """ + var prod = BigInt(`a`) * BigInt(`b`); + var mask = (BigInt(1) << BigInt(64)) - BigInt(1); + var res = (prod >> BigInt(64)) ^ (prod & mask); + `result` = Number(res & ((BigInt(1) << BigInt(53)) - BigInt(1))); + """ + elif Hash.sizeof < 8: result = hiXorLoFallback64(a, b) elif defined(gcc) or defined(llvm_gcc) or defined(clang): {.emit: """__uint128_t r = a; r *= b; return (r >> 64) ^ r;""".} From 2febb78571e67f7a7e74e77c3a06e88d2925f9a5 Mon Sep 17 00:00:00 2001 From: Charles Blake Date: Thu, 2 Apr 2020 12:25:44 -0400 Subject: [PATCH 15/29] It turns out to be easy to just work all in `BigInt` inside JS and thus guarantee the same low order bits of output hashes (for `isSafeInteger` input numbers). Since `hashWangYi1` output bits are equally random in all their bits, this means that tables will be safely scrambled for table sizes up to 2**32 or 4 gigaentries which is probably fine, as long as the integer keys are all < 2**53 (also likely fine). (I'm unsure why the infidelity with C/C++ back ends cut off is 32, not 53 bits.) 
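One concrete data point on that agreement (a sketch using the expected values
already in tests/stdlib/thashes.nim, not part of this commit): the 64-bit and
32-bit expectations for `hashWangYi1(456)` match in their low 32 bits.

    # -6421749900419628582 is the 64-bit expectation, 1293320666 the 32-bit one.
    doAssert (cast[uint64](-6421749900419628582) and 0xFFFFFFFF'u64) == 1293320666'u64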
Since HashSet & Table only use the low order bits, a quick corollary of this is that `$` on most int-keyed sets/tables will be the same in all the various back ends which seems a nice-to-have trait. --- lib/pure/hashes.nim | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim index d64ce06b0cf12..1608e28c58db2 100644 --- a/lib/pure/hashes.nim +++ b/lib/pure/hashes.nim @@ -92,16 +92,7 @@ proc hiXorLo(a, b: uint64): uint64 {.inline.} = when nimvm: result = hiXorLoFallback64(a, b) # `result =` is necessary here. else: - when defined(js): - # When backend uses `BigInt` for `uint64` we can drop convert to `Number` - # and match other backends. Low 32-bits seem pretty random-ish for now. - asm """ - var prod = BigInt(`a`) * BigInt(`b`); - var mask = (BigInt(1) << BigInt(64)) - BigInt(1); - var res = (prod >> BigInt(64)) ^ (prod & mask); - `result` = Number(res & ((BigInt(1) << BigInt(53)) - BigInt(1))); - """ - elif Hash.sizeof < 8: + when Hash.sizeof < 8: result = hiXorLoFallback64(a, b) elif defined(gcc) or defined(llvm_gcc) or defined(clang): {.emit: """__uint128_t r = a; r *= b; return (r >> 64) ^ r;""".} @@ -114,10 +105,24 @@ proc hashWangYi1*(x: int64|uint64|Hash): Hash {.inline.} = ## Wang Yi's hash_v1 for 8B int. https://github.com/rurban/smhasher has more ## details. This passed all scrambling tests in Spring 2019 and is simple. ## NOTE: It's ok to define ``proc(x: int16): Hash = hashWangYi1(Hash(x))``. - const P0 = 0xa0761d6478bd642f'u64 - const P1 = 0xe7037ed1a0b428db'u64 - const P5x8 = 0xeb44accab455d165'u64 xor 8'u64 - cast[Hash](hiXorLo(hiXorLo(P0, uint64(x) xor P1), P5x8)) + when defined(js) and not nimvm: + asm """ + function hi_xor_lo_js(a, b) { + var prod = BigInt(a) * BigInt(b); + var mask = (BigInt(1) << BigInt(64)) - BigInt(1); + return (prod >> BigInt(64)) ^ (prod & mask); + } + const P0 = BigInt(0xa0761d64)< Date: Fri, 3 Apr 2020 08:12:05 -0400 Subject: [PATCH 16/29] These string hash tests fail for me locally. Maybe this is what causes the CI hang for testament pcat collections? 
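A quick local probe for this kind of failure (a sketch, not part of this
patch): the failing asserts involve only string keys, so printing such a table
shows immediately whether `hash(string)` in the tree being tested differs from
stock.

    import tables

    echo {"1": 1, "2": 2}.toTable   # an order other than {"1": 1, "2": 2}
                                    # suggests hash(string) was changed locally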
--- tests/collections/tcollections.nim | 3 ++- tests/collections/tcollections_to_string.nim | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/collections/tcollections.nim b/tests/collections/tcollections.nim index d528affe4f0c2..c06c9c8cd6747 100644 --- a/tests/collections/tcollections.nim +++ b/tests/collections/tcollections.nim @@ -80,7 +80,8 @@ when defined(nimV1hash): doAssert $({1: "1", 2: "2"}.toTable) == """{1: "1", 2: "2"}""" else: doAssert $({1: "1", 2: "2"}.toTable) == """{2: "2", 1: "1"}""" -doAssert $({"1": 1, "2": 2}.toTable) == """{"1": 1, "2": 2}""" +when defined(stringHashTest): + doAssert $({"1": 1, "2": 2}.toTable) == """{"1": 1, "2": 2}""" # Tests for deques block: diff --git a/tests/collections/tcollections_to_string.nim b/tests/collections/tcollections_to_string.nim index e06d1a92c56cf..02e7927ce423b 100644 --- a/tests/collections/tcollections_to_string.nim +++ b/tests/collections/tcollections_to_string.nim @@ -31,7 +31,8 @@ when defined(nimV1hash): doAssert $({1: "1", 2: "2"}.toTable) == """{1: "1", 2: "2"}""" else: doAssert $({1: "1", 2: "2"}.toTable) == """{2: "2", 1: "1"}""" -doAssert $({"1": 1, "2": 2}.toTable) == """{"1": 1, "2": 2}""" +when defined(stringHashTest): + doAssert $({"1": 1, "2": 2}.toTable) == """{"1": 1, "2": 2}""" # Tests for deques block: From f5aae618191ba87642c9a2b379dfb6e25e2c10d5 Mon Sep 17 00:00:00 2001 From: Charles Blake Date: Fri, 3 Apr 2020 08:27:37 -0400 Subject: [PATCH 17/29] Oops. That failure was from me manually patching string hash in hashes. Revert. --- tests/collections/tcollections.nim | 3 +-- tests/collections/tcollections_to_string.nim | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/collections/tcollections.nim b/tests/collections/tcollections.nim index c06c9c8cd6747..d528affe4f0c2 100644 --- a/tests/collections/tcollections.nim +++ b/tests/collections/tcollections.nim @@ -80,8 +80,7 @@ when defined(nimV1hash): doAssert $({1: "1", 2: "2"}.toTable) == """{1: "1", 2: "2"}""" else: doAssert $({1: "1", 2: "2"}.toTable) == """{2: "2", 1: "1"}""" -when defined(stringHashTest): - doAssert $({"1": 1, "2": 2}.toTable) == """{"1": 1, "2": 2}""" +doAssert $({"1": 1, "2": 2}.toTable) == """{"1": 1, "2": 2}""" # Tests for deques block: diff --git a/tests/collections/tcollections_to_string.nim b/tests/collections/tcollections_to_string.nim index 02e7927ce423b..e06d1a92c56cf 100644 --- a/tests/collections/tcollections_to_string.nim +++ b/tests/collections/tcollections_to_string.nim @@ -31,8 +31,7 @@ when defined(nimV1hash): doAssert $({1: "1", 2: "2"}.toTable) == """{1: "1", 2: "2"}""" else: doAssert $({1: "1", 2: "2"}.toTable) == """{2: "2", 1: "1"}""" -when defined(stringHashTest): - doAssert $({"1": 1, "2": 2}.toTable) == """{"1": 1, "2": 2}""" +doAssert $({"1": 1, "2": 2}.toTable) == """{"1": 1, "2": 2}""" # Tests for deques block: From 131667ac6578c08c9ec67b348869c3f7e8fa4698 Mon Sep 17 00:00:00 2001 From: Charles Blake Date: Sat, 4 Apr 2020 09:32:10 -0400 Subject: [PATCH 18/29] Import more test improvements from https://github.com/nim-lang/Nim/pull/13410 --- lib/pure/collections/sets.nim | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/pure/collections/sets.nim b/lib/pure/collections/sets.nim index b92dc2df78e21..11639df3beebf 100644 --- a/lib/pure/collections/sets.nim +++ b/lib/pure/collections/sets.nim @@ -334,8 +334,7 @@ proc pop*[A](s: var HashSet[A]): A = ## * `clear proc <#clear,HashSet[A]>`_ runnableExamples: var s = toHashSet([2, 1]) - assert s.pop 
== 1 - assert s.pop == 2 + assert [s.pop, s.pop] in [[1, 2], [2,1]] # order unspecified doAssertRaises(KeyError, echo s.pop) for h in 0 .. high(s.data): @@ -1008,7 +1007,7 @@ when isMainModule and not defined(release): block toSeqAndString: var a = toHashSet([2, 7, 5]) - var b = initHashSet[int](rightSize(3)) + var b = initHashSet[int](rightSize(a.len)) for x in [2, 7, 5]: b.incl(x) assert($a == $b) #echo a From fca9bb5c088141ae3c0c00ffc24e16b1ec2d8c0f Mon Sep 17 00:00:00 2001 From: Charles Blake Date: Sat, 4 Apr 2020 10:24:33 -0400 Subject: [PATCH 19/29] Fix bug where I swapped order when reverting the test. Ack. --- tests/collections/tcollections.nim | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/collections/tcollections.nim b/tests/collections/tcollections.nim index d528affe4f0c2..11c9aa58d4ff9 100644 --- a/tests/collections/tcollections.nim +++ b/tests/collections/tcollections.nim @@ -80,7 +80,7 @@ when defined(nimV1hash): doAssert $({1: "1", 2: "2"}.toTable) == """{1: "1", 2: "2"}""" else: doAssert $({1: "1", 2: "2"}.toTable) == """{2: "2", 1: "1"}""" -doAssert $({"1": 1, "2": 2}.toTable) == """{"1": 1, "2": 2}""" +doAssert $({"1": 1, "2": 2}.toTable) == """{"2": 2, "1": 1}""" # Tests for deques block: From 1f11f9c04b468cd0cb757b5ca7ef5ece3d0e8846 Mon Sep 17 00:00:00 2001 From: Charles Blake Date: Sat, 4 Apr 2020 10:28:01 -0400 Subject: [PATCH 20/29] Oh, just accept either order like more and more hash tests. --- tests/collections/tcollections.nim | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/collections/tcollections.nim b/tests/collections/tcollections.nim index 11c9aa58d4ff9..3693397d4cce0 100644 --- a/tests/collections/tcollections.nim +++ b/tests/collections/tcollections.nim @@ -80,7 +80,8 @@ when defined(nimV1hash): doAssert $({1: "1", 2: "2"}.toTable) == """{1: "1", 2: "2"}""" else: doAssert $({1: "1", 2: "2"}.toTable) == """{2: "2", 1: "1"}""" -doAssert $({"1": 1, "2": 2}.toTable) == """{"2": 2, "1": 1}""" +let tabStr = $({"1": 1, "2": 2}.toTable) +doAssert (tabStr == """{"2": 2, "1": 1}""" or tabStr == """{"1": 1, "2": 2}""") # Tests for deques block: From 0ca8acc87101eb8fd4ab8dfbebe753b3d18c48f3 Mon Sep 17 00:00:00 2001 From: Charles Blake Date: Wed, 8 Apr 2020 06:31:45 -0400 Subject: [PATCH 21/29] Iterate in the same order. 
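The literal here is just reordered to match the new iteration order shown in
the docs; `==` on tables is content-based, so the assert itself was not order
sensitive either way (a sketch, not part of this patch):

    import tables

    # Table equality compares key/value contents, not insertion order.
    doAssert {0: "bird", 1: "word"}.toTable == {1: "word", 0: "bird"}.toTable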
--- lib/pure/sugar.nim | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/pure/sugar.nim b/lib/pure/sugar.nim index e9e7eb455317c..4b3db678248aa 100644 --- a/lib/pure/sugar.nim +++ b/lib/pure/sugar.nim @@ -291,7 +291,7 @@ macro collect*(init, body: untyped): untyped {.since: (1, 1).} = let z = collect(initTable(2)): for i, d in data.pairs: {i: d} - assert z == {1: "word", 0: "bird"}.toTable + assert z == {0: "bird", 1: "word"}.toTable # analyse the body, find the deepest expression 'it' and replace it via # 'result.add it' let res = genSym(nskVar, "collectResult") @@ -334,8 +334,8 @@ when isMainModule: let data = @["bird", "word"] # if this gets stuck in your head, its not my fault assert collect(newSeq, for (i, d) in data.pairs: (if i mod 2 == 0: d)) == @["bird"] - assert collect(initTable(2), for (i, d) in data.pairs: {i: d}) == {1: "word", - 0: "bird"}.toTable + assert collect(initTable(2), for (i, d) in data.pairs: {i: d}) == {0: "bird", + 1: "word"}.toTable assert initHashSet.collect(for d in data.items: {d}) == data.toHashSet let x = collect(newSeqOfCap(4)): From 41dd4a61d9d72a6a15346afdb6d80ce31b3f8d0f Mon Sep 17 00:00:00 2001 From: Charles Blake Date: Wed, 8 Apr 2020 12:27:03 -0400 Subject: [PATCH 22/29] `return` inside `emit` made us skip `popFrame` causing weird troubles. --- lib/pure/hashes.nim | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim index 1608e28c58db2..aef0572cae498 100644 --- a/lib/pure/hashes.nim +++ b/lib/pure/hashes.nim @@ -95,7 +95,7 @@ proc hiXorLo(a, b: uint64): uint64 {.inline.} = when Hash.sizeof < 8: result = hiXorLoFallback64(a, b) elif defined(gcc) or defined(llvm_gcc) or defined(clang): - {.emit: """__uint128_t r = a; r *= b; return (r >> 64) ^ r;""".} + {.emit: """__uint128_t r = a; r *= b; `result` = (r >> 64) ^ r;""".} elif defined(windows) and not defined(tcc): {.emit: """a = _umul128(a, b, &b); return a ^ b;""".} else: From 137e1f19c3d5a7d1677d6f8c8d344efec2a9a198 Mon Sep 17 00:00:00 2001 From: Charles Blake Date: Wed, 8 Apr 2020 12:29:26 -0400 Subject: [PATCH 23/29] Oops - do Windows branch also. --- lib/pure/hashes.nim | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim index aef0572cae498..a0bb374e8790c 100644 --- a/lib/pure/hashes.nim +++ b/lib/pure/hashes.nim @@ -97,7 +97,7 @@ proc hiXorLo(a, b: uint64): uint64 {.inline.} = elif defined(gcc) or defined(llvm_gcc) or defined(clang): {.emit: """__uint128_t r = a; r *= b; `result` = (r >> 64) ^ r;""".} elif defined(windows) and not defined(tcc): - {.emit: """a = _umul128(a, b, &b); return a ^ b;""".} + {.emit: """a = _umul128(a, b, &b); `result` = a ^ b;""".} else: result = hiXorLoFallback64(a, b) From b5252467057dd68fc1943bdad975c83a94477f01 Mon Sep 17 00:00:00 2001 From: Charles Blake Date: Thu, 9 Apr 2020 02:40:43 -0400 Subject: [PATCH 24/29] `nimV1hash` -> multiply-mnemonic, type-scoped `nimIntHash1` (mnemonic resolutions are "1 == identity", 1 for Nim Version 1, 1 for first/simplest/fastest in a series of possibilities. Should be very easy to remember.) --- changelog.md | 2 +- lib/pure/hashes.nim | 2 +- tests/collections/tcollections.nim | 2 +- tests/collections/tcollections_to_string.nim | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/changelog.md b/changelog.md index b04e24baa2b44..6736bc562877a 100644 --- a/changelog.md +++ b/changelog.md @@ -6,7 +6,7 @@ - The default hash for `Ordinal` has changed to something more bit-scrambling. 
`import hashes; proc hash(x: myInt): Hash = hashIdentity(x)` recovers the old - one in an instantiation context while `-d:nimV1hash` recovers it globally. + one in an instantiation context while `-d:nimIntHash1` recovers it globally. ## Language changes diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim index a0bb374e8790c..55020bba72f53 100644 --- a/lib/pure/hashes.nim +++ b/lib/pure/hashes.nim @@ -170,7 +170,7 @@ proc hashIdentity*[T: Ordinal](x: T): Hash {.inline.} = ## The identity hash. I.e. ``hashIdentity(x) = x``. cast[Hash](ord(x)) -when defined(nimV1hash): +when defined(nimIntHash1): proc hash*[T: Ordinal](x: T): Hash {.inline.} = ## Efficient hashing of integers. hashIdentity(x) diff --git a/tests/collections/tcollections.nim b/tests/collections/tcollections.nim index 3693397d4cce0..1e2858fb49876 100644 --- a/tests/collections/tcollections.nim +++ b/tests/collections/tcollections.nim @@ -76,7 +76,7 @@ doAssert $(toOrderedSet(["1", "2", "3"])) == """{"1", "2", "3"}""" doAssert $(toOrderedSet(['1', '2', '3'])) == """{'1', '2', '3'}""" # Tests for tables -when defined(nimV1hash): +when defined(nimIntHash1): doAssert $({1: "1", 2: "2"}.toTable) == """{1: "1", 2: "2"}""" else: doAssert $({1: "1", 2: "2"}.toTable) == """{2: "2", 1: "1"}""" diff --git a/tests/collections/tcollections_to_string.nim b/tests/collections/tcollections_to_string.nim index e06d1a92c56cf..62ba87334a437 100644 --- a/tests/collections/tcollections_to_string.nim +++ b/tests/collections/tcollections_to_string.nim @@ -27,7 +27,7 @@ doAssert $(toOrderedSet(["1", "2", "3"])) == """{"1", "2", "3"}""" doAssert $(toOrderedSet(['1', '2', '3'])) == """{'1', '2', '3'}""" # Tests for tables -when defined(nimV1hash): +when defined(nimIntHash1): doAssert $({1: "1", 2: "2"}.toTable) == """{1: "1", 2: "2"}""" else: doAssert $({1: "1", 2: "2"}.toTable) == """{2: "2", 1: "1"}""" From aa30554260022205ed71b1f92f0d27199d783f98 Mon Sep 17 00:00:00 2001 From: Charles Blake Date: Tue, 14 Apr 2020 06:55:17 -0400 Subject: [PATCH 25/29] Re-organize `when nimvm` logic to be a strict `when`-`else`. --- lib/pure/hashes.nim | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim index 55020bba72f53..6d655f6e51c07 100644 --- a/lib/pure/hashes.nim +++ b/lib/pure/hashes.nim @@ -105,24 +105,28 @@ proc hashWangYi1*(x: int64|uint64|Hash): Hash {.inline.} = ## Wang Yi's hash_v1 for 8B int. https://github.com/rurban/smhasher has more ## details. This passed all scrambling tests in Spring 2019 and is simple. ## NOTE: It's ok to define ``proc(x: int16): Hash = hashWangYi1(Hash(x))``. - when defined(js) and not nimvm: - asm """ - function hi_xor_lo_js(a, b) { - var prod = BigInt(a) * BigInt(b); - var mask = (BigInt(1) << BigInt(64)) - BigInt(1); - return (prod >> BigInt(64)) ^ (prod & mask); - } - const P0 = BigInt(0xa0761d64)<> BigInt(64)) ^ (prod & mask); + } + const P0 = BigInt(0xa0761d64)< Date: Tue, 14 Apr 2020 07:05:34 -0400 Subject: [PATCH 26/29] Merge other changes. --- changelog.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/changelog.md b/changelog.md index 6736bc562877a..d75e8d17b5084 100644 --- a/changelog.md +++ b/changelog.md @@ -4,6 +4,9 @@ ## Standard library additions and changes +- `uri` adds Data URI Base64, implements RFC-2397. +- Add [DOM Parser](https://developer.mozilla.org/en-US/docs/Web/API/DOMParser) + to the `dom` module for the JavaScript target. - The default hash for `Ordinal` has changed to something more bit-scrambling. 
`import hashes; proc hash(x: myInt): Hash = hashIdentity(x)` recovers the old one in an instantiation context while `-d:nimIntHash1` recovers it globally. From 67eab1284ea34a14729092d5eed1c6b57499f779 Mon Sep 17 00:00:00 2001 From: Charles Blake Date: Tue, 14 Apr 2020 07:21:38 -0400 Subject: [PATCH 27/29] Lift constants to a common area. --- lib/pure/hashes.nim | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim index 6d655f6e51c07..d023bbcc35325 100644 --- a/lib/pure/hashes.nim +++ b/lib/pure/hashes.nim @@ -105,10 +105,10 @@ proc hashWangYi1*(x: int64|uint64|Hash): Hash {.inline.} = ## Wang Yi's hash_v1 for 8B int. https://github.com/rurban/smhasher has more ## details. This passed all scrambling tests in Spring 2019 and is simple. ## NOTE: It's ok to define ``proc(x: int16): Hash = hashWangYi1(Hash(x))``. + const P0 = 0xa0761d6478bd642f'u64 + const P1 = 0xe7037ed1a0b428db'u64 + const P58 = 0xeb44accab455d165'u64 xor 8'u64 when nimvm: - const P0 = 0xa0761d6478bd642f'u64 - const P1 = 0xe7037ed1a0b428db'u64 - const P58 = 0xeb44accab455d165'u64 xor 8'u64 cast[Hash](hiXorLo(hiXorLo(P0, uint64(x) xor P1), P58)) else: when defined(js): @@ -123,9 +123,6 @@ proc hashWangYi1*(x: int64|uint64|Hash): Hash {.inline.} = var res = hi_xor_lo_js(hi_xor_lo_js(P0, BigInt(`x`) ^ P1), P58); `result` = Number(res & ((BigInt(1) << BigInt(53)) - BigInt(1)));""" else: - const P0 = 0xa0761d6478bd642f'u64 - const P1 = 0xe7037ed1a0b428db'u64 - const P58 = 0xeb44accab455d165'u64 xor 8'u64 cast[Hash](hiXorLo(hiXorLo(P0, uint64(x) xor P1), P58)) proc hashData*(data: pointer, size: int): Hash = From f3c236ea8e954d271e505e6d7b496012b3776cdd Mon Sep 17 00:00:00 2001 From: Charles Blake Date: Tue, 14 Apr 2020 09:04:10 -0400 Subject: [PATCH 28/29] Fall back to identity hash when `BigInt` is unavailable. --- lib/pure/hashes.nim | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim index d023bbcc35325..d6a7c7e41d363 100644 --- a/lib/pure/hashes.nim +++ b/lib/pure/hashes.nim @@ -112,16 +112,21 @@ proc hashWangYi1*(x: int64|uint64|Hash): Hash {.inline.} = cast[Hash](hiXorLo(hiXorLo(P0, uint64(x) xor P1), P58)) else: when defined(js): - asm """function hi_xor_lo_js(a, b) { - const prod = BigInt(a) * BigInt(b); - const mask = (BigInt(1) << BigInt(64)) - BigInt(1); - return (prod >> BigInt(64)) ^ (prod & mask); - } - const P0 = BigInt(0xa0761d64)<> BigInt(64)) ^ (prod & mask); + } + const P0 = BigInt(0xa0761d64)< Date: Wed, 15 Apr 2020 08:13:52 -0400 Subject: [PATCH 29/29] Increase timeout slightly (probably just real-time perturbation of CI system performance). --- tests/vm/tslow_tables.nim | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/vm/tslow_tables.nim b/tests/vm/tslow_tables.nim index f726c63231387..a933021e0fea1 100644 --- a/tests/vm/tslow_tables.nim +++ b/tests/vm/tslow_tables.nim @@ -1,5 +1,5 @@ discard """ - timeout: "4" + timeout: "5" action: "compile" nimout: '''create search
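
Usage sketch for the changelog entry carried through PATCH 24/29 and 26/29 (the per-type opt-out from the new bit-scrambling `Ordinal` hash). The type name `MyInt`, the `int(x)` conversion, and the final `doAssert` are illustrative assumptions only, not part of these patches; the global alternative stays as described above, i.e. compiling with `-d:nimIntHash1`.

    import hashes

    type MyInt = distinct int   # hypothetical application-specific integer type

    # Opt this one type back into the old identity behaviour in an
    # instantiation context; all other Ordinal types keep the new
    # bit-scrambling default hash.
    proc hash(x: MyInt): Hash {.inline.} = hashIdentity(int(x))

    doAssert hash(MyInt(42)) == 42   # identity: the value hashes to itself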