From 306b32689635562885a3fd526d879b8825dd7b58 Mon Sep 17 00:00:00 2001 From: Timothee Cour Date: Fri, 14 Feb 2020 23:29:17 -0800 Subject: [PATCH 01/19] _ --- lib/pure/collections/hashcommon.nim | 14 +++++++++++--- lib/pure/collections/tables.nim | 3 ++- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/lib/pure/collections/hashcommon.nim b/lib/pure/collections/hashcommon.nim index d9a914afc868c..010b860abbade 100644 --- a/lib/pure/collections/hashcommon.nim +++ b/lib/pure/collections/hashcommon.nim @@ -26,8 +26,14 @@ proc isEmpty(hcode: Hash): bool {.inline.} = proc isFilled(hcode: Hash): bool {.inline.} = result = hcode != 0 -proc nextTry(h, maxHash: Hash): Hash {.inline.} = - result = (h + 1) and maxHash + +proc nextTry(h, maxHash: Hash, perturb: var Hash): Hash {.inline.} = + # result = ((5*h) + 1 + perturb and maxHash) + const PERTURB_SHIFT = 5 + perturb = perturb shr PERTURB_SHIFT + result = ((5*h) + 1 + perturb) and maxHash + # echo (h, result) + # result = (h + 1) and maxHash proc mustRehash(length, counter: int): bool {.inline.} = assert(length > counter) @@ -37,14 +43,16 @@ template rawGetKnownHCImpl() {.dirty.} = if t.dataLen == 0: return -1 var h: Hash = hc and maxHash(t) # start with real hash value + var perturb = hc while isFilled(t.data[h].hcode): # Compare hc THEN key with boolean short circuit. This makes the common case # zero ==key's for missing (e.g.inserts) and exactly one ==key for present. # It does slow down succeeding lookups by one extra Hash cmp&and..usually # just a few clock cycles, generally worth it for any non-integer-like A. + # TODO: optimize this: depending on type(key), skip hc comparison if t.data[h].hcode == hc and t.data[h].key == key: return h - h = nextTry(h, maxHash(t)) + h = nextTry(h, maxHash(t), perturb) result = -1 - h # < 0 => MISSING; insert idx = -1 - result proc rawGetKnownHC[X, A](t: X, key: A, hc: Hash): int {.inline.} = diff --git a/lib/pure/collections/tables.nim b/lib/pure/collections/tables.nim index a0cc73ad62920..d885c120b1918 100644 --- a/lib/pure/collections/tables.nim +++ b/lib/pure/collections/tables.nim @@ -273,8 +273,9 @@ proc enlarge[A, B](t: var Table[A, B]) = let eh = n[i].hcode if isFilled(eh): var j: Hash = eh and maxHash(t) + var perturb = eh while isFilled(t.data[j].hcode): - j = nextTry(j, maxHash(t)) + j = nextTry(j, maxHash(t), perturb) when defined(js): rawInsert(t, t.data, n[i].key, n[i].val, eh, j) else: From d6a63dc65a96d1e1c99275f6f7357e6c75f56623 Mon Sep 17 00:00:00 2001 From: Timothee Cour Date: Sat, 15 Feb 2020 20:59:32 -0800 Subject: [PATCH 02/19] _ --- lib/pure/collections/hashcommon.nim | 5 ++- lib/pure/collections/intsets.nim | 22 ++++++++----- lib/pure/collections/tableimpl.nim | 13 ++++---- lib/pure/collections/tables.nim | 49 ++++++++++++++++++++++++++--- 4 files changed, 69 insertions(+), 20 deletions(-) diff --git a/lib/pure/collections/hashcommon.nim b/lib/pure/collections/hashcommon.nim index 010b860abbade..4f253be38548b 100644 --- a/lib/pure/collections/hashcommon.nim +++ b/lib/pure/collections/hashcommon.nim @@ -28,12 +28,11 @@ proc isFilled(hcode: Hash): bool {.inline.} = proc nextTry(h, maxHash: Hash, perturb: var Hash): Hash {.inline.} = - # result = ((5*h) + 1 + perturb and maxHash) + # TODO: shouldn't perturb be unsigned? 
const PERTURB_SHIFT = 5 perturb = perturb shr PERTURB_SHIFT + # perturb = cast[uint](perturb) shr PERTURB_SHIFT # TODO result = ((5*h) + 1 + perturb) and maxHash - # echo (h, result) - # result = (h + 1) and maxHash proc mustRehash(length, counter: int): bool {.inline.} = assert(length > counter) diff --git a/lib/pure/collections/intsets.nim b/lib/pure/collections/intsets.nim index 8338ceaca9978..02ee10607c3a1 100644 --- a/lib/pure/collections/intsets.nim +++ b/lib/pure/collections/intsets.nim @@ -54,22 +54,27 @@ proc mustRehash(length, counter: int): bool {.inline.} = assert(length > counter) result = (length * 2 < counter * 3) or (length - counter < 4) -proc nextTry(h, maxHash: Hash): Hash {.inline.} = - result = ((5 * h) + 1) and maxHash +# FACTOR +proc nextTry(h, maxHash: Hash, perturb: var Hash): Hash {.inline.} = + const PERTURB_SHIFT = 5 + perturb = perturb shr PERTURB_SHIFT + result = ((5*h) + 1 + perturb) and maxHash proc intSetGet(t: IntSet, key: int): PTrunk = var h = key and t.max + var perturb = key while t.data[h] != nil: if t.data[h].key == key: return t.data[h] - h = nextTry(h, t.max) + h = nextTry(h, t.max, perturb) result = nil proc intSetRawInsert(t: IntSet, data: var TrunkSeq, desc: PTrunk) = var h = desc.key and t.max + var perturb = desc.key while data[h] != nil: assert(data[h] != desc) - h = nextTry(h, t.max) + h = nextTry(h, t.max, perturb) assert(data[h] == nil) data[h] = desc @@ -84,14 +89,16 @@ proc intSetEnlarge(t: var IntSet) = proc intSetPut(t: var IntSet, key: int): PTrunk = var h = key and t.max + var perturb = key while t.data[h] != nil: if t.data[h].key == key: return t.data[h] - h = nextTry(h, t.max) + h = nextTry(h, t.max, perturb) if mustRehash(t.max + 1, t.counter): intSetEnlarge(t) inc(t.counter) h = key and t.max - while t.data[h] != nil: h = nextTry(h, t.max) + perturb = key + while t.data[h] != nil: h = nextTry(h, t.max, perturb) assert(t.data[h] == nil) new(result) result.next = t.head @@ -393,7 +400,8 @@ proc assign*(dest: var IntSet, src: IntSet) = var it = src.head while it != nil: var h = it.key and dest.max - while dest.data[h] != nil: h = nextTry(h, dest.max) + var perturb = it.key + while dest.data[h] != nil: h = nextTry(h, dest.max, perturb) assert(dest.data[h] == nil) var n: PTrunk new(n) diff --git a/lib/pure/collections/tableimpl.nim b/lib/pure/collections/tableimpl.nim index 85bffd60f8def..6fb3cdc57802d 100644 --- a/lib/pure/collections/tableimpl.nim +++ b/lib/pure/collections/tableimpl.nim @@ -14,8 +14,9 @@ include hashcommon template rawGetDeepImpl() {.dirty.} = # Search algo for unconditional add genHashImpl(key, hc) var h: Hash = hc and maxHash(t) + var perturb = hc while isFilled(t.data[h].hcode): - h = nextTry(h, maxHash(t)) + h = nextTry(h, maxHash(t), perturb) result = h template rawInsertImpl() {.dirty.} = @@ -44,7 +45,6 @@ template addImpl(enlarge) {.dirty.} = inc(t.counter) template maybeRehashPutImpl(enlarge) {.dirty.} = - checkIfInitialized() if mustRehash(t.dataLen, t.counter): enlarge(t) index = rawGetKnownHC(t, key, hc) @@ -85,21 +85,22 @@ template delImplIdx(t, i) = block outer: while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1 var j = i # The correctness of this depends on (h+1) in nextTry, - var r = j # though may be adaptable to other simple sequences. + # though may be adaptable to other simple sequences. 
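
The probing change these first patches introduce replaces the old linear step `(h + 1) and maxHash` with a CPython-dict-style recurrence: the next slot is `(5*h + 1 + perturb) and maxHash`, where `perturb` starts as the full hash code and loses 5 bits per step, so the bits discarded by the slot mask eventually break up collision clusters. Below is a minimal standalone sketch of that sequence, assuming a power-of-two table; `probeSeq` and the sample values are illustrative only, not part of the patch.

# Standalone model of the perturbed probe sequence used by `nextTry` above.
proc probeSeq(hc, maxHash, steps: int): seq[int] =
  const PERTURB_SHIFT = 5
  var perturb = hc                        # starts as the full hash code
  var h = hc and maxHash                  # first slot comes from masking the hash
  for _ in 0 ..< steps:
    result.add h
    perturb = perturb shr PERTURB_SHIFT   # decay: higher hash bits drip in, then vanish
    h = (5*h + 1 + perturb) and maxHash   # CPython-style recurrence

when isMainModule:
  # 40 and 72 both land in slot 0 of an 8-slot table (same low 3 bits), but the
  # bits above the mask feed in through `perturb`, so the second probe already
  # differs; with the old `(h + 1) and maxHash` step both would probe 0, 1, 2, ...
  doAssert probeSeq(40, 7, 2) == @[0, 2]
  doAssert probeSeq(72, 7, 2) == @[0, 3]
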
t.data[i].hcode = 0 # mark current EMPTY t.data[i].key = default(type(t.data[i].key)) t.data[i].val = default(type(t.data[i].val)) while true: - i = (i + 1) and msk # increment mod table size + i = (i + 1) and msk # increment mod table size; PRTEMP if isEmpty(t.data[i].hcode): # end of collision cluster; So all done break outer - r = t.data[i].hcode and msk # "home" location of key@i + var r = t.data[i].hcode and msk # "home" location of key@i if not ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)): break + # TODO: can we instead just move the last in the chain? when defined(js): t.data[j] = t.data[i] else: - t.data[j] = move(t.data[i]) # data[j] will be marked EMPTY next loop + t.data[j] = move(t.data[i]) # data[i] will be marked EMPTY next loop template delImpl() {.dirty.} = var hc: Hash diff --git a/lib/pure/collections/tables.nim b/lib/pure/collections/tables.nim index d885c120b1918..4b0980f172fa6 100644 --- a/lib/pure/collections/tables.nim +++ b/lib/pure/collections/tables.nim @@ -761,6 +761,38 @@ iterator mvalues*[A, B](t: var Table[A, B]): var B = yield t.data[h].val assert(len(t) == L, "the length of the table changed while iterating over it") +import intsets + +template inCache(): bool = + # TODO: instead modify how add works for table w multiple identical keys, eg usign instead hash((key, keyCount)) ? + if num <= N: + # short cache optimization: linear search, to avoid overhead of IntSet for common case + var found = false + for i in 0.. Date: Sun, 16 Feb 2020 00:28:46 -0800 Subject: [PATCH 03/19] _ --- lib/pure/collections/hashcommon.nim | 59 ++++++++++++++++------ lib/pure/collections/sharedtables.nim | 2 +- lib/pure/collections/tableimpl.nim | 70 ++++++++++++++++----------- lib/pure/collections/tables.nim | 20 ++++---- 4 files changed, 99 insertions(+), 52 deletions(-) diff --git a/lib/pure/collections/hashcommon.nim b/lib/pure/collections/hashcommon.nim index 4f253be38548b..f95eff3802175 100644 --- a/lib/pure/collections/hashcommon.nim +++ b/lib/pure/collections/hashcommon.nim @@ -20,8 +20,15 @@ when not defined(nimHasDefault): # hcode for real keys cannot be zero. hcode==0 signifies an empty slot. These # two procs retain clarity of that encoding without the space cost of an enum. -proc isEmpty(hcode: Hash): bool {.inline.} = - result = hcode == 0 +when false: + proc isEmpty(hcode: Hash): bool {.inline.} = + result = hcode == 0 + +const freeMarker = 0 +const deletedMarker = -1 + +proc isFilledValid(hcode: Hash): bool {.inline.} = + result = hcode != 0 and hcode != deletedMarker # SPEED: could improve w bit magic proc isFilled(hcode: Hash): bool {.inline.} = result = hcode != 0 @@ -30,29 +37,49 @@ proc isFilled(hcode: Hash): bool {.inline.} = proc nextTry(h, maxHash: Hash, perturb: var Hash): Hash {.inline.} = # TODO: shouldn't perturb be unsigned? 
const PERTURB_SHIFT = 5 - perturb = perturb shr PERTURB_SHIFT - # perturb = cast[uint](perturb) shr PERTURB_SHIFT # TODO + # SPEED IMPROVE + var perturb2 = cast[uint](perturb) shr PERTURB_SHIFT + perturb = cast[Hash](perturb2) result = ((5*h) + 1 + perturb) and maxHash proc mustRehash(length, counter: int): bool {.inline.} = assert(length > counter) - result = (length * 2 < counter * 3) or (length - counter < 4) + result = (length * 2 < counter * 3) or (length - counter < 4) # synchronize with `rightSize` + +proc mustRehash2[T](t: T): bool {.inline.} = + let counter2 = t.counter + t.countDeleted + # echo (t.dataLen, t.counter, t.countDeleted, counter2) + result = mustRehash(t.dataLen, counter2) + # echo ("mustRehash2", t.dataLen, t.counter, t.countDeleted, counter2, result) template rawGetKnownHCImpl() {.dirty.} = if t.dataLen == 0: return -1 var h: Hash = hc and maxHash(t) # start with real hash value var perturb = hc - while isFilled(t.data[h].hcode): - # Compare hc THEN key with boolean short circuit. This makes the common case - # zero ==key's for missing (e.g.inserts) and exactly one ==key for present. - # It does slow down succeeding lookups by one extra Hash cmp&and..usually - # just a few clock cycles, generally worth it for any non-integer-like A. - # TODO: optimize this: depending on type(key), skip hc comparison - if t.data[h].hcode == hc and t.data[h].key == key: - return h - h = nextTry(h, maxHash(t), perturb) - result = -1 - h # < 0 => MISSING; insert idx = -1 - result + var deletedIndex = -1 + while true: + if isFilledValid(t.data[h].hcode): + # Compare hc THEN key with boolean short circuit. This makes the common case + # zero ==key's for missing (e.g.inserts) and exactly one ==key for present. + # It does slow down succeeding lookups by one extra Hash cmp&and..usually + # just a few clock cycles, generally worth it for any non-integer-like A. + # TODO: optimize this: depending on type(key), skip hc comparison + if t.data[h].hcode == hc and t.data[h].key == key: + return h + # echo (h, perturb, maxHash(t)) + h = nextTry(h, maxHash(t), perturb) + elif t.data[h].hcode == deletedMarker: + if deletedIndex == -1: + deletedIndex = h + h = nextTry(h, maxHash(t), perturb) + else: + break + if deletedIndex == -1: + result = -1 - h # < 0 => MISSING; insert idx = -1 - result # TODO + else: + # we prefer returning a (in fact the 1st found) deleted index + result = -1 - deletedIndex proc rawGetKnownHC[X, A](t: X, key: A, hc: Hash): int {.inline.} = rawGetKnownHCImpl() @@ -61,6 +88,8 @@ template genHashImpl(key, hc: typed) = hc = hash(key) if hc == 0: # This almost never taken branch should be very predictable. hc = 314159265 # Value doesn't matter; Any non-zero favorite is fine. 
+ elif hc == deletedMarker: + hc = 214159261 template genHash(key: typed): Hash = var res: Hash diff --git a/lib/pure/collections/sharedtables.nim b/lib/pure/collections/sharedtables.nim index 1a614875284d8..ae623790bfa0a 100644 --- a/lib/pure/collections/sharedtables.nim +++ b/lib/pure/collections/sharedtables.nim @@ -32,7 +32,7 @@ template maxHash(t): untyped = t.dataLen-1 include tableimpl template st_maybeRehashPutImpl(enlarge) {.dirty.} = - if mustRehash(t.dataLen, t.counter): + if mustRehash2(t): enlarge(t) index = rawGetKnownHC(t, key, hc) index = -1 - index # important to transform for mgetOrPutImpl diff --git a/lib/pure/collections/tableimpl.nim b/lib/pure/collections/tableimpl.nim index 6fb3cdc57802d..558999720343a 100644 --- a/lib/pure/collections/tableimpl.nim +++ b/lib/pure/collections/tableimpl.nim @@ -15,13 +15,19 @@ template rawGetDeepImpl() {.dirty.} = # Search algo for unconditional add genHashImpl(key, hc) var h: Hash = hc and maxHash(t) var perturb = hc - while isFilled(t.data[h].hcode): - h = nextTry(h, maxHash(t), perturb) + while true: + let hcode = t.data[h].hcode + if hcode == deletedMarker or hcode == freeMarker: + break + else: + h = nextTry(h, maxHash(t), perturb) result = h -template rawInsertImpl() {.dirty.} = +template rawInsertImpl(t) {.dirty.} = data[h].key = key data[h].val = val + if data[h].hcode == deletedMarker: + t.countDeleted.dec data[h].hcode = hc proc rawGetDeep[X, A](t: X, key: A, hc: var Hash): int {.inline.} = @@ -29,7 +35,7 @@ proc rawGetDeep[X, A](t: X, key: A, hc: var Hash): int {.inline.} = proc rawInsert[X, A, B](t: var X, data: var KeyValuePairSeq[A, B], key: A, val: B, hc: Hash, h: Hash) = - rawInsertImpl() + rawInsertImpl(t) template checkIfInitialized() = when compiles(defaultInitialSize): @@ -38,14 +44,14 @@ template checkIfInitialized() = template addImpl(enlarge) {.dirty.} = checkIfInitialized() - if mustRehash(t.dataLen, t.counter): enlarge(t) + if mustRehash2(t): enlarge(t) var hc: Hash var j = rawGetDeep(t, key, hc) rawInsert(t, t.data, key, val, hc, j) inc(t.counter) template maybeRehashPutImpl(enlarge) {.dirty.} = - if mustRehash(t.dataLen, t.counter): + if mustRehash2(t): enlarge(t) index = rawGetKnownHC(t, key, hc) index = -1 - index # important to transform for mgetOrPutImpl @@ -82,25 +88,35 @@ template delImplIdx(t, i) = let msk = maxHash(t) if i >= 0: dec(t.counter) - block outer: - while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1 - var j = i # The correctness of this depends on (h+1) in nextTry, - # though may be adaptable to other simple sequences. - t.data[i].hcode = 0 # mark current EMPTY - t.data[i].key = default(type(t.data[i].key)) - t.data[i].val = default(type(t.data[i].val)) - while true: - i = (i + 1) and msk # increment mod table size; PRTEMP - if isEmpty(t.data[i].hcode): # end of collision cluster; So all done - break outer - var r = t.data[i].hcode and msk # "home" location of key@i - if not ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)): - break - # TODO: can we instead just move the last in the chain? - when defined(js): - t.data[j] = t.data[i] - else: - t.data[j] = move(t.data[i]) # data[i] will be marked EMPTY next loop + inc(t.countDeleted) + when true: + t.data[i].hcode = deletedMarker + t.data[i].key = default(type(t.data[i].key)) + t.data[i].val = default(type(t.data[i].val)) + # maybe not needed because counter+countDeleted doesn't change, unless formula is not a simple + + # if mustRehash2(t): + # enlarge(t) # RENAME enlarge... 
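
The deletion scheme this patch switches to is tombstoning: a removed slot keeps a `deletedMarker` hcode instead of being backward-shifted out by the previous Knuth 6.4R loop, lookups probe straight through tombstones, and inserts may recycle the first tombstone they met. A toy model of the idea follows, using an explicit state enum instead of the hcode sentinels the real code uses; every name in it is illustrative.

# Toy open-addressing table illustrating tombstone deletion (illustrative only).
type
  SlotState = enum slotFree, slotUsed, slotDeleted
  Slot = object
    state: SlotState
    key, val: int
  ToyTable = object
    data: seq[Slot]               # length is a power of two
    counter, countDeleted: int

proc initToyTable(size = 8): ToyTable = ToyTable(data: newSeq[Slot](size))

proc findSlot(t: ToyTable, key: int): int =
  ## Returns the slot of `key`, or -1 - insertionSlot when missing.
  var h = key and (t.data.len - 1)
  var firstDeleted = -1
  while true:                     # a real table would resize before it can fill up
    case t.data[h].state
    of slotUsed:
      if t.data[h].key == key: return h
    of slotDeleted:               # tombstone: remember it, keep probing
      if firstDeleted < 0: firstDeleted = h
    of slotFree:                  # end of the probe chain
      return -1 - (if firstDeleted >= 0: firstDeleted else: h)
    h = (h + 1) and (t.data.len - 1)   # linear probe keeps the sketch short

proc put(t: var ToyTable, key, val: int) =
  var i = t.findSlot(key)
  if i < 0:
    i = -1 - i
    if t.data[i].state == slotDeleted: dec t.countDeleted
    inc t.counter
  t.data[i] = Slot(state: slotUsed, key: key, val: val)

proc del(t: var ToyTable, key: int) =
  let i = t.findSlot(key)
  if i >= 0:
    t.data[i] = Slot(state: slotDeleted)   # leave a tombstone; no back-shifting
    dec t.counter
    inc t.countDeleted

when isMainModule:
  var t = initToyTable()
  t.put(1, 10); t.put(9, 90)      # 1 and 9 collide in an 8-slot table
  t.del(1)
  doAssert t.findSlot(9) >= 0     # still reachable: the tombstone keeps the chain intact

The stdlib patch encodes the same three states directly in `hcode` (0 free, -1 deleted), which is why `mustRehash` now has to look at `counter + countDeleted`: tombstones keep occupying probe-chain slots until the next enlarge.
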
+ # i = rawGetKnownHC(t, key, hc) # TODO + else: + block outer: + while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1 + var j = i # The correctness of this depends on (h+1) in nextTry, + # though may be adaptable to other simple sequences. + t.data[i].hcode = 0 # mark current EMPTY + t.data[i].key = default(type(t.data[i].key)) + t.data[i].val = default(type(t.data[i].val)) + while true: + i = (i + 1) and msk # increment mod table size + if isEmpty(t.data[i].hcode): # end of collision cluster; So all done + break outer + var r = t.data[i].hcode and msk # "home" location of key@i + if not ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)): + break + # TODO: can we instead just move the last in the chain? + when defined(js): + t.data[j] = t.data[i] + else: + t.data[j] = move(t.data[i]) # data[i] will be marked EMPTY next loop template delImpl() {.dirty.} = var hc: Hash @@ -124,8 +140,8 @@ template initImpl(result: typed, size: int) = result.last = -1 template insertImpl() = # for CountTable - if t.dataLen == 0: initImpl(t, defaultInitialSize) - if mustRehash(len(t.data), t.counter): enlarge(t) + checkIfInitialized() + if mustRehash2(t): enlarge(t) ctRawInsert(t, t.data, key, val) inc(t.counter) diff --git a/lib/pure/collections/tables.nim b/lib/pure/collections/tables.nim index 4b0980f172fa6..00696ebda54a0 100644 --- a/lib/pure/collections/tables.nim +++ b/lib/pure/collections/tables.nim @@ -233,6 +233,7 @@ type ## For creating an empty Table, use `initTable proc<#initTable,int>`_. data: KeyValuePairSeq[A, B] counter: int + countDeleted: int TableRef*[A, B] = ref Table[A, B] ## Ref version of `Table<#Table>`_. ## ## For creating a new empty TableRef, use `newTable proc @@ -267,11 +268,12 @@ template get(t, key): untyped = proc enlarge[A, B](t: var Table[A, B]) = var n: KeyValuePairSeq[A, B] - newSeq(n, len(t.data) * growthFactor) + newSeq(n, t.counter.rightSize) swap(t.data, n) + t.countDeleted = 0 for i in countup(0, high(n)): let eh = n[i].hcode - if isFilled(eh): + if isFilledValid(eh): var j: Hash = eh and maxHash(t) var perturb = eh while isFilled(t.data[j].hcode): @@ -671,7 +673,7 @@ iterator pairs*[A, B](t: Table[A, B]): (A, B) = ## # value: [1, 5, 7, 9] let L = len(t) for h in 0 .. high(t.data): - if isFilled(t.data[h].hcode): + if isFilledValid(t.data[h].hcode): yield (t.data[h].key, t.data[h].val) assert(len(t) == L, "the length of the table changed while iterating over it") @@ -693,7 +695,7 @@ iterator mpairs*[A, B](t: var Table[A, B]): (A, var B) = let L = len(t) for h in 0 .. high(t.data): - if isFilled(t.data[h].hcode): + if isFilledValid(t.data[h].hcode): yield (t.data[h].key, t.data[h].val) assert(len(t) == L, "the length of the table changed while iterating over it") @@ -714,7 +716,7 @@ iterator keys*[A, B](t: Table[A, B]): A = let L = len(t) for h in 0 .. high(t.data): - if isFilled(t.data[h].hcode): + if isFilledValid(t.data[h].hcode): yield t.data[h].key assert(len(t) == L, "the length of the table changed while iterating over it") @@ -735,7 +737,7 @@ iterator values*[A, B](t: Table[A, B]): B = let L = len(t) for h in 0 .. high(t.data): - if isFilled(t.data[h].hcode): + if isFilledValid(t.data[h].hcode): yield t.data[h].val assert(len(t) == L, "the length of the table changed while iterating over it") @@ -757,7 +759,7 @@ iterator mvalues*[A, B](t: var Table[A, B]): var B = let L = len(t) for h in 0 .. 
high(t.data): - if isFilled(t.data[h].hcode): + if isFilledValid(t.data[h].hcode): yield t.data[h].val assert(len(t) == L, "the length of the table changed while iterating over it") @@ -822,7 +824,7 @@ iterator allValues*[A, B](t: Table[A, B]; key: A): B = # const N = 20000 # can optimize this var cache1 {.noinit.}: array[N, Hash] var cache2: IntSet - while isFilled(t.data[h].hcode): + while isFilled(t.data[h].hcode): # CHECKME isFilledValid ?? if t.data[h].key == key: if not inCache(): yield t.data[h].val @@ -1282,7 +1284,7 @@ proc rawGet[A, B](t: OrderedTable[A, B], key: A, hc: var Hash): int = proc rawInsert[A, B](t: var OrderedTable[A, B], data: var OrderedKeyValuePairSeq[A, B], key: A, val: B, hc: Hash, h: Hash) = - rawInsertImpl() + rawInsertImpl(t) data[h].next = -1 if t.first < 0: t.first = h if t.last >= 0: data[t.last].next = h From 119076ac38850c7513c90ec58d65008555e1c123 Mon Sep 17 00:00:00 2001 From: Timothee Cour Date: Sun, 16 Feb 2020 01:25:03 -0800 Subject: [PATCH 04/19] _ --- lib/pure/hashes.nim | 47 +++++++++++++++++++-------------------------- 1 file changed, 20 insertions(+), 27 deletions(-) diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim index ac8498517bc65..3f805c937048a 100644 --- a/lib/pure/hashes.nim +++ b/lib/pure/hashes.nim @@ -112,38 +112,31 @@ proc hash*[T: proc](x: T): Hash {.inline.} = else: result = hash(pointer(x)) -const - prime = uint(11) +proc hashUInt32*(x: uint32): Hash {.inline.} = + ## for internal use; user code should prefer `hash` overloads + # calling `hashUInt64(x)` would perform 1.736 worse, see thashes_perf toInt32 + when nimvm: # in vmops + doAssert false + else: + # inspired from https://gist.github.com/badboy/6267743 + var x = x xor ((x shr 20) xor (x shr 12)) + result = cast[Hash](x xor (x shr 7) xor (x shr 4)) -proc hash*(x: int): Hash {.inline.} = +const prime = uint(11) +proc hash*(x: int|int64|uint|uint64|char|Ordinal): Hash {.inline.} = ## Efficient hashing of integers. - result = cast[Hash](cast[uint](x) * prime) - -proc hash*(x: int64): Hash {.inline.} = - ## Efficient hashing of `int64` integers. - result = cast[Hash](cast[uint](x) * prime) - -proc hash*(x: uint): Hash {.inline.} = - ## Efficient hashing of unsigned integers. - result = cast[Hash](x * prime) - -proc hash*(x: uint64): Hash {.inline.} = - ## Efficient hashing of `uint64` integers. - result = cast[Hash](cast[uint](x) * prime) - -proc hash*(x: char): Hash {.inline.} = - ## Efficient hashing of characters. - result = cast[Hash](cast[uint](ord(x)) * prime) - -proc hash*[T: Ordinal](x: T): Hash {.inline.} = - ## Efficient hashing of other ordinal types (e.g. enums). - result = cast[Hash](cast[uint](ord(x)) * prime) + when sizeof(x) < 4: + cast[Hash](x) + elif sizeof(x) == 4: + cast[Hash](cast[uint](x) * prime) + # hashUInt32(x) + else: + cast[Hash](cast[uint](x) * prime) proc hash*(x: float): Hash {.inline.} = ## Efficient hashing of floats. - var y = x + 1.0 - result = cast[ptr Hash](addr(y))[] - + var y = x + 0.0 + result = hash(cast[ptr Hash](addr(y))[]) # Forward declarations before methods that hash containers. 
This allows # containers to contain other containers From 5458f10a903df7b34f6bd7685852dd1658c92426 Mon Sep 17 00:00:00 2001 From: Timothee Cour Date: Sun, 16 Feb 2020 02:18:05 -0800 Subject: [PATCH 05/19] fixup --- lib/pure/collections/hashcommon.nim | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/lib/pure/collections/hashcommon.nim b/lib/pure/collections/hashcommon.nim index f95eff3802175..ca19fcc9cb3b6 100644 --- a/lib/pure/collections/hashcommon.nim +++ b/lib/pure/collections/hashcommon.nim @@ -35,9 +35,8 @@ proc isFilled(hcode: Hash): bool {.inline.} = proc nextTry(h, maxHash: Hash, perturb: var Hash): Hash {.inline.} = - # TODO: shouldn't perturb be unsigned? const PERTURB_SHIFT = 5 - # SPEED IMPROVE + # TODO: make perturb (maybe even Hash) unsigned everywhere to avoid back and forth conversions var perturb2 = cast[uint](perturb) shr PERTURB_SHIFT perturb = cast[Hash](perturb2) result = ((5*h) + 1 + perturb) and maxHash @@ -48,9 +47,7 @@ proc mustRehash(length, counter: int): bool {.inline.} = proc mustRehash2[T](t: T): bool {.inline.} = let counter2 = t.counter + t.countDeleted - # echo (t.dataLen, t.counter, t.countDeleted, counter2) result = mustRehash(t.dataLen, counter2) - # echo ("mustRehash2", t.dataLen, t.counter, t.countDeleted, counter2, result) template rawGetKnownHCImpl() {.dirty.} = if t.dataLen == 0: @@ -76,7 +73,7 @@ template rawGetKnownHCImpl() {.dirty.} = else: break if deletedIndex == -1: - result = -1 - h # < 0 => MISSING; insert idx = -1 - result # TODO + result = -1 - h # < 0 => MISSING; insert idx = -1 - result else: # we prefer returning a (in fact the 1st found) deleted index result = -1 - deletedIndex From 36abf0e6ef1962dc85da2a38749da6551dffcf8b Mon Sep 17 00:00:00 2001 From: Timothee Cour Date: Sun, 16 Feb 2020 02:35:55 -0800 Subject: [PATCH 06/19] nim compiles --- lib/pure/collections/hashcommon.nim | 2 +- lib/pure/collections/setimpl.nim | 19 +++------------- lib/pure/collections/sets.nim | 2 ++ lib/pure/collections/tableimpl.nim | 35 ++++++----------------------- lib/pure/collections/tables.nim | 23 ++++++++++++------- 5 files changed, 28 insertions(+), 53 deletions(-) diff --git a/lib/pure/collections/hashcommon.nim b/lib/pure/collections/hashcommon.nim index ca19fcc9cb3b6..20e92df556a96 100644 --- a/lib/pure/collections/hashcommon.nim +++ b/lib/pure/collections/hashcommon.nim @@ -46,6 +46,7 @@ proc mustRehash(length, counter: int): bool {.inline.} = result = (length * 2 < counter * 3) or (length - counter < 4) # synchronize with `rightSize` proc mustRehash2[T](t: T): bool {.inline.} = + # static: echo $T let counter2 = t.counter + t.countDeleted result = mustRehash(t.dataLen, counter2) @@ -64,7 +65,6 @@ template rawGetKnownHCImpl() {.dirty.} = # TODO: optimize this: depending on type(key), skip hc comparison if t.data[h].hcode == hc and t.data[h].key == key: return h - # echo (h, perturb, maxHash(t)) h = nextTry(h, maxHash(t), perturb) elif t.data[h].hcode == deletedMarker: if deletedIndex == -1: diff --git a/lib/pure/collections/setimpl.nim b/lib/pure/collections/setimpl.nim index d2d1490ff6fb4..e372f3d4c1e6d 100644 --- a/lib/pure/collections/setimpl.nim +++ b/lib/pure/collections/setimpl.nim @@ -68,11 +68,6 @@ template containsOrInclImpl() {.dirty.} = rawInsert(s, s.data, key, hc, -1 - index) inc(s.counter) -template doWhile(a, b) = - while true: - b - if not a: break - proc exclImpl[A](s: var HashSet[A], key: A): bool {.inline.} = var hc: Hash var i = rawGet(s, key, hc) @@ -82,17 +77,9 @@ proc exclImpl[A](s: var HashSet[A], 
key: A): bool {.inline.} = if i >= 0: result = false dec(s.counter) - while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1 - var j = i # The correctness of this depends on (h+1) in nextTry, - var r = j # though may be adaptable to other simple sequences. - s.data[i].hcode = 0 # mark current EMPTY - s.data[i].key = default(type(s.data[i].key)) - doWhile((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)): - i = (i + 1) and msk # increment mod table size - if isEmpty(s.data[i].hcode): # end of collision cluster; So all done - return - r = s.data[i].hcode and msk # "home" location of key@i - s.data[j] = move(s.data[i]) # data[i] will be marked EMPTY next loop + inc(s.countDeleted) + s.data[i].hcode = deletedMarker + s.data[i].key = default(type(s.data[i].key)) template dollarImpl() {.dirty.} = result = "{" diff --git a/lib/pure/collections/sets.nim b/lib/pure/collections/sets.nim index 9dd72cb565971..f27c955ee15df 100644 --- a/lib/pure/collections/sets.nim +++ b/lib/pure/collections/sets.nim @@ -68,6 +68,7 @@ type ## before calling other procs on it. data: KeyValuePairSeq[A] counter: int + countDeleted: int type OrderedKeyValuePair[A] = tuple[ @@ -80,6 +81,7 @@ type ## <#initOrderedSet,int>`_ before calling other procs on it. data: OrderedKeyValuePairSeq[A] counter, first, last: int + countDeleted: int const defaultInitialSize* = 64 diff --git a/lib/pure/collections/tableimpl.nim b/lib/pure/collections/tableimpl.nim index 558999720343a..d048a2f20a099 100644 --- a/lib/pure/collections/tableimpl.nim +++ b/lib/pure/collections/tableimpl.nim @@ -89,34 +89,13 @@ template delImplIdx(t, i) = if i >= 0: dec(t.counter) inc(t.countDeleted) - when true: - t.data[i].hcode = deletedMarker - t.data[i].key = default(type(t.data[i].key)) - t.data[i].val = default(type(t.data[i].val)) - # maybe not needed because counter+countDeleted doesn't change, unless formula is not a simple + - # if mustRehash2(t): - # enlarge(t) # RENAME enlarge... - # i = rawGetKnownHC(t, key, hc) # TODO - else: - block outer: - while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1 - var j = i # The correctness of this depends on (h+1) in nextTry, - # though may be adaptable to other simple sequences. - t.data[i].hcode = 0 # mark current EMPTY - t.data[i].key = default(type(t.data[i].key)) - t.data[i].val = default(type(t.data[i].val)) - while true: - i = (i + 1) and msk # increment mod table size - if isEmpty(t.data[i].hcode): # end of collision cluster; So all done - break outer - var r = t.data[i].hcode and msk # "home" location of key@i - if not ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)): - break - # TODO: can we instead just move the last in the chain? - when defined(js): - t.data[j] = t.data[i] - else: - t.data[j] = move(t.data[i]) # data[i] will be marked EMPTY next loop + t.data[i].hcode = deletedMarker + t.data[i].key = default(type(t.data[i].key)) + t.data[i].val = default(type(t.data[i].val)) + # maybe not needed because counter+countDeleted doesn't change, unless formula is not a simple + + # if mustRehash2(t): + # enlarge(t) # RENAME enlarge... + # i = rawGetKnownHC(t, key, hc) # TODO template delImpl() {.dirty.} = var hc: Hash diff --git a/lib/pure/collections/tables.nim b/lib/pure/collections/tables.nim index 00696ebda54a0..07274722c5659 100644 --- a/lib/pure/collections/tables.nim +++ b/lib/pure/collections/tables.nim @@ -1263,6 +1263,8 @@ type ## <#initOrderedTable,int>`_. 
data: OrderedKeyValuePairSeq[A, B] counter, first, last: int + countDeleted: int + OrderedTableRef*[A, B] = ref OrderedTable[A, B] ## Ref version of ## `OrderedTable<#OrderedTable>`_. ## @@ -1292,18 +1294,20 @@ proc rawInsert[A, B](t: var OrderedTable[A, B], proc enlarge[A, B](t: var OrderedTable[A, B]) = var n: OrderedKeyValuePairSeq[A, B] - newSeq(n, len(t.data) * growthFactor) + newSeq(n, t.counter.rightSize) var h = t.first t.first = -1 t.last = -1 swap(t.data, n) + t.countDeleted = 0 while h >= 0: var nxt = n[h].next let eh = n[h].hcode - if isFilled(eh): + if isFilledValid(eh): var j: Hash = eh and maxHash(t) + var perturb = eh while isFilled(t.data[j].hcode): - j = nextTry(j, maxHash(t)) + j = nextTry(j, maxHash(t), perturb) rawInsert(t, t.data, n[h].key, n[h].val, n[h].hcode, j) h = nxt @@ -2255,6 +2259,7 @@ type ## <#initCountTable,int>`_. data: seq[tuple[key: A, val: int]] counter: int + countDeleted: int isSorted: bool CountTableRef*[A] = ref CountTable[A] ## Ref version of ## `CountTable<#CountTable>`_. @@ -2267,8 +2272,9 @@ type proc ctRawInsert[A](t: CountTable[A], data: var seq[tuple[key: A, val: int]], key: A, val: int) = - var h: Hash = hash(key) and high(data) - while data[h].val != 0: h = nextTry(h, high(data)) + var perturb = hash(key) + var h: Hash = perturb and high(data) + while data[h].val != 0: h = nextTry(h, high(data), perturb) # TODO: hadnle deletedMarker data[h].key = key data[h].val = val @@ -2295,10 +2301,11 @@ proc remove[A](t: var CountTable[A], key: A) = proc rawGet[A](t: CountTable[A], key: A): int = if t.data.len == 0: return -1 - var h: Hash = hash(key) and high(t.data) # start with real hash value - while t.data[h].val != 0: + var perturb = hash(key) + var h: Hash = perturb and high(t.data) # start with real hash value + while t.data[h].val != 0: # TODO: may need to handle t.data[h].hcode == deletedMarker? if t.data[h].key == key: return h - h = nextTry(h, high(t.data)) + h = nextTry(h, high(t.data), perturb) result = -1 - h # < 0 => MISSING; insert idx = -1 - result template ctget(t, key, default: untyped): untyped = From 2e7b98de06287ab4d0492689ffdcb833dc778981 Mon Sep 17 00:00:00 2001 From: Timothee Cour Date: Tue, 18 Feb 2020 11:35:52 -0800 Subject: [PATCH 07/19] address comments --- lib/pure/collections/hashcommon.nim | 4 ++-- lib/pure/collections/tables.nim | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/lib/pure/collections/hashcommon.nim b/lib/pure/collections/hashcommon.nim index 20e92df556a96..8eac7c81c138f 100644 --- a/lib/pure/collections/hashcommon.nim +++ b/lib/pure/collections/hashcommon.nim @@ -27,7 +27,7 @@ when false: const freeMarker = 0 const deletedMarker = -1 -proc isFilledValid(hcode: Hash): bool {.inline.} = +proc isFilledAndValid(hcode: Hash): bool {.inline.} = result = hcode != 0 and hcode != deletedMarker # SPEED: could improve w bit magic proc isFilled(hcode: Hash): bool {.inline.} = @@ -57,7 +57,7 @@ template rawGetKnownHCImpl() {.dirty.} = var perturb = hc var deletedIndex = -1 while true: - if isFilledValid(t.data[h].hcode): + if isFilledAndValid(t.data[h].hcode): # Compare hc THEN key with boolean short circuit. This makes the common case # zero ==key's for missing (e.g.inserts) and exactly one ==key for present. 
# It does slow down succeeding lookups by one extra Hash cmp&and..usually diff --git a/lib/pure/collections/tables.nim b/lib/pure/collections/tables.nim index 07274722c5659..031e5c3efbae9 100644 --- a/lib/pure/collections/tables.nim +++ b/lib/pure/collections/tables.nim @@ -273,7 +273,7 @@ proc enlarge[A, B](t: var Table[A, B]) = t.countDeleted = 0 for i in countup(0, high(n)): let eh = n[i].hcode - if isFilledValid(eh): + if isFilledAndValid(eh): var j: Hash = eh and maxHash(t) var perturb = eh while isFilled(t.data[j].hcode): @@ -673,7 +673,7 @@ iterator pairs*[A, B](t: Table[A, B]): (A, B) = ## # value: [1, 5, 7, 9] let L = len(t) for h in 0 .. high(t.data): - if isFilledValid(t.data[h].hcode): + if isFilledAndValid(t.data[h].hcode): yield (t.data[h].key, t.data[h].val) assert(len(t) == L, "the length of the table changed while iterating over it") @@ -695,7 +695,7 @@ iterator mpairs*[A, B](t: var Table[A, B]): (A, var B) = let L = len(t) for h in 0 .. high(t.data): - if isFilledValid(t.data[h].hcode): + if isFilledAndValid(t.data[h].hcode): yield (t.data[h].key, t.data[h].val) assert(len(t) == L, "the length of the table changed while iterating over it") @@ -716,7 +716,7 @@ iterator keys*[A, B](t: Table[A, B]): A = let L = len(t) for h in 0 .. high(t.data): - if isFilledValid(t.data[h].hcode): + if isFilledAndValid(t.data[h].hcode): yield t.data[h].key assert(len(t) == L, "the length of the table changed while iterating over it") @@ -737,7 +737,7 @@ iterator values*[A, B](t: Table[A, B]): B = let L = len(t) for h in 0 .. high(t.data): - if isFilledValid(t.data[h].hcode): + if isFilledAndValid(t.data[h].hcode): yield t.data[h].val assert(len(t) == L, "the length of the table changed while iterating over it") @@ -759,7 +759,7 @@ iterator mvalues*[A, B](t: var Table[A, B]): var B = let L = len(t) for h in 0 .. high(t.data): - if isFilledValid(t.data[h].hcode): + if isFilledAndValid(t.data[h].hcode): yield t.data[h].val assert(len(t) == L, "the length of the table changed while iterating over it") @@ -824,7 +824,7 @@ iterator allValues*[A, B](t: Table[A, B]; key: A): B = # const N = 20000 # can optimize this var cache1 {.noinit.}: array[N, Hash] var cache2: IntSet - while isFilled(t.data[h].hcode): # CHECKME isFilledValid ?? + while isFilled(t.data[h].hcode): # CHECKME isFilledAndValid ?? 
if t.data[h].key == key: if not inCache(): yield t.data[h].val @@ -1303,7 +1303,7 @@ proc enlarge[A, B](t: var OrderedTable[A, B]) = while h >= 0: var nxt = n[h].next let eh = n[h].hcode - if isFilledValid(eh): + if isFilledAndValid(eh): var j: Hash = eh and maxHash(t) var perturb = eh while isFilled(t.data[j].hcode): From bc86a8024e5f0b1bbe480c2f20fc4f3b67e2a0a9 Mon Sep 17 00:00:00 2001 From: Timothee Cour Date: Tue, 18 Feb 2020 11:47:31 -0800 Subject: [PATCH 08/19] _ --- lib/pure/collections/intsets.nim | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/pure/collections/intsets.nim b/lib/pure/collections/intsets.nim index 02ee10607c3a1..6b36553a967b2 100644 --- a/lib/pure/collections/intsets.nim +++ b/lib/pure/collections/intsets.nim @@ -51,13 +51,15 @@ type a: array[0..33, int] # profiling shows that 34 elements are enough proc mustRehash(length, counter: int): bool {.inline.} = + # FACTOR with hashcommon.mustRehash assert(length > counter) result = (length * 2 < counter * 3) or (length - counter < 4) -# FACTOR proc nextTry(h, maxHash: Hash, perturb: var Hash): Hash {.inline.} = + # FACTOR with hashcommon.nextTry const PERTURB_SHIFT = 5 - perturb = perturb shr PERTURB_SHIFT + var perturb2 = cast[uint](perturb) shr PERTURB_SHIFT + perturb = cast[Hash](perturb2) result = ((5*h) + 1 + perturb) and maxHash proc intSetGet(t: IntSet, key: int): PTrunk = From 86108c091782e63d6bbee43d1b327789070f2f57 Mon Sep 17 00:00:00 2001 From: Timothee Cour Date: Tue, 18 Feb 2020 11:49:57 -0800 Subject: [PATCH 09/19] fixup --- lib/pure/collections/hashcommon.nim | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/lib/pure/collections/hashcommon.nim b/lib/pure/collections/hashcommon.nim index 8eac7c81c138f..905488af92138 100644 --- a/lib/pure/collections/hashcommon.nim +++ b/lib/pure/collections/hashcommon.nim @@ -18,15 +18,11 @@ when not defined(nimHasDefault): var v: T v -# hcode for real keys cannot be zero. hcode==0 signifies an empty slot. These -# two procs retain clarity of that encoding without the space cost of an enum. -when false: - proc isEmpty(hcode: Hash): bool {.inline.} = - result = hcode == 0 - const freeMarker = 0 const deletedMarker = -1 +# hcode for real keys cannot be zero. hcode==0 signifies an empty slot. These +# two procs retain clarity of that encoding without the space cost of an enum. 
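
Because 0 and -1 are reserved as the free/deleted markers, a key whose real hash happens to be one of those two values has to be remapped before it is stored, which is what the `genHashImpl` branch earlier in the series does. A minimal restatement of that remapping for illustration; the replacement constants are the ones the patch picks, but any non-sentinel values would do.

proc sanitizeHash(hc: int): int =
  ## Keep real hash codes out of the two reserved slot markers.
  result = hc
  if result == 0: result = 314159265       # 0 would read as "free slot"
  elif result == -1: result = 214159261    # -1 would read as "deleted slot" (tombstone)

doAssert sanitizeHash(0) == 314159265
doAssert sanitizeHash(-1) == 214159261
doAssert sanitizeHash(12345) == 12345
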
proc isFilledAndValid(hcode: Hash): bool {.inline.} = result = hcode != 0 and hcode != deletedMarker # SPEED: could improve w bit magic From e08bd320e34ef5f22c597d1fd4b57faf7a3b89b7 Mon Sep 17 00:00:00 2001 From: Timothee Cour Date: Tue, 18 Feb 2020 14:24:36 -0800 Subject: [PATCH 10/19] fix tests --- lib/pure/collections/hashcommon.nim | 1 - lib/pure/collections/intsets.nim | 9 ++++- lib/pure/collections/sets.nim | 8 ++-- lib/pure/collections/sharedtables.nim | 4 +- lib/pure/collections/tables.nim | 54 +++++++++------------------ lib/pure/testutils.nim | 8 ++++ tests/arc/trepr.nim | 2 +- tests/collections/ttables.nim | 13 +++++-- tests/collections/ttablesthreads.nim | 5 +-- 9 files changed, 53 insertions(+), 51 deletions(-) create mode 100644 lib/pure/testutils.nim diff --git a/lib/pure/collections/hashcommon.nim b/lib/pure/collections/hashcommon.nim index 905488af92138..d7b0e73e397de 100644 --- a/lib/pure/collections/hashcommon.nim +++ b/lib/pure/collections/hashcommon.nim @@ -42,7 +42,6 @@ proc mustRehash(length, counter: int): bool {.inline.} = result = (length * 2 < counter * 3) or (length - counter < 4) # synchronize with `rightSize` proc mustRehash2[T](t: T): bool {.inline.} = - # static: echo $T let counter2 = t.counter + t.countDeleted result = mustRehash(t.dataLen, counter2) diff --git a/lib/pure/collections/intsets.nim b/lib/pure/collections/intsets.nim index 6b36553a967b2..f6aa2b0c27a65 100644 --- a/lib/pure/collections/intsets.nim +++ b/lib/pure/collections/intsets.nim @@ -46,6 +46,7 @@ type IntSet* = object ## An efficient set of `int` implemented as a sparse bit set. elems: int # only valid for small numbers counter, max: int + countDeleted: int head: PTrunk data: TrunkSeq a: array[0..33, int] # profiling shows that 34 elements are enough @@ -55,6 +56,11 @@ proc mustRehash(length, counter: int): bool {.inline.} = assert(length > counter) result = (length * 2 < counter * 3) or (length - counter < 4) +proc mustRehash2[T](t: T): bool {.inline.} = + # FACTOR with hashcommon.mustRehash2 + let counter2 = t.counter + t.countDeleted + result = mustRehash(t.max + 1, counter2) + proc nextTry(h, maxHash: Hash, perturb: var Hash): Hash {.inline.} = # FACTOR with hashcommon.nextTry const PERTURB_SHIFT = 5 @@ -96,7 +102,7 @@ proc intSetPut(t: var IntSet, key: int): PTrunk = if t.data[h].key == key: return t.data[h] h = nextTry(h, t.max, perturb) - if mustRehash(t.max + 1, t.counter): intSetEnlarge(t) + if mustRehash2(t): intSetEnlarge(t) inc(t.counter) h = key and t.max perturb = key @@ -109,6 +115,7 @@ proc intSetPut(t: var IntSet, key: int): PTrunk = t.data[h] = result proc bitincl(s: var IntSet, key: int) {.inline.} = + var ret: PTrunk var t = intSetPut(s, `shr`(key, TrunkShift)) var u = key and TrunkMask t.bits[u shr IntShift] = t.bits[u shr IntShift] or diff --git a/lib/pure/collections/sets.nim b/lib/pure/collections/sets.nim index f27c955ee15df..6edcd65abf282 100644 --- a/lib/pure/collections/sets.nim +++ b/lib/pure/collections/sets.nim @@ -252,7 +252,7 @@ iterator items*[A](s: HashSet[A]): A = ## echo b ## # --> {(a: 1, b: 3), (a: 0, b: 4)} for h in 0 .. high(s.data): - if isFilled(s.data[h].hcode): yield s.data[h].key + if isFilledAndValid(s.data[h].hcode): yield s.data[h].key proc containsOrIncl*[A](s: var HashSet[A], key: A): bool = ## Includes `key` in the set `s` and tells if `key` was already in `s`. @@ -344,7 +344,7 @@ proc pop*[A](s: var HashSet[A]): A = doAssertRaises(KeyError, echo s.pop) for h in 0 .. 
high(s.data): - if isFilled(s.data[h].hcode): + if isFilledAndValid(s.data[h].hcode): result = s.data[h].key excl(s, result) return result @@ -636,7 +636,7 @@ template forAllOrderedPairs(yieldStmt: untyped) {.dirty.} = var idx = 0 while h >= 0: var nxt = s.data[h].next - if isFilled(s.data[h].hcode): + if isFilledAndValid(s.data[h].hcode): yieldStmt inc(idx) h = nxt @@ -870,7 +870,7 @@ proc `==`*[A](s, t: OrderedSet[A]): bool = while h >= 0 and g >= 0: var nxh = s.data[h].next var nxg = t.data[g].next - if isFilled(s.data[h].hcode) and isFilled(t.data[g].hcode): + if isFilledAndValid(s.data[h].hcode) and isFilledAndValid(t.data[g].hcode): if s.data[h].key == t.data[g].key: inc compared else: diff --git a/lib/pure/collections/sharedtables.nim b/lib/pure/collections/sharedtables.nim index ae623790bfa0a..2a48e60f03559 100644 --- a/lib/pure/collections/sharedtables.nim +++ b/lib/pure/collections/sharedtables.nim @@ -25,6 +25,7 @@ type SharedTable*[A, B] = object ## generic hash SharedTable data: KeyValuePairSeq[A, B] counter, dataLen: int + countDeleted: int lock: Lock template maxHash(t): untyped = t.dataLen-1 @@ -49,9 +50,10 @@ proc enlarge[A, B](t: var SharedTable[A, B]) = for i in 0.. Date: Tue, 18 Feb 2020 15:30:48 -0800 Subject: [PATCH 11/19] fix allValues --- lib/pure/collections/tables.nim | 24 +++++++++--------------- lib/pure/testutils.nim | 5 +++++ tests/collections/ttables.nim | 18 ++++++++++++++++++ 3 files changed, 32 insertions(+), 15 deletions(-) diff --git a/lib/pure/collections/tables.nim b/lib/pure/collections/tables.nim index b71ce90b1e188..29340dc9e8f6d 100644 --- a/lib/pure/collections/tables.nim +++ b/lib/pure/collections/tables.nim @@ -786,19 +786,13 @@ iterator allValues*[A, B](t: Table[A, B]; key: A): B = ## Used if you have a table with duplicate keys (as a result of using ## `add proc<#add,Table[A,B],A,B>`_). ## - ## **Examples:** - ## - ## .. code-block:: - ## var a = {'a': 3, 'b': 5}.toTable - ## for i in 1..3: - ## a.add('z', 10*i) - ## echo a # {'a': 3, 'b': 5, 'z': 10, 'z': 20, 'z': 30} - ## - ## for v in a.allValues('z'): - ## echo v - ## # 10 - ## # 20 - ## # 30 + runnableExamples: + import testutils + var a = {'a': 3, 'b': 5}.toTable + for i in 1..3: a.add('z', 10*i) + doAssert a.sortedPairs == @[('a', 3), ('b', 5), ('z', 10), ('z', 20), ('z', 30)] + doAssert sortedItems(a.allValues('z')) == @[10, 20, 30] + let hc = genHash(key) var h: Hash = hc and high(t.data) let L = len(t) @@ -806,8 +800,8 @@ iterator allValues*[A, B](t: Table[A, B]; key: A): B = var num = 0 var cache: seq[Hash] - while isFilled(t.data[h].hcode): # CHECKME isFilledAndValid ?? - if t.data[h].key == key: + while isFilled(t.data[h].hcode): # `isFilledAndValid` would be incorrect, see test for `allValues` + if t.data[h].hcode == hc and t.data[h].key == key: if not hasKeyOrPutCache(cache, h): yield t.data[h].val assert(len(t) == L, "the length of the table changed while iterating over it") diff --git a/lib/pure/testutils.nim b/lib/pure/testutils.nim index 7e56064975372..ddbbc7dec1075 100644 --- a/lib/pure/testutils.nim +++ b/lib/pure/testutils.nim @@ -6,3 +6,8 @@ proc sortedPairs*[T](t: T): auto = ## helps when writing tests involving tables in a way that's robust to ## changes in hashing functions / table implementation. toSeq(t.pairs).sorted + +template sortedItems*(t: untyped): untyped = + ## helps when writing tests involving tables in a way that's robust to + ## changes in hashing functions / table implementation. 
+ sorted(toSeq(t)) diff --git a/tests/collections/ttables.nim b/tests/collections/ttables.nim index 304e99795b000..7459b3f137c4b 100644 --- a/tests/collections/ttables.nim +++ b/tests/collections/ttables.nim @@ -193,6 +193,23 @@ block ttables2: run1() echo "2" +block allValues: + var t: Table[int, string] + var key = 0 + let n = 1000 + for i in 0.. Date: Tue, 18 Feb 2020 19:12:33 -0800 Subject: [PATCH 12/19] fixup --- lib/pure/collections/tables.nim | 2 +- lib/pure/hashes.nim | 21 ++------------------- 2 files changed, 3 insertions(+), 20 deletions(-) diff --git a/lib/pure/collections/tables.nim b/lib/pure/collections/tables.nim index 29340dc9e8f6d..07058d4990695 100644 --- a/lib/pure/collections/tables.nim +++ b/lib/pure/collections/tables.nim @@ -2250,7 +2250,7 @@ proc ctRawInsert[A](t: CountTable[A], data: var seq[tuple[key: A, val: int]], key: A, val: int) = var perturb = hash(key) var h: Hash = perturb and high(data) - while data[h].val != 0: h = nextTry(h, high(data), perturb) # TODO: hadnle deletedMarker + while data[h].val != 0: h = nextTry(h, high(data), perturb) # TODO: handle deletedMarker data[h].key = key data[h].val = val diff --git a/lib/pure/hashes.nim b/lib/pure/hashes.nim index 3f805c937048a..b55a7865dd909 100644 --- a/lib/pure/hashes.nim +++ b/lib/pure/hashes.nim @@ -112,30 +112,13 @@ proc hash*[T: proc](x: T): Hash {.inline.} = else: result = hash(pointer(x)) -proc hashUInt32*(x: uint32): Hash {.inline.} = - ## for internal use; user code should prefer `hash` overloads - # calling `hashUInt64(x)` would perform 1.736 worse, see thashes_perf toInt32 - when nimvm: # in vmops - doAssert false - else: - # inspired from https://gist.github.com/badboy/6267743 - var x = x xor ((x shr 20) xor (x shr 12)) - result = cast[Hash](x xor (x shr 7) xor (x shr 4)) - -const prime = uint(11) proc hash*(x: int|int64|uint|uint64|char|Ordinal): Hash {.inline.} = ## Efficient hashing of integers. - when sizeof(x) < 4: - cast[Hash](x) - elif sizeof(x) == 4: - cast[Hash](cast[uint](x) * prime) - # hashUInt32(x) - else: - cast[Hash](cast[uint](x) * prime) + cast[Hash](ord(x)) proc hash*(x: float): Hash {.inline.} = ## Efficient hashing of floats. - var y = x + 0.0 + var y = x + 0.0 # for denormalization result = hash(cast[ptr Hash](addr(y))[]) # Forward declarations before methods that hash containers. This allows From a77e64679dc4e54fdae0e65e0c62d338b9b39aaa Mon Sep 17 00:00:00 2001 From: Timothee Cour Date: Tue, 18 Feb 2020 21:11:27 -0800 Subject: [PATCH 13/19] use translateBits optimization that further helps with #13393 --- lib/pure/collections/hashcommon.nim | 31 +++++++++++++++++++++------ lib/pure/collections/intsets.nim | 2 ++ lib/pure/collections/sharedtables.nim | 2 +- lib/pure/collections/tableimpl.nim | 2 +- lib/pure/collections/tables.nim | 10 ++++----- 5 files changed, 33 insertions(+), 14 deletions(-) diff --git a/lib/pure/collections/hashcommon.nim b/lib/pure/collections/hashcommon.nim index d7b0e73e397de..13fb73a6dca0c 100644 --- a/lib/pure/collections/hashcommon.nim +++ b/lib/pure/collections/hashcommon.nim @@ -10,9 +10,13 @@ # An ``include`` file which contains common code for # hash sets and tables. 
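
The `hashes.nim` simplification in the previous patch makes integer hashing the identity (the table's masking and perturbation now do the mixing) and keeps the float path consistent with `==` by normalising the value first, so `-0.0` and `0.0` hash alike. A small sketch of both ideas, assuming a 64-bit platform; `toyHash` is illustrative and not the stdlib proc.

import hashes

proc toyHash(x: int): Hash = cast[Hash](x)   # identity: masking + perturb do the mixing
proc toyHash(x: float): Hash =
  var y = x + 0.0                            # folds -0.0 into +0.0 so equal floats hash equally
  toyHash(cast[ptr int](addr y)[])

doAssert toyHash(-0.0) == toyHash(0.0)
doAssert toyHash(42) == 42
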
+# type Hash = uint + const growthFactor = 2 +from bitops import fastLog2 + when not defined(nimHasDefault): template default[T](t: typedesc[T]): T = var v: T @@ -29,13 +33,19 @@ proc isFilledAndValid(hcode: Hash): bool {.inline.} = proc isFilled(hcode: Hash): bool {.inline.} = result = hcode != 0 +type UHash* = uint -proc nextTry(h, maxHash: Hash, perturb: var Hash): Hash {.inline.} = - const PERTURB_SHIFT = 5 - # TODO: make perturb (maybe even Hash) unsigned everywhere to avoid back and forth conversions - var perturb2 = cast[uint](perturb) shr PERTURB_SHIFT - perturb = cast[Hash](perturb2) - result = ((5*h) + 1 + perturb) and maxHash +proc translateBits(a: UHash, numBitsMask: int): UHash {.inline.} = + result = (a shr numBitsMask) or (a shl (UHash.sizeof * 8 - numBitsMask)) + +proc nextTry(h, maxHash: Hash, perturb: var UHash): Hash {.inline.} = + # an optimization would be to use `(h + 1) and maxHash` for a few iterations + # and then switch to the formula below, to get "best of both worlds": good + # cache locality, except when a collision cluster is detected (ie, large number + # of iterations). + const PERTURB_SHIFT = 5 # consider using instead `numBitsMask=fastLog2(t.dataLen)` + result = cast[Hash]((5*cast[uint](h) + 1 + perturb) and cast[uint](maxHash)) + perturb = perturb shr PERTURB_SHIFT proc mustRehash(length, counter: int): bool {.inline.} = assert(length > counter) @@ -45,11 +55,18 @@ proc mustRehash2[T](t: T): bool {.inline.} = let counter2 = t.counter + t.countDeleted result = mustRehash(t.dataLen, counter2) +template getPerturb*(t: typed, hc: Hash): UHash = + let numBitsMask=fastLog2(dataLen(t)) # TODO: cache/store in t if needed + # this makes a major difference for cases like #13393; it causes the bits + # that were masked out in 1st position so they'll be masked in instead, and + # influence the recursion in nextTry earlier rather than later. + translateBits(cast[uint](hc), numBitsMask) + template rawGetKnownHCImpl() {.dirty.} = if t.dataLen == 0: return -1 var h: Hash = hc and maxHash(t) # start with real hash value - var perturb = hc + var perturb = t.getPerturb(hc) var deletedIndex = -1 while true: if isFilledAndValid(t.data[h].hcode): diff --git a/lib/pure/collections/intsets.nim b/lib/pure/collections/intsets.nim index f6aa2b0c27a65..a8b804be55607 100644 --- a/lib/pure/collections/intsets.nim +++ b/lib/pure/collections/intsets.nim @@ -22,6 +22,8 @@ import hashes +# from hashcommon import UHash + type BitScalar = uint diff --git a/lib/pure/collections/sharedtables.nim b/lib/pure/collections/sharedtables.nim index 2a48e60f03559..f25267e109250 100644 --- a/lib/pure/collections/sharedtables.nim +++ b/lib/pure/collections/sharedtables.nim @@ -50,7 +50,7 @@ proc enlarge[A, B](t: var SharedTable[A, B]) = for i in 0.. 
Date: Tue, 18 Feb 2020 21:26:44 -0800 Subject: [PATCH 14/19] fixup --- lib/pure/collections/tables.nim | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/pure/collections/tables.nim b/lib/pure/collections/tables.nim index bbd704fc265de..0502f615cf75f 100644 --- a/lib/pure/collections/tables.nim +++ b/lib/pure/collections/tables.nim @@ -2248,8 +2248,9 @@ type proc ctRawInsert[A](t: CountTable[A], data: var seq[tuple[key: A, val: int]], key: A, val: int) = - var perturb = t.getPerturb(hash(key)) - var h: Hash = perturb and high(data) + let hc = hash(key) + var perturb = t.getPerturb(hc) + var h: Hash = hc and high(data) while data[h].val != 0: h = nextTry(h, high(data), perturb) # TODO: handle deletedMarker data[h].key = key data[h].val = val @@ -2277,8 +2278,9 @@ proc remove[A](t: var CountTable[A], key: A) = proc rawGet[A](t: CountTable[A], key: A): int = if t.data.len == 0: return -1 - var perturb = t.getPerturb(hash(key)) - var h: Hash = perturb and high(t.data) # start with real hash value + let hc = hash(key) + var perturb = t.getPerturb(hc) + var h: Hash = hc and high(t.data) # start with real hash value while t.data[h].val != 0: # TODO: may need to handle t.data[h].hcode == deletedMarker? if t.data[h].key == key: return h h = nextTry(h, high(t.data), perturb) From 5b639730490266191d96cd1b8694557394223970 Mon Sep 17 00:00:00 2001 From: Timothee Cour Date: Tue, 18 Feb 2020 22:04:31 -0800 Subject: [PATCH 15/19] _ --- lib/pure/collections/hashcommon.nim | 9 ++++----- lib/pure/collections/intsets.nim | 2 -- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/lib/pure/collections/hashcommon.nim b/lib/pure/collections/hashcommon.nim index 13fb73a6dca0c..672f03e371c30 100644 --- a/lib/pure/collections/hashcommon.nim +++ b/lib/pure/collections/hashcommon.nim @@ -10,13 +10,9 @@ # An ``include`` file which contains common code for # hash sets and tables. -# type Hash = uint - const growthFactor = 2 -from bitops import fastLog2 - when not defined(nimHasDefault): template default[T](t: typedesc[T]): T = var v: T @@ -55,8 +51,11 @@ proc mustRehash2[T](t: T): bool {.inline.} = let counter2 = t.counter + t.countDeleted result = mustRehash(t.dataLen, counter2) +# from bitops import fastLog2 # PRTEMP + template getPerturb*(t: typed, hc: Hash): UHash = - let numBitsMask=fastLog2(dataLen(t)) # TODO: cache/store in t if needed + # let numBitsMask = fastLog2(dataLen(t)) # TODO: cache/store in t if needed + let numBitsMask = 16 # PRTEMP can't use fastLog2 # this makes a major difference for cases like #13393; it causes the bits # that were masked out in 1st position so they'll be masked in instead, and # influence the recursion in nextTry earlier rather than later. 
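
`getPerturb` feeds `nextTry` a rotated copy of the hash: `translateBits` rotates right by `numBitsMask`, so the bits that `hc and maxHash` throws away become the low bits of `perturb` and steer the very first probes, which is the point of the #13393 fix. A standalone illustration of the rotation, assuming a 64-bit `uint`; the constants are made up for the example.

# Sketch of the `translateBits` rotation used by `getPerturb` above (illustrative).
proc rotatedPerturb(hc: uint, numBitsMask: int): uint =
  (hc shr numBitsMask) or (hc shl (sizeof(uint) * 8 - numBitsMask))

when isMainModule:
  let mask = 0xFF'u            # pretend the table has 256 slots
  let a = 0x1234_5600'u        # two hashes that collide: identical low 8 bits ...
  let b = 0xABCD_EF00'u
  doAssert (a and mask) == (b and mask)
  # ... but their rotated perturbs differ immediately, so nextTry separates them fast.
  doAssert rotatedPerturb(a, 8) != rotatedPerturb(b, 8)
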
diff --git a/lib/pure/collections/intsets.nim b/lib/pure/collections/intsets.nim index a8b804be55607..f6aa2b0c27a65 100644 --- a/lib/pure/collections/intsets.nim +++ b/lib/pure/collections/intsets.nim @@ -22,8 +22,6 @@ import hashes -# from hashcommon import UHash - type BitScalar = uint From d52d5e3f3bb3678c38bb3ed3109f66021e0e2969 Mon Sep 17 00:00:00 2001 From: Timothee Cour Date: Tue, 18 Feb 2020 22:49:01 -0800 Subject: [PATCH 16/19] use sizeof(Hash) * 4 instead of fastLog2(dataLen(t)) --- lib/pure/collections/hashcommon.nim | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/lib/pure/collections/hashcommon.nim b/lib/pure/collections/hashcommon.nim index 672f03e371c30..748fa84e083cd 100644 --- a/lib/pure/collections/hashcommon.nim +++ b/lib/pure/collections/hashcommon.nim @@ -39,7 +39,7 @@ proc nextTry(h, maxHash: Hash, perturb: var UHash): Hash {.inline.} = # and then switch to the formula below, to get "best of both worlds": good # cache locality, except when a collision cluster is detected (ie, large number # of iterations). - const PERTURB_SHIFT = 5 # consider using instead `numBitsMask=fastLog2(t.dataLen)` + const PERTURB_SHIFT = 5 # consider tying this to `numBitsMask = fastLog2(t.dataLen)` result = cast[Hash]((5*cast[uint](h) + 1 + perturb) and cast[uint](maxHash)) perturb = perturb shr PERTURB_SHIFT @@ -51,11 +51,10 @@ proc mustRehash2[T](t: T): bool {.inline.} = let counter2 = t.counter + t.countDeleted result = mustRehash(t.dataLen, counter2) -# from bitops import fastLog2 # PRTEMP - template getPerturb*(t: typed, hc: Hash): UHash = - # let numBitsMask = fastLog2(dataLen(t)) # TODO: cache/store in t if needed - let numBitsMask = 16 # PRTEMP can't use fastLog2 + # we can't use `fastLog2(dataLen(t))` because importing `bitops` would cause codegen errors + # so we use a practical value of half the bit width (eg 64 / 2 = 32 on 64bit machines) + let numBitsMask = sizeof(Hash) * 4 # ie, sizeof(Hash) * 8 / 2 # this makes a major difference for cases like #13393; it causes the bits # that were masked out in 1st position so they'll be masked in instead, and # influence the recursion in nextTry earlier rather than later. From 3f956aa3812cfc2322ba11b16b3977e4039d014d Mon Sep 17 00:00:00 2001 From: Timothee Cour Date: Tue, 18 Feb 2020 23:24:08 -0800 Subject: [PATCH 17/19] fixup --- lib/pure/collections/hashcommon.nim | 5 +++-- lib/pure/collections/tableimpl.nim | 5 +---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/lib/pure/collections/hashcommon.nim b/lib/pure/collections/hashcommon.nim index 748fa84e083cd..b2b4b57d66c7a 100644 --- a/lib/pure/collections/hashcommon.nim +++ b/lib/pure/collections/hashcommon.nim @@ -24,7 +24,8 @@ const deletedMarker = -1 # hcode for real keys cannot be zero. hcode==0 signifies an empty slot. These # two procs retain clarity of that encoding without the space cost of an enum. proc isFilledAndValid(hcode: Hash): bool {.inline.} = - result = hcode != 0 and hcode != deletedMarker # SPEED: could improve w bit magic + result = hcode != 0 and hcode != deletedMarker + # performance: we could use bit magic if needed proc isFilled(hcode: Hash): bool {.inline.} = result = hcode != 0 @@ -72,7 +73,7 @@ template rawGetKnownHCImpl() {.dirty.} = # zero ==key's for missing (e.g.inserts) and exactly one ==key for present. # It does slow down succeeding lookups by one extra Hash cmp&and..usually # just a few clock cycles, generally worth it for any non-integer-like A. 
- # TODO: optimize this: depending on type(key), skip hc comparison + # performance: we optimize this: depending on type(key), skip hc comparison if t.data[h].hcode == hc and t.data[h].key == key: return h h = nextTry(h, maxHash(t), perturb) diff --git a/lib/pure/collections/tableimpl.nim b/lib/pure/collections/tableimpl.nim index c9ab975f08a5d..df05c3bb87b77 100644 --- a/lib/pure/collections/tableimpl.nim +++ b/lib/pure/collections/tableimpl.nim @@ -92,10 +92,7 @@ template delImplIdx(t, i) = t.data[i].hcode = deletedMarker t.data[i].key = default(type(t.data[i].key)) t.data[i].val = default(type(t.data[i].val)) - # maybe not needed because counter+countDeleted doesn't change, unless formula is not a simple + - # if mustRehash2(t): - # enlarge(t) # RENAME enlarge... - # i = rawGetKnownHC(t, key, hc) # TODO + # mustRehash2 + enlarge not needed because counter+countDeleted doesn't change template delImpl() {.dirty.} = var hc: Hash From f45e1f53cbde9cb1d0b206cc540f3ae399228e30 Mon Sep 17 00:00:00 2001 From: Timothee Cour Date: Wed, 19 Feb 2020 01:18:21 -0800 Subject: [PATCH 18/19] address comments --- lib/pure/collections/hashcommon.nim | 3 ++- lib/pure/testutils.nim | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/pure/collections/hashcommon.nim b/lib/pure/collections/hashcommon.nim index b2b4b57d66c7a..95ecd8447067b 100644 --- a/lib/pure/collections/hashcommon.nim +++ b/lib/pure/collections/hashcommon.nim @@ -21,6 +21,8 @@ when not defined(nimHasDefault): const freeMarker = 0 const deletedMarker = -1 +type UHash = uint + # hcode for real keys cannot be zero. hcode==0 signifies an empty slot. These # two procs retain clarity of that encoding without the space cost of an enum. proc isFilledAndValid(hcode: Hash): bool {.inline.} = @@ -30,7 +32,6 @@ proc isFilledAndValid(hcode: Hash): bool {.inline.} = proc isFilled(hcode: Hash): bool {.inline.} = result = hcode != 0 -type UHash* = uint proc translateBits(a: UHash, numBitsMask: int): UHash {.inline.} = result = (a shr numBitsMask) or (a shl (UHash.sizeof * 8 - numBitsMask)) diff --git a/lib/pure/testutils.nim b/lib/pure/testutils.nim index ddbbc7dec1075..bdf90b61635e2 100644 --- a/lib/pure/testutils.nim +++ b/lib/pure/testutils.nim @@ -1,4 +1,6 @@ -## utilities to help writing tests +## Utilities to help writing tests +## +## Unstable, experimental API. 
import std/[sequtils, algorithm] From 8e0bf1c9ffe0e24b5e2ff9a9325480ce6102df3a Mon Sep 17 00:00:00 2001 From: Timothee Cour Date: Wed, 19 Feb 2020 05:05:37 -0800 Subject: [PATCH 19/19] refactorings: remove a mustRehash overload + other small refactorings --- lib/pure/collections/hashcommon.nim | 26 ++++++++++++++++------- lib/pure/collections/intsets.nim | 17 +++++++-------- lib/pure/collections/setimpl.nim | 4 ++-- lib/pure/collections/sets.nim | 30 ++++++++++++--------------- lib/pure/collections/sharedtables.nim | 2 +- lib/pure/collections/tableimpl.nim | 8 +++---- lib/pure/collections/tables.nim | 11 ---------- 7 files changed, 45 insertions(+), 53 deletions(-) diff --git a/lib/pure/collections/hashcommon.nim b/lib/pure/collections/hashcommon.nim index 95ecd8447067b..bfc09ec3cc92e 100644 --- a/lib/pure/collections/hashcommon.nim +++ b/lib/pure/collections/hashcommon.nim @@ -37,6 +37,7 @@ proc translateBits(a: UHash, numBitsMask: int): UHash {.inline.} = result = (a shr numBitsMask) or (a shl (UHash.sizeof * 8 - numBitsMask)) proc nextTry(h, maxHash: Hash, perturb: var UHash): Hash {.inline.} = + # FACTOR between hashcommon.nextTry, intsets.nextTry # an optimization would be to use `(h + 1) and maxHash` for a few iterations # and then switch to the formula below, to get "best of both worlds": good # cache locality, except when a collision cluster is detected (ie, large number @@ -45,15 +46,24 @@ proc nextTry(h, maxHash: Hash, perturb: var UHash): Hash {.inline.} = result = cast[Hash]((5*cast[uint](h) + 1 + perturb) and cast[uint](maxHash)) perturb = perturb shr PERTURB_SHIFT -proc mustRehash(length, counter: int): bool {.inline.} = - assert(length > counter) - result = (length * 2 < counter * 3) or (length - counter < 4) # synchronize with `rightSize` - -proc mustRehash2[T](t: T): bool {.inline.} = +proc mustRehash[T](t: T): bool {.inline.} = + # FACTOR between hashcommon.mustRehash, intsets.mustRehash let counter2 = t.counter + t.countDeleted - result = mustRehash(t.dataLen, counter2) - -template getPerturb*(t: typed, hc: Hash): UHash = + let length = t.dataLen + assert(length > counter2) + result = (length * 2 < counter2 * 3) or (length - counter2 < 4) # synchronize with `rightSize` + +proc rightSize*(count: Natural): int {.inline.} = + ## Return the value of `initialSize` to support `count` items. + ## + ## If more items are expected to be added, simply add that + ## expected extra amount to the parameter before calling this. + ## + ## Internally, we want `mustRehash(t) == false` for t that was just resized. 
+ # Make sure to synchronize with `mustRehash` + result = nextPowerOfTwo(count * 3 div 2 + 4) + +template getPerturb(t: typed, hc: Hash): UHash = # we can't use `fastLog2(dataLen(t))` because importing `bitops` would cause codegen errors # so we use a practical value of half the bit width (eg 64 / 2 = 32 on 64bit machines) let numBitsMask = sizeof(Hash) * 4 # ie, sizeof(Hash) * 8 / 2 diff --git a/lib/pure/collections/intsets.nim b/lib/pure/collections/intsets.nim index f6aa2b0c27a65..7ca3797835a39 100644 --- a/lib/pure/collections/intsets.nim +++ b/lib/pure/collections/intsets.nim @@ -51,18 +51,15 @@ type data: TrunkSeq a: array[0..33, int] # profiling shows that 34 elements are enough -proc mustRehash(length, counter: int): bool {.inline.} = - # FACTOR with hashcommon.mustRehash - assert(length > counter) - result = (length * 2 < counter * 3) or (length - counter < 4) - -proc mustRehash2[T](t: T): bool {.inline.} = - # FACTOR with hashcommon.mustRehash2 +proc mustRehash[T](t: T): bool {.inline.} = + # FACTOR between hashcommon.mustRehash, intsets.mustRehash let counter2 = t.counter + t.countDeleted - result = mustRehash(t.max + 1, counter2) + let length = t.max + 1 + assert length > counter2 + result = (length * 2 < counter2 * 3) or (length - counter2 < 4) proc nextTry(h, maxHash: Hash, perturb: var Hash): Hash {.inline.} = - # FACTOR with hashcommon.nextTry + # FACTOR between hashcommon.nextTry, intsets.nextTry const PERTURB_SHIFT = 5 var perturb2 = cast[uint](perturb) shr PERTURB_SHIFT perturb = cast[Hash](perturb2) @@ -102,7 +99,7 @@ proc intSetPut(t: var IntSet, key: int): PTrunk = if t.data[h].key == key: return t.data[h] h = nextTry(h, t.max, perturb) - if mustRehash2(t): intSetEnlarge(t) + if mustRehash(t): intSetEnlarge(t) inc(t.counter) h = key and t.max perturb = key diff --git a/lib/pure/collections/setimpl.nim b/lib/pure/collections/setimpl.nim index e372f3d4c1e6d..f8950e3546b3e 100644 --- a/lib/pure/collections/setimpl.nim +++ b/lib/pure/collections/setimpl.nim @@ -48,7 +48,7 @@ template inclImpl() {.dirty.} = var hc: Hash var index = rawGet(s, key, hc) if index < 0: - if mustRehash(len(s.data), s.counter): + if mustRehash(s): enlarge(s) index = rawGetKnownHC(s, key, hc) rawInsert(s, s.data, key, hc, -1 - index) @@ -62,7 +62,7 @@ template containsOrInclImpl() {.dirty.} = if index >= 0: result = true else: - if mustRehash(len(s.data), s.counter): + if mustRehash(s): enlarge(s) index = rawGetKnownHC(s, key, hc) rawInsert(s, s.data, key, hc, -1 - index) diff --git a/lib/pure/collections/sets.nim b/lib/pure/collections/sets.nim index 6edcd65abf282..9f800e6d1f702 100644 --- a/lib/pure/collections/sets.nim +++ b/lib/pure/collections/sets.nim @@ -88,9 +88,6 @@ const include setimpl -proc rightSize*(count: Natural): int {.inline.} - - # --------------------------------------------------------------------- # ------------------------------ HashSet ------------------------------ # --------------------------------------------------------------------- @@ -595,16 +592,6 @@ proc `$`*[A](s: HashSet[A]): string = ## # --> {no, esc'aping, is " provided} dollarImpl() -proc rightSize*(count: Natural): int {.inline.} = - ## Return the value of `initialSize` to support `count` items. - ## - ## If more items are expected to be added, simply add that - ## expected extra amount to the parameter before calling this. - ## - ## Internally, we want `mustRehash(rightSize(x), x) == false`. 
- result = nextPowerOfTwo(count * 3 div 2 + 4) - - proc initSet*[A](initialSize = defaultInitialSize): HashSet[A] {.deprecated: "Deprecated since v0.20, use 'initHashSet'".} = initHashSet[A](initialSize) @@ -1148,10 +1135,19 @@ when isMainModule and not defined(release): b.incl(2) assert b.len == 1 - for i in 0 .. 32: - var s = rightSize(i) - if s <= i or mustRehash(s, i): - echo "performance issue: rightSize() will not elide enlarge() at ", i + block: + type FakeTable = object + dataLen: int + counter: int + countDeleted: int + + var t: FakeTable + for i in 0 .. 32: + var s = rightSize(i) + t.dataLen = s + t.counter = i + doAssert s > i and not mustRehash(t), + "performance issue: rightSize() will not elide enlarge() at: " & $i block missingOrExcl: var s = toOrderedSet([2, 3, 6, 7]) diff --git a/lib/pure/collections/sharedtables.nim b/lib/pure/collections/sharedtables.nim index f25267e109250..0fbbdb3eb0e31 100644 --- a/lib/pure/collections/sharedtables.nim +++ b/lib/pure/collections/sharedtables.nim @@ -33,7 +33,7 @@ template maxHash(t): untyped = t.dataLen-1 include tableimpl template st_maybeRehashPutImpl(enlarge) {.dirty.} = - if mustRehash2(t): + if mustRehash(t): enlarge(t) index = rawGetKnownHC(t, key, hc) index = -1 - index # important to transform for mgetOrPutImpl diff --git a/lib/pure/collections/tableimpl.nim b/lib/pure/collections/tableimpl.nim index df05c3bb87b77..aabaeeeb37968 100644 --- a/lib/pure/collections/tableimpl.nim +++ b/lib/pure/collections/tableimpl.nim @@ -44,14 +44,14 @@ template checkIfInitialized() = template addImpl(enlarge) {.dirty.} = checkIfInitialized() - if mustRehash2(t): enlarge(t) + if mustRehash(t): enlarge(t) var hc: Hash var j = rawGetDeep(t, key, hc) rawInsert(t, t.data, key, val, hc, j) inc(t.counter) template maybeRehashPutImpl(enlarge) {.dirty.} = - if mustRehash2(t): + if mustRehash(t): enlarge(t) index = rawGetKnownHC(t, key, hc) index = -1 - index # important to transform for mgetOrPutImpl @@ -92,7 +92,7 @@ template delImplIdx(t, i) = t.data[i].hcode = deletedMarker t.data[i].key = default(type(t.data[i].key)) t.data[i].val = default(type(t.data[i].val)) - # mustRehash2 + enlarge not needed because counter+countDeleted doesn't change + # mustRehash + enlarge not needed because counter+countDeleted doesn't change template delImpl() {.dirty.} = var hc: Hash @@ -117,7 +117,7 @@ template initImpl(result: typed, size: int) = template insertImpl() = # for CountTable checkIfInitialized() - if mustRehash2(t): enlarge(t) + if mustRehash(t): enlarge(t) ctRawInsert(t, t.data, key, val) inc(t.counter) diff --git a/lib/pure/collections/tables.nim b/lib/pure/collections/tables.nim index 0502f615cf75f..bf08eaa92c446 100644 --- a/lib/pure/collections/tables.nim +++ b/lib/pure/collections/tables.nim @@ -251,8 +251,6 @@ template dataLen(t): untyped = len(t.data) include tableimpl -proc rightSize*(count: Natural): int {.inline.} - template get(t, key): untyped = ## retrieves the value at ``t[key]``. The value can be modified. ## If ``key`` is not in ``t``, the ``KeyError`` exception is raised. @@ -582,15 +580,6 @@ proc `==`*[A, B](s, t: Table[A, B]): bool = equalsImpl(s, t) -proc rightSize*(count: Natural): int {.inline.} = - ## Return the value of ``initialSize`` to support ``count`` items. - ## - ## If more items are expected to be added, simply add that - ## expected extra amount to the parameter before calling this. - ## - ## Internally, we want mustRehash(rightSize(x), x) == false. 
- result = nextPowerOfTwo(count * 3 div 2 + 4) - proc indexBy*[A, B, C](collection: A, index: proc(x: B): C): Table[C, B] = ## Index the collection with the proc provided. # TODO: As soon as supported, change collection: A to collection: A[B]
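
Note: below is a minimal standalone Nim sketch, not part of the patch series above, illustrating the probing recurrence and the rightSize/mustRehash invariant that the patches rely on. The names mirror hashcommon.nim, but seeding `perturb` directly from the raw hash value (rather than the rotated value computed by `getPerturb`/`translateBits`) and the fixed example key and table size are simplifications assumed only for illustration.

import std/[math, hashes]

const PERTURB_SHIFT = 5

proc nextTry(h, maxHash: Hash, perturb: var uint): Hash {.inline.} =
  # same recurrence as hashcommon.nextTry: the high hash bits folded in via
  # `perturb` break up the collision clusters that plain `(h + 1) and maxHash`
  # probing suffers from
  result = cast[Hash]((5 * cast[uint](h) + 1 + perturb) and cast[uint](maxHash))
  perturb = perturb shr PERTURB_SHIFT

proc mustRehash(dataLen, counter: int): bool {.inline.} =
  # grow when the table is about 2/3 full or nearly out of free slots
  (dataLen * 2 < counter * 3) or (dataLen - counter < 4)

proc rightSize(count: Natural): int {.inline.} =
  # sized so that a table holding `count` items does not immediately want to grow
  nextPowerOfTwo(count * 3 div 2 + 4)

when isMainModule:
  # follow the probe sequence of one key in a table sized for 5 items (16 slots)
  let key = 12345                  # arbitrary example key
  let hc = hash(key)
  let maxHash = rightSize(5) - 1
  var perturb = cast[uint](hc)     # simplified seeding; see note above
  var h = hc and maxHash
  for _ in 0 ..< 4:
    echo "probe slot ", h
    h = nextTry(h, maxHash, perturb)

  # the invariant rightSize and mustRehash are kept synchronized for:
  for n in 0 .. 32:
    doAssert not mustRehash(rightSize(n), n)

In the patches themselves, getPerturb additionally rotates the hash by sizeof(Hash) * 4 bits via translateBits before seeding perturb, so the bits masked out of the initial slot index influence the probe sequence from the first iterations rather than only after several PERTURB_SHIFT steps.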