Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

hashes: implement murmur3 #12022

Merged
merged 12 commits into from
Aug 31, 2019
2 changes: 1 addition & 1 deletion compiler/ccgcalls.nim
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ proc openArrayLoc(p: BProc, n: PNode): Rope =
result = "($1)+($2), ($3)-($2)+1" % [rdLoc(a), rdLoc(b), rdLoc(c)]
else:
result = "($1)+(($2)-($4)), ($3)-($2)+1" % [rdLoc(a), rdLoc(b), rdLoc(c), intLiteral(first)]
of tyOpenArray, tyVarargs, tyUncheckedArray:
of tyOpenArray, tyVarargs, tyUncheckedArray, tyCString:
result = "($1)+($2), ($3)-($2)+1" % [rdLoc(a), rdLoc(b), rdLoc(c)]
of tyString, tySequence:
if skipTypes(n.typ, abstractInst).kind == tyVar and
Expand Down
1 change: 1 addition & 0 deletions compiler/condsyms.nim
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,4 @@ proc initDefines*(symbols: StringTableRef) =

defineSymbol("nimFixedOwned")
defineSymbol("nimHasStyleChecks")
defineSymbol("nimToOpenArrayCString")
131 changes: 98 additions & 33 deletions lib/pure/hashes.nim
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,6 @@ type
## always have a size of a power of two and can use the ``and``
## operator instead of ``mod`` for truncation of the hash value.

const
IntSize = sizeof(int)

proc `!&`*(h: Hash, val: int): Hash {.inline.} =
## Mixes a hash value `h` with `val` to produce a new hash value.
##
Expand Down Expand Up @@ -151,27 +148,78 @@ proc hash*(x: float): Hash {.inline.} =
proc hash*[A](x: openArray[A]): Hash
proc hash*[A](x: set[A]): Hash

template bytewiseHashing(result: Hash, x: typed, start, stop: int) =
for i in start .. stop:
result = result !& hash(x[i])
result = !$result

template hashImpl(result: Hash, x: typed, start, stop: int) =
when defined(JS):
  proc imul(a, b: uint32): uint32 =
    ## Multiplies `a` and `b` and keeps only the low 32 bits of the product.
    ##
    ## Each operand is split into 16-bit halves so that every partial
    ## product stays small; the `aHi * bHi` term is omitted because it
    ## would only contribute to bits 32 and above.
    ## NOTE(review): presumably needed because plain `*` on the JS backend
    ## cannot represent the full product exactly - confirm.
    # https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/imul
    const mask = 0xffff'u32
    let
      aHi = (a shr 16) and mask
      aLo = a and mask
      bHi = (b shr 16) and mask
      bLo = b and mask
    # `shl` binds tighter than `+`, so only the cross terms are shifted.
    result = (aLo * bLo) + (aHi * bLo + aLo * bHi) shl 16
else:
  # On the other backends the plain `uint32` multiplication is used as is.
  template imul(a, b: uint32): untyped = a * b

proc rotl32(x: uint32, r: int): uint32 {.inline.} =
  ## Rotates the 32-bit value `x` left by `r` bit positions.
  ##
  ## The rotation count is reduced modulo 32, so `r = 0` and `r = 32`
  ## return `x` unchanged instead of requesting a shift by the full
  ## word width.
  let s = r and 31
  # The second mask turns the `32 - 0` case into a shift by 0.
  (x shl s) or (x shr ((32 - s) and 31))

proc murmurHash[T: char|int8|byte](x: openArray[T]): Hash =
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it easy to make this non-generic? Only offer the char or byte variant. Should remove some code bloat.

# https://github.com/PeterScott/murmur3/blob/master/murmur3.c
const
c1 = uint32 0xcc9e2d51
c2 = uint32 0x1b873593
n1 = uint32 0xe6546b64
m1 = uint32 0x85ebca6b
m2 = uint32 0xc2b2ae35
let
elementSize = sizeof(x[start])
stepSize = IntSize div elementSize
var i = start
while i <= stop+1 - stepSize:
var n = 0
size = len(x)
stepSize = 4 # 32-bit
n = size div stepSize
var
h1: uint32
i = 0

# body
while i < n * stepSize:
var k1: uint32
when nimvm:
# we cannot cast in VM, so we do it manually
for j in countdown(stepSize-1, 0):
n = (n shl (8*elementSize)) or ord(x[i+j])
var j = stepSize
while j > 0:
dec j
k1 = (k1 shl 8) or (ord(x[i+j])).uint32
else:
n = cast[ptr Hash](unsafeAddr x[i])[]
result = result !& n
i += stepSize
bytewiseHashing(result, x, i, stop) # hash the remaining elements and finish
k1 = cast[ptr uint32](unsafeAddr x[i])[]
inc i, stepSize

k1 = imul(k1, c1)
k1 = rotl32(k1, 15)
k1 = imul(k1, c2)

h1 = h1 xor k1
h1 = rotl32(h1, 13)
h1 = h1*5 + n1

# tail
var k1: uint32
var rem = size mod stepSize
while rem > 0:
dec rem
k1 = (k1 shl 8) or (ord(x[i+rem])).uint32
k1 = imul(k1, c1)
k1 = rotl32(k1, 15)
k1 = imul(k1, c2)
h1 = h1 xor k1

# finalization
h1 = h1 xor size.uint32
h1 = h1 xor (h1 shr 16)
h1 = imul(h1, m1)
h1 = h1 xor (h1 shr 13)
h1 = imul(h1, m2)
h1 = h1 xor (h1 shr 16)
return cast[Hash](h1)

proc hash*(x: string): Hash =
## Efficient hashing of strings.
Expand All @@ -182,7 +230,7 @@ proc hash*(x: string): Hash =
runnableExamples:
doAssert hash("abracadabra") != hash("AbracadabrA")

hashImpl(result, x, 0, high(x))
murmurHash(x)

proc hash*(x: cstring): Hash =
## Efficient hashing of null-terminated strings.
Expand All @@ -191,7 +239,10 @@ proc hash*(x: cstring): Hash =
doAssert hash(cstring"AbracadabrA") == hash("AbracadabrA")
doAssert hash(cstring"abracadabra") != hash(cstring"AbracadabrA")

hashImpl(result, x, 0, high(x))
when not defined(JS) and defined(nimToOpenArrayCString):
murmurHash(toOpenArray(x, 0, x.high))
else:
murmurHash($x)

proc hash*(sBuf: string, sPos, ePos: int): Hash =
## Efficient hashing of a string buffer, from starting
Expand All @@ -202,7 +253,8 @@ proc hash*(sBuf: string, sPos, ePos: int): Hash =
var a = "abracadabra"
doAssert hash(a, 0, 3) == hash(a, 7, 10)

hashImpl(result, sBuf, sPos, ePos)
murmurHash(toOpenArray(sBuf, sPos, ePos))


proc hashIgnoreStyle*(x: string): Hash =
## Efficient hashing of strings; style is ignored.
Expand Down Expand Up @@ -300,12 +352,15 @@ proc hash*[T: tuple](x: T): Hash =
result = result !& hash(f)
result = !$result


proc hash*[A](x: openArray[A]): Hash =
## Efficient hashing of arrays and sequences.
when A is char|SomeInteger:
hashImpl(result, x, 0, x.high)
when A is char|int8|byte:
murmurHash(x)
else:
bytewiseHashing(result, x, 0, x.high)
for a in x:
result = result !& hash(a)
result = !$result

proc hash*[A](aBuf: openArray[A], sPos, ePos: int): Hash =
## Efficient hashing of portions of arrays and sequences, from starting
Expand All @@ -316,10 +371,12 @@ proc hash*[A](aBuf: openArray[A], sPos, ePos: int): Hash =
let a = [1, 2, 5, 1, 2, 6]
doAssert hash(a, 0, 1) == hash(a, 3, 4)

when A is char|SomeInteger:
hashImpl(result, aBuf, sPos, ePos)
when A is char|int8|byte:
murmurHash(toOpenArray(aBuf, sPos, ePos))
else:
bytewiseHashing(result, aBuf, sPos, ePos)
for i in sPos .. ePos:
result = result !& hash(aBuf[i])
result = !$result

proc hash*[A](x: set[A]): Hash =
## Efficient hashing of sets.
Expand All @@ -334,26 +391,30 @@ when isMainModule:
a = ""
b = newSeq[char]()
c = newSeq[int]()
d = cstring""
e = "abcd"
doAssert hash(a) == 0
doAssert hash(b) == 0
doAssert hash(c) == 0
doAssert hash(d) == 0
doAssert hashIgnoreCase(a) == 0
doAssert hashIgnoreStyle(a) == 0
doAssert hash(e, 3, 2) == 0
block sameButDifferent:
doAssert hash("aa bb aaaa1234") == hash("aa bb aaaa1234", 0, 13)
doAssert hash("aa bb aaaa1234") == hash(cstring"aa bb aaaa1234")
doAssert hashIgnoreCase("aA bb aAAa1234") == hashIgnoreCase("aa bb aaaa1234")
doAssert hashIgnoreStyle("aa_bb_AAaa1234") == hashIgnoreCase("aaBBAAAa1234")
block smallSize: # no multibyte hashing
let
xx = @['H','e','l','l','o']
ii = @[72'i8, 101, 108, 108, 111]
ss = "Hello"
xx = @['H','i']
ii = @[72'i8, 105]
ss = "Hi"
doAssert hash(xx) == hash(ii)
doAssert hash(xx) == hash(ss)
doAssert hash(xx) == hash(xx, 0, xx.high)
doAssert hash(ss) == hash(ss, 0, ss.high)
block largeSize: # longer than 8 characters, should trigger multibyte hashing
block largeSize: # longer than 4 characters
let
xx = @['H','e','l','l','o']
xxl = @['H','e','l','l','o','w','e','e','n','s']
Expand All @@ -362,9 +423,13 @@ when isMainModule:
doAssert hash(xxl) == hash(xxl, 0, xxl.high)
doAssert hash(ssl) == hash(ssl, 0, ssl.high)
doAssert hash(xx) == hash(xxl, 0, 4)
doAssert hash(xx) == hash(ssl, 0, 4)
doAssert hash(xx, 0, 3) == hash(xxl, 0, 3)
doAssert hash(xx, 0, 3) == hash(ssl, 0, 3)
block misc:
let
a = [1'u8, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4]
b = [1'i8, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4]
doAssert hash(a) == hash(b)
doAssert hash(a, 2, 5) == hash(b, 2, 5)
doAssert hash(a, 0, 0) == hash(b, 7, 7)
3 changes: 3 additions & 0 deletions lib/system.nim
Original file line number Diff line number Diff line change
Expand Up @@ -4499,6 +4499,9 @@ when defined(nimconfig):
when not defined(js):
proc toOpenArray*[T](x: ptr UncheckedArray[T]; first, last: int): openArray[T] {.
magic: "Slice".}
when defined(nimToOpenArrayCString):
proc toOpenArray*(x: cstring; first, last: int): openArray[char] {.
magic: "Slice".}

proc toOpenArray*[T](x: seq[T]; first, last: int): openArray[T] {.
magic: "Slice".}
Expand Down
7 changes: 4 additions & 3 deletions tests/parallel/tsendtwice.nim
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
discard """
output: '''ob @[]
output: '''ob2 @[]
ob @[]
ob3 @[]
ob2 @[]
3
ob2 @[]
ob @[]
ob3 @[]
ob2 @[]'''
'''
cmd: "nim c -r --threads:on $file"
"""

Expand Down