Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

hashes: implement murmur3 #12022

Merged
merged 12 commits into from
Aug 31, 2019
18 changes: 18 additions & 0 deletions compiler/vmops.nim
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ from os import getEnv, existsEnv, dirExists, fileExists, putEnv, walkDir, getApp
from md5 import getMD5
from sighashes import symBodyDigest

from hashes import hash

template mathop(op) {.dirty.} =
registerCallback(c, "stdlib.math." & astToStr(op), `op Wrapper`)

Expand Down Expand Up @@ -88,6 +90,16 @@ proc staticWalkDirImpl(path: string, relative: bool): PNode =
result.add newTree(nkTupleConstr, newIntNode(nkIntLit, k.ord),
newStrNode(nkStrLit, f))

proc hashVmImplByte(a: VmArgs) {.nimcall.} =
  ## VM callback that hashes a range of an array-like argument.
  ##
  ## Argument 0 is read as a node (the original comment marks it as
  ## `nkBracket[...]`); arguments 1 and 2 are the start and end positions
  ## forwarded unchanged to `hashes.hash`.
  let
    first = a.getInt(1).int
    last = a.getInt(2).int
    elems = a.getNode(0)
  # Materialise every element as a byte so the regular runtime hash can be
  # reused; only the low 8 bits of each element's integer value are kept.
  var buf = newSeq[byte](elems.len)
  for idx, slot in buf.mpairs:
    slot = byte(elems[idx].intVal and 0xff)
  setResult(a, hashes.hash(buf, first, last))

proc registerAdditionalOps*(c: PCtx) =
proc gorgeExWrapper(a: VmArgs) =
let (s, e) = opGorge(getString(a, 0), getString(a, 1), getString(a, 2),
Expand Down Expand Up @@ -157,3 +169,9 @@ proc registerAdditionalOps*(c: PCtx) =
stackTrace(c, PStackFrame(prc: c.prc.sym, comesFrom: 0, next: nil), c.exceptionInstr,
"isExported() requires a symbol. '" & $n & "' is of kind '" & $n.kind & "'", n.info)
setResult(a, sfExported in n.sym.flags)

registerCallback c, "stdlib.hashes.hashVmImpl", proc(a: VmArgs) {.nimcall.} =
setResult(a, hashes.hash(a.getString(0), a.getInt(1).int, a.getInt(2).int))

registerCallback c, "stdlib.hashes.hashVmImplByte", hashVmImplByte
registerCallback c, "stdlib.hashes.hashVmImplChar", hashVmImplByte
76 changes: 47 additions & 29 deletions lib/pure/hashes.nim
Original file line number Diff line number Diff line change
Expand Up @@ -105,13 +105,12 @@ proc hash*(x: pointer): Hash {.inline.} =
else:
result = cast[Hash](cast[uint](x) shr 3) # skip the alignment

when not defined(booting):
proc hash*[T: proc](x: T): Hash {.inline.} =
## Efficient hashing of proc vars. Closures are supported too.
when T is "closure":
result = hash(rawProc(x)) !& hash(rawEnv(x))
else:
result = hash(pointer(x))
proc hash*[T: proc](x: T): Hash {.inline.} =
  ## Efficient hashing of proc vars. Closures are supported too.
  when T is "closure":
    # A closure is identified by both its code pointer and its environment,
    # so the two hashes are mixed together.
    let
      codeHash = hash(rawProc(x))
      envHash = hash(rawEnv(x))
    result = codeHash !& envHash
  else:
    # Any other proc value is hashed through its pointer representation.
    result = hash(pointer(x))

proc hash*(x: int): Hash {.inline.} =
## Efficient hashing of integers.
Expand Down Expand Up @@ -165,14 +164,14 @@ else:
proc rotl32(x: uint32, r: int): uint32 {.inline.} =
  ## Rotates the 32-bit value `x` left by `r` bits.
  let
    shiftedUp = x shl r            # bits moved towards the high end
    wrappedAround = x shr (32 - r) # bits that fell off and re-enter low
  result = shiftedUp or wrappedAround

proc murmurHash[T: char|int8|byte](x: openArray[T]): Hash =
proc murmurHash(x: openArray[byte]): Hash =
# https://github.com/PeterScott/murmur3/blob/master/murmur3.c
const
c1 = uint32 0xcc9e2d51
c2 = uint32 0x1b873593
n1 = uint32 0xe6546b64
m1 = uint32 0x85ebca6b
m2 = uint32 0xc2b2ae35
c1 = 0xcc9e2d51'u32
c2 = 0x1b873593'u32
n1 = 0xe6546b64'u32
m1 = 0x85ebca6b'u32
m2 = 0xc2b2ae35'u32
let
size = len(x)
stepSize = 4 # 32-bit
Expand Down Expand Up @@ -221,6 +220,15 @@ proc murmurHash[T: char|int8|byte](x: openArray[T]): Hash =
h1 = h1 xor (h1 shr 16)
return cast[Hash](h1)

proc hashVmImpl(x: string, sPos, ePos: int): Hash =
  ## Compile-time hashing hook for strings.
  ##
  ## This body is only a placeholder: compiler/vmops.nim registers a callback
  ## under the name "stdlib.hashes.hashVmImpl" for use in the VM.
  discard "look at compiler/vmops.nim"

proc hashVmImplChar(x: openArray[char], sPos, ePos: int): Hash =
  ## Compile-time hashing hook for `openArray[char]`.
  ##
  ## This body is only a placeholder: compiler/vmops.nim registers a callback
  ## under the name "stdlib.hashes.hashVmImplChar" for use in the VM.
  discard "look at compiler/vmops.nim"

proc hashVmImplByte(x: openArray[byte], sPos, ePos: int): Hash =
  ## Compile-time hashing hook for `openArray[byte]`.
  ##
  ## This body is only a placeholder: compiler/vmops.nim registers a callback
  ## under the name "stdlib.hashes.hashVmImplByte" for use in the VM.
  discard "look at compiler/vmops.nim"

proc hash*(x: string): Hash =
## Efficient hashing of strings.
##
Expand All @@ -230,7 +238,10 @@ proc hash*(x: string): Hash =
runnableExamples:
doAssert hash("abracadabra") != hash("AbracadabrA")

murmurHash(x)
when nimvm:
result = hashVmImpl(x, 0, high(x))
else:
result = murmurHash(toOpenArrayByte(x, 0, high(x)))

proc hash*(x: cstring): Hash =
## Efficient hashing of null-terminated strings.
Expand All @@ -240,9 +251,10 @@ proc hash*(x: cstring): Hash =
doAssert hash(cstring"abracadabra") != hash(cstring"AbracadabrA")

when not defined(JS) and defined(nimToOpenArrayCString):
murmurHash(toOpenArray(x, 0, x.high))
murmurHash(toOpenArrayByte(x, 0, x.high))
else:
murmurHash($x)
let xx = $x
murmurHash(toOpenArrayByte(xx, 0, high(xx)))

proc hash*(sBuf: string, sPos, ePos: int): Hash =
## Efficient hashing of a string buffer, from starting
Expand All @@ -253,7 +265,7 @@ proc hash*(sBuf: string, sPos, ePos: int): Hash =
var a = "abracadabra"
doAssert hash(a, 0, 3) == hash(a, 7, 10)

murmurHash(toOpenArray(sBuf, sPos, ePos))
murmurHash(toOpenArrayByte(sBuf, sPos, ePos))


proc hashIgnoreStyle*(x: string): Hash =
Expand Down Expand Up @@ -355,8 +367,13 @@ proc hash*[T: tuple](x: T): Hash =

proc hash*[A](x: openArray[A]): Hash =
## Efficient hashing of arrays and sequences.
when A is char|int8|byte:
murmurHash(x)
when A is byte:
result = murmurHash(x)
elif A is char:
when nimvm:
result = hashVmImplChar(x, 0, x.high)
else:
result = murmurHash(toOpenArrayByte(x, 0, x.high))
else:
for a in x:
result = result !& hash(a)
Expand All @@ -371,8 +388,16 @@ proc hash*[A](aBuf: openArray[A], sPos, ePos: int): Hash =
let a = [1, 2, 5, 1, 2, 6]
doAssert hash(a, 0, 1) == hash(a, 3, 4)

when A is char|int8|byte:
murmurHash(toOpenArray(aBuf, sPos, ePos))
when A is byte:
Copy link
Member

@timotheecour timotheecour Aug 28, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • What about int8/uint8? At least int8 was handled before this change (the old branch was `when A is char|int8|byte`), IIRC.
  • The same applies to the `hash*[A](x: openArray[A])` overload above.
    Maybe: `when sizeof(A) == 1 and A isnot char: ...`
    The VM supports casting between integers of the same size, so everything could be cast to one type (e.g. byte) without having to add overloads. Ideally (but out of scope for this PR) the VM should allow more kinds of casts to be done safely.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't consider it important enough but fair enough.

when nimvm:
result = hashVmImplByte(aBuf, 0, aBuf.high)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

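Shouldn't this be `hashVmImplByte(aBuf, sPos, ePos)`? As written, the VM branch ignores `sPos`/`ePos` and hashes the whole buffer.
The same applies to the `hashVmImplChar(aBuf, 0, aBuf.high)` call below.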

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good point. :-)

else:
result = murmurHash(toOpenArray(aBuf, sPos, ePos))
elif A is char:
when nimvm:
result = hashVmImplChar(aBuf, 0, aBuf.high)
else:
result = murmurHash(toOpenArrayByte(aBuf, sPos, ePos))
else:
for i in sPos .. ePos:
result = result !& hash(aBuf[i])
Expand Down Expand Up @@ -408,7 +433,7 @@ when isMainModule:
block smallSize: # no multibyte hashing
let
xx = @['H','i']
ii = @[72'i8, 105]
ii = @[72'u8, 105]
ss = "Hi"
doAssert hash(xx) == hash(ii)
doAssert hash(xx) == hash(ss)
Expand All @@ -426,10 +451,3 @@ when isMainModule:
doAssert hash(xx) == hash(ssl, 0, 4)
doAssert hash(xx, 0, 3) == hash(xxl, 0, 3)
doAssert hash(xx, 0, 3) == hash(ssl, 0, 3)
block misc:
let
a = [1'u8, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4]
b = [1'i8, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4]
doAssert hash(a) == hash(b)
doAssert hash(a, 2, 5) == hash(b, 2, 5)
doAssert hash(a, 0, 0) == hash(b, 7, 7)
7 changes: 7 additions & 0 deletions lib/system.nim
Original file line number Diff line number Diff line change
Expand Up @@ -4506,6 +4506,8 @@ when not defined(js):
when defined(nimToOpenArrayCString):
# Views over a `cstring`, both implemented by the compiler's "Slice" magic
# (there is no Nim body). `toOpenArray` exposes the data as `openArray[char]`,
# `toOpenArrayByte` as `openArray[byte]`.
# NOTE(review): lib/pure/hashes.nim calls these with `0, x.high`, so `last`
# appears to be an inclusive index - confirm against the magic's definition.
proc toOpenArray*(x: cstring; first, last: int): openArray[char] {.
magic: "Slice".}
proc toOpenArrayByte*(x: cstring; first, last: int): openArray[byte] {.
magic: "Slice".}

# View over a `seq[T]` as `openArray[T]`, implemented by the compiler's
# "Slice" magic (there is no Nim body).
# NOTE(review): `first`/`last` presumably bound the range inclusively, as in
# the string overloads' call sites - confirm.
proc toOpenArray*[T](x: seq[T]; first, last: int): openArray[T] {.
magic: "Slice".}
Expand All @@ -4515,8 +4517,13 @@ proc toOpenArray*[I, T](x: array[I, T]; first, last: I): openArray[T] {.
magic: "Slice".}
# View over a `string` as `openArray[char]`, implemented by the compiler's
# "Slice" magic (there is no Nim body).
proc toOpenArray*(x: string; first, last: int): openArray[char] {.
magic: "Slice".}

# Byte-typed views: the same "Slice" magic, but the result is typed as
# `openArray[byte]`. lib/pure/hashes.nim uses these to feed char-based data
# to `murmurHash(x: openArray[byte])`.
# NOTE(review): call sites pass `0, high(x)` / `sPos, ePos`, so `last`
# appears to be an inclusive index - confirm against the magic's definition.
proc toOpenArrayByte*(x: string; first, last: int): openArray[byte] {.
magic: "Slice".}
proc toOpenArrayByte*(x: openArray[char]; first, last: int): openArray[byte] {.
magic: "Slice".}
proc toOpenArrayByte*(x: seq[char]; first, last: int): openArray[byte] {.
magic: "Slice".}

type
ForLoopStmt* {.compilerproc.} = object ## \
Expand Down