From d086eff8cc63f0be1d105bb3035fb92d2da4ad0f Mon Sep 17 00:00:00 2001
From: Johan Mabille
Date: Sun, 12 Dec 2021 16:48:19 +0100
Subject: [PATCH] Added binding for MurmurHash2 (#24)

---
Review notes (free-form text in this position is not part of the diff):

* What the patch does: declares the 32-bit `MurmurHash2` function from
  "murmurhash/MurmurHash2.h" and lets `hash32` choose between it and
  `MurmurHash3_x86_32` through a new `version` argument. The Python-visible
  wrappers `hash`, `hash_unicode` and `hash_bytes` forward a new
  `murmur_version` keyword (default 3) down to `hash32`.

* Compatibility: the three `cpdef` wrappers default `murmur_version` to 3, so
  existing calls to them keep selecting MurmurHash3. The `cdef` function
  `hash32`, however, gains `version` as a required fourth parameter in both
  the .pxd declaration and the .pyx definition; any code that calls it with
  three arguments must be updated.

* Version selection: `hash32` tests only `version==3`; every other value
  (not just 2) takes the `else` branch and is hashed with MurmurHash2.
  NOTE(review): confirm whether unknown versions should be rejected instead.

* Typing: `murmur_version` is declared without a C type in the `cpdef`
  signatures and is passed to the `int version` parameter of `hash32` in
  `hash_bytes`.

* Signedness: `out` in `hash32` stays declared as `int32_t`, while
  `MurmurHash2` is declared as returning `uint32_t` and `hash32` itself
  returns `uint32_t`; the wrappers return `int32_t`.
  NOTE(review): presumably the signed/unsigned round trip is intentional to
  keep the existing signed result of `hash()` -- confirm.

 murmurhash/mrmr.pxd |  2 +-
 murmurhash/mrmr.pyx | 24 ++++++++++++++----------
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/murmurhash/mrmr.pxd b/murmurhash/mrmr.pxd
index e675245..94967c2 100644
--- a/murmurhash/mrmr.pxd
+++ b/murmurhash/mrmr.pxd
@@ -1,7 +1,7 @@
 from libc.stdint cimport uint64_t, int64_t, uint32_t
 
 
-cdef uint32_t hash32(void* key, int length, uint32_t seed) nogil
+cdef uint32_t hash32(void* key, int length, uint32_t seed, int version) nogil
 cdef uint64_t hash64(void* key, int length, uint64_t seed) nogil
 cdef uint64_t real_hash64(void* key, int length, uint64_t seed) nogil
 cdef void hash128_x86(const void* key, int len, uint32_t seed, void* out) nogil
diff --git a/murmurhash/mrmr.pyx b/murmurhash/mrmr.pyx
index e7b9d7d..d0cb698 100644
--- a/murmurhash/mrmr.pyx
+++ b/murmurhash/mrmr.pyx
@@ -1,4 +1,4 @@
-from libc.stdint cimport uint64_t, int64_t, int32_t
+from libc.stdint cimport uint64_t, int64_t, uint32_t, int32_t
 
 
 cdef extern from "murmurhash/MurmurHash3.h":
@@ -9,11 +9,15 @@ cdef extern from "murmurhash/MurmurHash3.h":
 cdef extern from "murmurhash/MurmurHash2.h":
     uint64_t MurmurHash64A(void * key, int length, uint32_t seed) nogil
     uint64_t MurmurHash64B(void * key, int length, uint32_t seed) nogil
+    uint32_t MurmurHash2(void * key, int length, uint32_t seed) nogil
 
 
-cdef uint32_t hash32(void* key, int length, uint32_t seed) nogil:
+cdef uint32_t hash32(void* key, int length, uint32_t seed, int version) nogil:
     cdef int32_t out
-    MurmurHash3_x86_32(key, length, seed, &out)
+    if version==3:
+        MurmurHash3_x86_32(key, length, seed, &out)
+    else:
+        out = MurmurHash2(key, length, seed)
     return out
 
 
@@ -34,17 +38,17 @@ cdef void hash128_x64(const void* key, int length, uint32_t seed, void* out) nog
     MurmurHash3_x64_128(key, length, seed, out)
 
 
-cpdef int32_t hash(value, uint32_t seed=0):
+cpdef int32_t hash(value, uint32_t seed=0, murmur_version=3):
     if isinstance(value, unicode):
-        return hash_unicode(value, seed=seed)
+        return hash_unicode(value, seed=seed, murmur_version=murmur_version)
     else:
-        return hash_bytes(value, seed=seed)
+        return hash_bytes(value, seed=seed, murmur_version=murmur_version)
 
 
-cpdef int32_t hash_unicode(unicode value, uint32_t seed=0):
-    return hash_bytes(value.encode('utf8'), seed=seed)
+cpdef int32_t hash_unicode(unicode value, uint32_t seed=0, murmur_version=3):
+    return hash_bytes(value.encode('utf8'), seed=seed, murmur_version=murmur_version)
 
 
-cpdef int32_t hash_bytes(bytes value, uint32_t seed=0):
+cpdef int32_t hash_bytes(bytes value, uint32_t seed=0, murmur_version=3):
     cdef char* chars = value
-    return hash32(chars, len(value), seed)
+    return hash32(chars, len(value), seed, murmur_version)