diff --git a/CMakeLists.txt b/CMakeLists.txt index 93731f8b3..bcba35bbc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,6 +21,7 @@ if(CMAKE_Fortran_COMPILER_ID STREQUAL GNU) endif() add_compile_options(-fimplicit-none) add_compile_options(-ffree-line-length-132) + add_compile_options(-fno-range-check) add_compile_options(-Wall) add_compile_options(-Wextra) add_compile_options(-Wimplicit-procedure) diff --git a/Makefile.manual b/Makefile.manual index b7af735b7..54dc3b89f 100644 --- a/Makefile.manual +++ b/Makefile.manual @@ -1,7 +1,7 @@ # Fortran stdlib Makefile FC ?= gfortran -FFLAGS ?= -Wall -Wextra -Wimplicit-interface -fPIC -g -fcheck=all +FFLAGS ?= -Wall -Wextra -Wimplicit-interface -fPIC -g -fcheck=all -fno-range-check FYPPFLAGS ?= export FC diff --git a/doc/specs/index.md b/doc/specs/index.md index a3b0a5def..0d1afbffa 100644 --- a/doc/specs/index.md +++ b/doc/specs/index.md @@ -14,6 +14,8 @@ This is and index/directory of the specifications (specs) for each new module/fe - [ascii](./stdlib_ascii.html) - Procedures for handling ASCII characters - [bitsets](./stdlib_bitsets.html) - Bitset data types and procedures - [error](./stdlib_error.html) - Catching and handling errors + - [hash\_functions](./stdlib_hash_functions.html) - Hashing integer + vectors or character strings - [IO](./stdlib_io.html) - Input/output helper & convenience - [kinds](./stdlib_kinds.html) - Kind parameters - [linalg](./stdlib_linalg.html) - Linear Algebra diff --git a/doc/specs/stdlib_hash_functions.md b/doc/specs/stdlib_hash_functions.md new file mode 100644 index 000000000..f84f60623 --- /dev/null +++ b/doc/specs/stdlib_hash_functions.md @@ -0,0 +1,1697 @@ +--- +title: Hash codes +--- + +# The `stdlib_32_bit_hash_functions` and `stdlib_64_bit_hash_functions` modules + +(TOC) + +## Overview of hash functions + +The comparison of lexical entities or other objects for equality +can be computationally expensive. 
+This cost is often reduced by computing a near unique integer value, +termed a hash code, from the structure of the object, termed a key, +using a procedure, termed a hash function. +Equality of hash codes is a necessary, but not sufficient, condition +for the original objects to be equal. +As integer comparisons are very efficient, performing an initial +comparison of hash codes and then performing a detailed comparison +only if the hash codes are equal can improve performance. +The hash codes, in turn, can be mapped to a smaller set of integers, +that can be used as an index, termed a hash index, to a rank one +array, often termed a hash table. +This mapping will be known as a scalar hash. +The use of a hash table reduces the number of hash codes that need to +be compared, further improving performance. +A hash function can also be used to generate a checksum to verify that +data has not changed. +The Fortran Standard Library therefore provides procedures to compute +hash codes and scalar hashes, and derived types implementing hash +tables. +This document only discusses the hash codes and scalar hashes in the +library. + +## Licensing + +The Fortran Standard Library is distributed under the MIT License. +However components of the library may be based on code with additional +licensing restrictions. In particular, the hash codes are often based +on algorithms with additional restrictions on distribution. +The algorithms with such restrictions (`Fibonacci Hash`, `Universal +Multiplicative Hash`, +`FNV-1 Hash`, `FNV-1A Hash`, `nmhash32`, `nmhash32x`, `waterhash`, +`pengyhash` and `SpookyHash`) are discussed below. + +`FIBONACCI_HASH` is a scalar hash. It is an implementation in Fortran +2008 and signed two's complement integers of the Fibonacci Hash +described in D. E. Knuth, "The Art of +Computer Programming, Second Edition, Volume 3, Sorting and +Searching", Addison-Wesley, Upper Saddle River, NJ, +pp. 517-518, 1998. 
The algorithms in that source are considered public +domain. + +`UNIVERSAL_MULT_HASH` is a scalar hash. It is an implementation in +Fortran 2008 and signed two's complement integers of the +universal multiplicative hash algorithm of M. Dietzfelbinger, +T. Hagerup, J. Katajainen, and M. Penttonen, "A Reliable Randomized +Algorithm for the Closest-Pair Problem," J. Algorithms, Vol. 25, +No. 1, Oct. 1997, pp. 19-51. Because of its publication in the Journal +of Algorithms, the universal multiplicative hash algorithm is public +domain. + +`FNV_1_HASH` and `FNV_1A_HASH` are translations to Fortran 2008 and +signed two's complement integers of the +`FNV-1` and `FNV-1a` hash functions of Glenn Fowler, Landon Curt Noll, +and Phong Vo, that has been released into the public +domain. Permission has been granted, by Landon Curt Noll, for the use +of these algorithms in the Fortran Standard Library. A description of +these functions is available at +. +These functions have been modified from their normal forms to also +encode the structure size in the output hash. + +Similarly `SPOOKY_HASH` and associated procedures are translations to +Fortran 2008 and signed two's complement integers of the unsigned 64 +bit version 2 `SpookyHash` functions of Bob +Jenkins to signed 64 +bit operations. Version 2 was chosen over version 1 as it has better +performance and fewer bad seeds +Bob Jenkins has also put this code in the public +domain and has given permission to treat this code as public domain in +the USA, provided the code can be used under other licenses and he is +given appropriate credit. + +`NMHASH32` and `NMHASH32x` are translations to Fortran 2008 and signed +two's complement integers of the unsigned 32 bit +hashes of James Z. M. Gao's `nmhash32` and `nmhash32x` version of 0.2, + +James Z. M. Gao has released his code under the BSD 2 Clause +License. The BSD 2-Clause license is as follows: + + BSD 2-Clause License + + Copyright (c) 2021, James Z.M. Gao + All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + +`WATER_HASH` is a translation to Fortran 2008 and signed two's +complement integers of the `waterhash` algorithm +of Tommy Ettinger. This algorithm is inspired by the Wy Hash of +Wang Yi. Tommy Ettinger's original C++ code, `waterhash.h`, +is available at URL: under +the `unlicense`, +. +The `unlicense` reads as follows: + + This is free and unencumbered software released into the public domain. + Anyone is free to copy, modify, publish, use, compile, sell, or + distribute this software, either in source code form or as a compiled + binary, for any purpose, commercial or non-commercial, and by any + means. 
+ + In jurisdictions that recognize copyright laws, the author or authors + of this software dedicate any and all copyright interest in the + software to the public domain. We make this dedication for the benefit + of the public at large and to the detriment of our heirs and + successors. We intend this dedication to be an overt act of + relinquishment in perpetuity of all present and future rights to this + software under copyright law. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + + For more information, please refer to + +`PENGY_HASH` is a translation to Fortran 2008 and signed two's +complement arithmetic of the `pengyhash` algorithm of Alberto Fajardo, +copyright 2020. Alberto Fajardo's original C code, `pengyhash.c`, is +available at the URL: +https://github.com/tinypeng/pengyhash/blob/master/pengyhash.c +under the BSD 2-Clause License: +https://github.com/tinypeng/pengyhash/blob/master/LICENSE + +The BSD 2-Clause license is as follows: + + BSD 2-Clause License + + pengyhash + Copyright (c) 2020 Alberto Fajardo + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS + BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR + TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. + + +## The hash codes modules + +### Overview of the modules + +The Standard Library provides two modules implementing hash +functions and scalar hashes. +The `stdlib_32_bit_hash_functions` module provides procedures to +compute 32 bit integer hash codes and a scalar hash. +The 32 bit hash codes are useful for tables of up to `2**16` entries, +and for keys with a few hundred elements. +The `stdlib_64_bit_hash_functions` module provides hash procedures to +compute 64 bit integer hash codes and a scalar hash. +The 64 bit hash codes are useful for tables of up to `2**30` entries, +and for keys with thousands of elements. +While one of the codes in `stdlib_64_bit_hash_functions`, +`SPOOKY_HASH`, can also be used to calculate 128 bit hash codes, none +of the current codes can be used to calculate 256 bit hash codes. +Such larger hash codes are useful for larger hash tables and keys, and +for checksums. +Such larger keys and tables are little used, if used at all, in +current +Fortran codes, but the larger hash codes may be added to the library +if there is a demand for them. + +Hash functions are often divided into two categories +"cryptographic" and "non-cryptographic". 
+Cryptographic hash functions produce codes that are infeasible to +reverse without additional information beyond the identity of +the hash function used to generate the code and the resulting codes. +Non-cryptographic codes, in some circumstances, are believed to be +reversible. +The modules only implement hash +functions that are believed to be non-cryptographic, with +implementations available in the public domain. + +There are a number of algorithms available for the computation of +non-cryptographic 32 and 64 bit hash codes that differ in their +computational complexity, +their relative performance on different size keys, and the +expected uniqueness (randomness) of the resulting hash codes. +Their relative performance in the analysis of text, in particular, +can depend on the processor, character set, language, and content. +The quality of a hash function is often evaluated using +the SMHasher test suite, originally written by +[Austin Appleby](https://github.com/aappleby/smhasher), but greatly +extended by [Reini Urban](https://github.com/rurban/smhasher). +All except the simplest, `FNV_1` and `FNV_1A`, of the hash functions +defined in the modules perform well on the tests in Reini Urban's +version of SMHasher. + +There are two problems in implementing hash functions in Fortran. +First, the static typing of Fortran makes it awkward to define general +purpose hash functions. +Instead hash functions are defined for some of the more common objects +that are sufficiently complicated that a direct comparison is costly +and common enough that a general procedure is useful: +character strings and rank one arrays of integers. +Other objects can, in principle, be hashed by using `transfer` to +map their contents to an integer array, typically one of kind `INT8`. +The other problem is that hash codes are typically defined using +modular unsigned integer arithmetic. +As such integers are not part of the current Fortran standard, +workarounds have to be used. 
+These can take two forms. +In one, the operations are emulated by using an integer of a +larger size, or, for the larger integers, by dividing the integer into +two lower and higher order halves, +and performing the operations on each half separately using +the larger integers. +In the other, the unsigned integers may be replaced directly by +the corresponding signed integers, but +otherwise not modifying the the code logic. +The first should be standard conforming on current processors, but +is more computationally intensive unless the processors recognize +underlying idioms that are rarely used in Fortran codes. The second is +not standard conforming as bit operations involving the sign are +undefined, +but should yield equivalent results with fewer operations on +processors with two's complement integers that do not trap on over +or under flow. The codes currently use the second method. + +In order to compile the hash function modules, the processors must +implement much of Fortran 2003, and selected components of Fortran +2008: submodules, 64 bit integers, and some bit intrinsics. +The main limitation on valid processors is whether they +implement the submodules enhancement of Fortran 2008. +In order to properly run the hash functions, the compilers must +use two's complement integers, and be able to execute them with +wraparound semantics and no integer overflow exceptions. +Current Fortran 2003+ processors solely use two's complement +integers, and appear to be able to turn off overflow detection, +so the modules use signed integer arithmetic. For that reason +trapping on signed arithmetic must be disabled. The command line +flags to disable overflow detection for processors implementing +submodules are summarized in the table below. +Note that FLANG, gfortran, ifort, and NAG all default to +integer overflow wrapping. + +|Processor|Legal flag|Illegal flag|Default| +|---------|----------|------------|-------| +| ARM Fortran | NA? | NA? | overflow wrapping? 
| +| Cray Fortran | NA? | NA? | overflow wrapping? | +| FLANG/PGI | -fwrapv | -ftrapv | -fwrapv | +| gfortran | -fwrapv | -ftrapv | -fwrapv | +| IBM Fortran | NA? | NA? | overflow wrapping? | +| ifort| NA? | NA? | overflow wrapping | +| NAG Fortran | -C=none | -C=intovf | -C=none | +| NEC Fortran | NA? | NA? | overflow wrapping? | +| NVIDIA Fortran | NA? | NA? | overflow wrapping? | + +All of the modules' hash functions take one or two arguments. +All of them have as their first argument the object to be hashed, +termed a *key*. +Most have a second argument, termed a *seed*, that sets the initial +value of the hash code changing the hash function behavior. +In particular, inputs that hash to the same hash index with a given +seed, will often hash to different indexes with a different seed. +This difference in behavior makes algorithms that use a seed much +more resistant to denial of service attacks that use the properties +of a known hash to increase the number of hash table collisions. +This additional integer must be kept the same for all hashes +in a given hash table, but can be changed and the objects rehashed +if collisions are unusually common. +The *seed* can be either a scalar or a two element array. +Some of the hash functions have alternatives that allow incremental +hashing. + +|Algorithm|Seed|Result| +|---------|----|------| +|FNV-1|None|32 or 64 bit integer| +|FNV-1a|None|32 or 64 bit integer| +|nmhash32 |32 bit scalar integer|32 bit integer| +|nmhash32x |32 bit scalar integer|32 bit integer| +|pengyhash |32 bit scalar integer|64 bit integer| +|Spooky Hash|64 bit two element vector|64 bit two element vector| +|waterhash|64 bit scalar integer|32 bit integer| + +The hash function modules each provide at least five algorithms for +hash functions: two optimized for small (< 32 `INT8` integer elements) +keys, and three optimized for large (> 100 `INT8` integer elements) +keys. 
+The core implementation for each algorithm is for keys that are +vectors of `INT8` integers. +These core implementations are then used in wrappers for keys +that are vectors of `INT16`, `INT32` and `INT64` integers, or default +character strings, in the expectation that inlining will eliminate the +overhead of transferring the other keys to `INT8` integer vectors. + +The `stdlib_32_bit_hash_functions` module provides +implementations of five hash code algorithms: +the *FNV_1* and *FNV_1A* variants of Glenn Fowler, +Landon Curt Noll, and Kiem-Phong Vo; +the *nmhash32* and *nmhash32x* of James Z. M. Gao; +and the *waterhash* of Tommy Ettinger. +The detailed implementation of each algorithm is handled in a separate +submodule: `stdlib_32_bit_fnv_hashes`, +`stdlib_32_bit_nmhashes`, and `stdlib_32_bit_water_hashes`, +respectively. The `nmhash32`, `nmhash32x`, and `waterhash` algorithms +require seeds. The submodules provide separate seed generators +for each algorithm. +The module itself +implements two scalar hash functions, `FIBONACCI_HASH` and +`UNIVERSAL_MULT_HASH`. +It also implements the subroutine, `ODD_RANDOM_INTEGER`, for +generating seeds for `UNIVERSAL_MULT_HASH`. +All assume a two's complement sign bit, and no out of +range checks. + +The `stdlib_64_bit_hash_functions` module also provides +implementations of four hash code algorithms: +the *FNV_1* and *FNV_1A* variants of Glenn Fowler, +Landon Curt Noll, and Kiem-Phong Vo; +the *pengyhash* of Alberto Fajardo; +and the *SpookyHash* of Bob Jenkins. +The detailed implementation of each algorithm is handled in a separate +submodule: `stdlib_64_bit_fnv_hashes`, +`stdlib_64_bit_pengy_hashes`, and `stdlib_64_bit_spooky_hashes`, +respectively. +The `pengyhash`, and `Spooky Hash` algorithms +require seeds. The submodules provide separate seed generators +for each algorithm. +The module itself implements two scalar hash functions, +`FIBONACCI_HASH` and `UNIVERSAL_MULT_HASH`. 
+It also implements the subroutine, `ODD_RANDOM_INTEGER`, for +generating seeds for `UNIVERSAL_MULT_HASH`. +All assume a two's complement sign bit, and no out of +range checks. + +The `stdlib_32_bit_fnv_hashes` and `stdlib_64_bit_fnv_hashes` +submodules each provide implementations of the FNV-1 and FNV-1A +algorithms in the form of two separate overloaded functions: `FNV_1` +and `FNV_1A`. +The FNV-1 and FNV-1A algorithms differ in their order of the +multiplication and exclusive or operations. +They differ from their normal implementation in that they also +encode the structure size in the hash code. +The 32 and 64 bit algorithms differ in their initial offsets and in +their multiplicative constants. +Analysis suggests that `FNV_1A` should be better at randomizing the +input, but tests with hash tables show negligible difference. +These algorithms have the reputation of being particularly useful for +small byte strings, i.e., strings of less than 32 bytes. +While they do not at all perform well on the SMHasher test suite, +usage indicates that this has little impact on the +performance of small hash tables, and the small size of the functions +allows their quick loading and retainment in the instruction cache, +giving a performance boost where the hashing is intermittent. +(See the +[SMHasher discussion](https://github.com/rurban/smhasher/README.md) +and S. Richter, V. Alvarez, and J. Dittrich, +["A Seven-Dimensional Analysis of Hashing Methods and its Implications on Query Processing"](https://bigdata.uni-saarland.de/publications/p249-richter.pdf).) + +The `stdlib_32_bit_nmhashes` submodule provides implementations +of James Z.M. Gao's `nmhash32` and `nmhash32x` algorithms, +version 0.2, +in the form of the overloaded functions, `NMHASH32` and `NMHASH32X`. +The implementations are based on the scalar versions of Gao's +algorithms and not the vector versions that require access to +the vector instructions of some processors. 
+Both algorithms perform well on the SMHasher tests, and have no known +bad seeds. The vector versions of both codes perform well on large +keys, with the `nmhash32x` faster on short keys. To provide randomly +generated seeds for the two functions the submodule also defines the +subroutines `NEW_NMHASH32_SEED` and `NEW_NMHASH32X_SEED`. Gao claims +that `NMHASH32X` is significantly faster than `NMHASH32` on short +keys, but slower on long keys, but our limited testing so far shows +`NMHASH32X` to be significantly faster on short keys and slightly +faster on long keys. + +The `stdlib_32_bit_water_hashes` submodule provides implementations +of Tommy Ettinger's `waterhash` algorithm in the form of the overloaded +function, `WATER_HASH`. Water Hash has not been tested by Reini Urban, +but Tommy Ettinger has tested it with Urban's SMHasher and presents +results that show Water Hash passing all the tests. So far his +testing hasn't found any bad seeds for the algorithm. To provide +randomly generated seeds for the hash function the submodule also +defines the subroutine `NEW_WATER_HASH_SEED`. + +The `stdlib_64_bit_pengy_hashes` submodule provides implementations of +Alberto Fajardo's `pengyhash` in the form of the overloaded function, +`PENGY_HASH`. Reini Urban's testing shows that PengyHash passes all +the tests and has no bad seeds. To provide randomly generated seeds +for the hash function the submodule also defines the subroutine +`NEW_PENGY_HASH_SEED`. + +The `stdlib_64_bit_spooky_hashes` submodule provides implementations +of Bob Jenkins' SpookyHash in the form of the overloaded function, +`SPOOKY_HASH`. Future implementations may provide the SpookyHash +incremental hashing procedures. +SpookyHash is optimized for large objects and should give excellent +performance for objects greater than about 96 bytes, but has +significant overhead for smaller objects. 
+The code was designed for Little Endian processors, and will give +different results on Big Endian processors, but the hash quality on +those processors is probably just as good. +SpookyHash version 2 passes all of Reini Urban's SMHasher tests, and +has one bad seed only when reduced to a 32 bit output. +Its only potential problem is undefined behavior if the key is +misaligned. + +## The `stdlib_32_bit_hash_codes` module + +### Overview of the module + +Thirty two bit hash functions are primarily useful for generating hash +codes for hash tables. +Checksums generally benefit from having a larger number of bits. +The `stdlib_32_bit_hash_codes` module defines five public overloaded +32 bit hash code functions, `FNV_1`, `FNV-1A`, `NMHASH32`, `NMHASH32x` +and `WATER_HASH`, two scalar hash functions, `FIBONACCI_HASH` and +`UNIVERSAL_MULT_HASH`, four seed generators, `ODD_RANDOM_INTEGER` for +`UNIVERSAL_MULT_HASH`, and `NEW_NMHASH32_SEED`, `NEW_NMHASH32X_SEED`, +and `NEW_WATER_HASH_SEED`, for their respective hash code +functions. It also defines the integer kind constant, `INT_HASH`, and +a logical constant, `LITTLE_ENDIAN`, used to deal with one aspect of +the machine dependence of the hash codes. + +### The `INT_HASH` parameter + +It is necessary to define the kind of integer used to return the hash +code. +As `stdlib_32_bit_hash_codes` deals exclusively with 32 bit hash codes, +`INT_HASH` is an alias for the integer kind `INT32`. + +### The `LITTLE_ENDIAN` parameter + +In implementing hash functions it is sometimes necessary to know the +"endianess" of the processor's integers. To this end the +`stdlib_32_bit_hash_codes` module defines the logical parameter +`LITTLE_ENDIAN` that, if true, indicates that the processor has little +endian integers, and that if false indicates that the integers are big +endian. 
+ +### Specifications of the `stdlib_32_bit_hash_codes` procedures + +#### `FIBONACCI_HASH` - maps an integer to a smaller number of bits + +##### Status + +Experimental + +##### Description + +Calculates an `nbits` hash code from a 32 bit integer. + +##### Syntax + +`code = [[stdlib_32_bit_hash_codes:fibonacci_hash]]( key, nbits )` + +##### Class + +Pure function + +##### Arguments + +`key`: Shall be a scalar integer expression of kind `INT32`. It is an +`intent(in)` argument. + +`nbits`: Shall be a scalar default integer expression with `0 < nbits < +32`. It is an `intent(in)` argument. + +##### Result + +The result is an integer of kind `INT32` with at most the lowest +`nbits` nonzero. + +##### Note + +`FIBONACCI_HASH` is an implementation of the Fibonacci Hash of Donald +E. Knuth. It multiplies the `KEY` by the odd valued approximation to +`2**32/phi`, where `phi` is the golden ratio 1.618..., and returns the +`NBITS` upper bits of the product as the lowest bits of the result. + +##### Example + +```fortran + program demo_fibonacci_hash + use stdlib_32_bit_hash_codes, only: fibonacci_hash + use iso_fortran_env, only: int32 + implicit none + integer, allocatable :: array1(:) + integer(int32) :: hash, source + allocate( array1(0:2**6-1) ) + array1(:) = 0 + source = int(Z'1FFFFFF', int32) + hash = fibonacci_hash(source, 6) + array1(hash) = source + print *, hash + end program demo_fibonacci_hash +``` + +#### `FNV_1_HASH`- calculates a hash code from a key + +##### Status + +Experimental + +##### Description + +Calculates a 32 bit hash code from a rank 1 integer array or a default +character string. + +##### Syntax + +`code = [[stdlib_32_bit_hash_codes:fnv_1_hash]]( key )` + +##### Class + +Pure function + +##### Argument + +`key`: Shall be a deferred length default character scalar expression +or a rank 1 integer array expression of kind `INT8`, `INT16`, +`INT32`, or `INT64`. +It is an `intent(in)` argument. 
+ +##### Result + +The result is a scalar integer of kind `INT32`. + +##### Note + +`FNV_1_HASH` is an implementation of the original FNV-1 hash code of Glenn +Fowler, Landon Curt Noll, and Phong Vo. +It differs from typical implementations in that it also ecodes the +size of the structure in the hash code. +This code is relatively fast on short keys, and is small enough that it +will often be retained in the instruction cache if hashing is +intermittent. +As a result it should give good performance for typical hash table +applications. +This code does not pass any of the SMHasher tests, but the resulting +degradation in performance due to its larger number of collisions is +expected to be minor compared to its faster hashing rate. + + +##### Example + +```fortran + program demo_fnv_1_hash + use stdlib_32_bit_hash_codes, only: fnv_1_hash + use iso_fortran_env, only: int32 + implicit none + integer, allocatable :: array1(:) + integer(int32) :: hash + array1 = [ 5, 4, 3, 1, 10, 4, 9] + hash = fnv_1_hash(array1) + print *, hash + end program demo_fnv_1_hash +``` + + +#### `FNV_1A_HASH`- calculates a hash code from a key + +##### Status + +Experimental + +##### Description + +Calculates a 32 bit hash code from a rank 1 integer array or a default +character string. + +##### Syntax + +`code = [[stdlib_32_bit_hash_codes:fnv_1a_hash]]( key )` + +##### Class + +Pure function + +##### Argument + +`key`: Shall be a deferred length default character scalar expression +or a rank 1 integer array expression of kind `INT8`, `INT16`, +`INT32`, or `INT64`. +It is an `intent(in)` argument. + +##### Result + +The result is a scalar integer of kind `INT32`. + +##### Note + +`FNV_1A_HASH` is an implementation of the alternative FNV-1a hash code of +Glenn Fowler, Landon Curt Noll, and Phong Vo. +It differs from typical implementations in that it also ecodes the +size of the structure in the hash code. 
+This code is relatively fast on short keys, and is small enough that it +will often be retained in the instruction cache if hashing is +intermittent. +As a result it should give good performance for typical hash table +applications. +This code does not pass any of the SMHasher tests, but the resulting +degradation in performance due to its larger number of collisions is +expected to be minor compared to its faster hashing rate. + +##### Example + +```fortran + program demo_fnv_1a_hash + use stdlib_32_bit_hash_codes, only: fnv_1a_hash + use iso_fortran_env, only: int32 + implicit none + integer, allocatable :: array1(:) + integer(int32) :: hash + array1 = [ 5, 4, 3, 1, 10, 4, 9] + hash = fnv_1a_hash(array1) + print *, hash + end program demo_fnv_1a_hash +``` + + +#### `NEW_NMHASH32_SEED`- returns a valid input seed for `NMHASH32` + +##### Status + +Experimental + +##### Description + +Calculates a 32 bit "random" integer that is believed to be a valid +seed for `NMHASH32` and is also different from the input seed. + +##### Syntax + +`call [[stdlib_32_bit_hash_codes:new_nmhash32_seed]]( seed )` + +##### Class + +Subroutine + +##### Argument + +`seed`: shall be a defined integer scalar variable of kind `INT32`. +It is an `intent(inout)` argument. On input `seed` should be defined, +and on output it will be different from the input `seed`. + +##### Note + +Currently there are no known bad seeds for `NMHASH32`, but if any are +identified the procedure will be revised so that they cannot be +returned. This subroutine uses Fortran's intrinsic + `RANDOM_NUMBER` and the values returned can be changed by calling the + intrinsic `RANDOM_INIT`. + +##### Example + +See the example for `NMHASH32`. + + +#### `NEW_NMHASH32X_SEED`- returns a valid input seed for `NMHASH32X` + +##### Status + +Experimental + +##### Description + +Calculates a 32 bit "random" integer that is believed to be a valid +seed for `NMHASH32X` and is also different from the input seed. 
+ +##### Syntax + +`call [[stdlib_32_bit_hash_codes:new_nmhash32x_seed]]( seed )` + +##### Class + +Subroutine + +##### Argument + +`seed`: shall be a defined integer scalar variable of kind `INT32`. +It is an `intent(inout)` argument. On input `seed` should be defined, +and on output it will be different from the input `seed`. + +##### Note + +Currently there are no known bad seeds for `NMHASH32X`, but if any are +identified the procedure will be revised so that they cannot be +returned. This subroutine uses Fortran's intrinsic + `RANDOM_NUMBER` and the values returned can be changed by calling the + intrinsic `RANDOM_INIT`. + +##### Example + +See the example for `NMHASH32X`. + + +#### `NEW_WATER_HASH_SEED`- returns a valid input seed for `WATER_HASH` + +##### Status + +Experimental + +##### Description + +Calculates a 64 bit "random" integer that is believed to be a valid +seed for `WATER_HASH` and is also different from the input seed. + +##### Syntax + +`call [[stdlib_32_bit_hash_codes:new_water_hash_seed]]( seed )` + +##### Class + +Subroutine + +##### Argument + +`seed`: shall be a defined integer scalar variable of kind `INT64`. +It is an `intent(inout)` argument. On input `seed` should be defined, +and on output it will be different from the input `seed`. + +##### Note + +Currently there are no known bad seeds for `WATER_HASH`, but if any +are identified the procedure will be revised so that they cannot be +returned. This subroutine uses Fortran's intrinsic + `RANDOM_NUMBER` and the values returned can be changed by calling the + intrinsic `RANDOM_INIT`. + + +##### Example + +See the example for `WATER_HASH`. + + +#### `NMHASH32`- calculates a hash code from a key and a seed + +##### Status + +Experimental + +##### Description + +Calculates a 32 bit hash code from a rank 1 integer array or a default +character string, and the input `seed`. 
+ +##### Syntax + +`code = [[stdlib_32_bit_hash_codes:nmhash32]]( key, seed )` + +##### Class + +Pure function + +##### Arguments + +`key`: Shall be a deferred length default character scalar expression +or a rank 1 integer array expression of kind `INT8`, `INT16`, +`INT32`, or `INT64`. +It is an `intent(in)` argument. + +`seed`: shall be an integer scalar expression of kind `INT32`. +It is an `intent(in)` argument. + +##### Result + +The result is a scalar integer of kind `INT32`. + +##### Note + +`NMHASH32` is an implementation of the `nmhash32` hash code of +James Z. M. Gao. +This code has good, but not great, performance on long keys, poorer +performance on short keys. +As a result it should give fair performance for typical hash table +applications. +This code passes the SMHasher tests, and has no known bad seeds. + +##### Example + +```fortran + program demo_nmhash32 + use stdlib_32_bit_hash_codes, only: nmhash32, & + new_nmhash32_seed + use iso_fortran_env, only: int32 + implicit none + integer, allocatable :: array1(:) + integer(int32) :: hash + integer(int32) :: seed = int(Z'11111111', int32) + call new_nmhash32_seed(seed) + array1 = [ 5, 4, 3, 1, 10, 4, 9] + hash = nmhash32(array1, seed) + print *, seed, hash + end program demo_nmhash32 +``` + + +#### `NMHASH32X`- calculates a hash code from a key and a seed + +##### Status + +Experimental + +##### Description + +Calculates a 32 bit hash code from a rank 1 integer array or a default +character string, and the input `seed`. + +##### Syntax + +`code = [[stdlib_32_bit_hash_codes:nmhash32x]]( key, seed )` + +##### Class + +Pure function + +##### Arguments + +`key`: Shall be a deferred length default character scalar expression +or a rank 1 integer array expression of kind `INT8`, `INT16`, +`INT32`, or `INT64`. +It is an `intent(in)` argument. + +`seed`: shall be an integer scalar expression of kind `INT32`. +It is an `intent(in)` argument. + +##### Result + +The result is a scalar integer of kind `INT32`.
+ +##### Note + +`NMHASH32X` is an implementation of the `nmhash32x` hash code of +James Z. M. Gao. +This code has good, but not great, performance on long keys, poorer +performance on short keys. +As a result it should give fair performance for typical hash table +applications. +This code passes the SMHasher tests, and has no known bad seeds. + +##### Example + +```fortran + program demo_nmhash32x + use stdlib_32_bit_hash_codes, only: nmhash32x, & + new_nmhash32x_seed + use iso_fortran_env, only: int32 + implicit none + integer, allocatable :: array1(:) + integer(int32) :: hash + integer(int32) :: seed = int(Z'11111111', int32) + call new_nmhash32x_seed(seed) + array1 = [ 5, 4, 3, 1, 10, 4, 9] + hash = nmhash32x(array1, seed) + print *, seed, hash + end program demo_nmhash32x +``` + +#### `ODD_RANDOM_INTEGER` - returns an odd integer + +##### Status + +Experimental + +##### Description + +Returns a random 32 bit integer distributed uniformly over the odd values. + +##### Syntax + +`call [[stdlib_32_bit_hash_codes:odd_random_integer]]( harvest )` + +##### Class + +Subroutine + +##### Argument + +`harvest`: Shall be a scalar integer variable of kind `INT32`. It is +an `intent(out)` argument. + +##### Note + +`ODD_RANDOM_INTEGER` is intended to generate seeds for + `UNIVERSAL_MULT_HASH`. `ODD_RANDOM_INTEGER` uses Fortran's intrinsic + `RANDOM_NUMBER` and the values returned can be changed by calling the + intrinsic `RANDOM_INIT`. + +##### Example + +See `UNIVERSAL_MULT_HASH`. + + +#### `UNIVERSAL_MULT_HASH` - maps an integer to a smaller number of bits + +##### Status + +Experimental + +##### Description + +Calculates an `nbits` hash code from a 32 bit integer. + +##### Syntax + +`code = [[stdlib_32_bit_hash_codes:universal_mult_hash]]( key, seed, nbits )` + +##### Class + +Pure function + +##### Arguments + +`key`: Shall be a scalar integer expression of kind `INT32`. It is an +`intent(in)` argument. + +`seed`: Shall be a scalar integer expression of kind `INT32`.
It is an +`intent(in)` argument. It must have an odd value. + +`nbits`: Shall be a scalar default integer expression with `0 < nbits < +32`. It is an `intent(in)` argument. + +##### Result + +The result is a scalar integer of kind `INT32` with at most the lowest +`nbits` nonzero. + +##### Note + +`UNIVERSAL_MULT_HASH` is an implementation of the Universal +Multiplicative Hash of M. Dietzfelbinger, et al. +It multiplies the `KEY` by `SEED`, and returns the +`NBITS` upper bits of the product as the lowest bits of the result. + +##### Example + +```fortran + program demo_universal_mult_hash + use stdlib_32_bit_hash_codes, only: odd_random_integer, & + universal_mult_hash + use iso_fortran_env, only: int32 + implicit none + integer, allocatable :: array1(:) + integer(int32) :: hash, i, seed, source + seed = 0 + allocate( array1(0:2**6-1) ) + do i = 0, 2**6-1 + array1(i) = i + end do + call odd_random_integer( seed ) + source = int(Z'1FFFFFF', int32) + hash = universal_mult_hash(source, seed, 6) + array1(hash) = source + print *, seed, hash, array1 + end program demo_universal_mult_hash +``` + +#### `WATER_HASH`- calculates a hash code from a key and a seed + +##### Status + +Experimental + +##### Description + +Calculates a 32 bit hash code from a rank 1 integer array or a default +character string, and the input `seed`. + +##### Syntax + +`code = [[stdlib_32_bit_hash_codes:water_hash]]( key, seed )` + +##### Class + +Pure function + +##### Arguments + +`key`: Shall be a deferred length default character scalar expression +or a rank 1 integer array expression of kind `INT8`, `INT16`, +`INT32`, or `INT64`. +It is an `intent(in)` argument. + +`seed`: shall be an integer scalar expression of kind `INT64`. +It is an `intent(in)` argument. + +##### Result + +The result is a scalar integer of kind `INT32`. + +##### Note + +`WATER_HASH` is an implementation of the `waterhash` hash code of +Tommy Ettinger.
+This code has excellent performance on long keys, and good performance +on short keys. +As a result it should give reasonable performance for typical hash +table applications. +This code passes the SMHasher tests. +The `waterhash` is based on the `wyhash` of Wang Yi. +While `wyhash` has a number of bad seeds, depending on the version, +so far testing has not found any bad seeds for `waterhash`. +It can have undefined behavior if the key is not word aligned. + +##### Example + +```fortran + program demo_water_hash + use stdlib_32_bit_hash_codes, only: water_hash, & + new_water_hash_seed + use iso_fortran_env, only: int32, int64 + implicit none + integer, allocatable :: array1(:) + integer(int32) :: hash + integer(int64) :: seed = int(Z'11111111', int64) + call new_water_hash_seed( seed ) + array1 = [ 5, 4, 3, 1, 10, 4, 9] + hash = water_hash(array1, seed) + print *, hash, seed + end program demo_water_hash +``` + +## The `stdlib_64_bit_hash_codes` module + +### Overview of the module + +Sixty four bit hash functions are generally overkill for hash table +applications, and are primarily useful for check sums and related +applications. +As checksums often have to deal with extremely large files or +directories, it is often useful to use incremental hashing as well as +direct hashing, so 64 bit and higher hash algorithms often provide +multiple implementations. The current module, for simplicity of API, +doesn't provide any incremental hashes. +The `stdlib_64_bit_hash_codes` module defines several public +overloaded 64 bit hash procedures, `FNV_1`, `FNV_1A`, +`PENGY_HASH`, and `SPOOKY_HASH`, two scalar hash functions, +`FIBONACCI_HASH` and +`UNIVERSAL_MULT_HASH`, a seed generator, `ODD_RANDOM_INTEGER`, for the +`UNIVERSAL_MULT_HASH`, and two seed generators, `NEW_PENGY_HASH_SEED` +and `NEW_SPOOKY_HASH_SEED` for their respective hash functions.
It +also defines the integer kind constant, `INT_HASH`, used to specify +the kind of the hash function results, and a logical constant, +`LITTLE_ENDIAN`, used to deal with one aspect of the machine +dependence of the hash codes. +Note that while SpookyHash can be used as a sixty four bit hash +algorithm, its algorithm actually returns a two element integer array +of kind `INT64`, so it can also be used as a 128 bit hash. + +### The `INT_HASH` parameters + +It is necessary to define the kind of integer used to return the hash +code. +As `stdlib_64_bit_hash_codes` deals exclusively with 64 bit hash codes, +`INT_HASH` is an alias for the integer kind `INT64`. + +### The `LITTLE_ENDIAN` parameter + +In implementing hash functions it is sometimes necessary to know the +"endianness" of the processor's integers. To this end the +`stdlib_64_bit_hash_codes` module defines the logical parameter +`LITTLE_ENDIAN` that if true indicates that the processor has little +endian integers, and that if false indicates that the integers are big +endian. + + +### Specifications of the `stdlib_64_bit_hash_codes` procedures + +#### `FIBONACCI_HASH` - maps an integer to a smaller number of bits + +##### Status + +Experimental + +##### Description + +Calculates an `nbits` hash code from a 64 bit integer. + +##### Syntax + +`code = [[stdlib_64_bit_hash_codes:fibonacci_hash]]( key, nbits )` + +##### Class + +Pure function + +##### Arguments + +`key`: Shall be a scalar integer expression of kind `INT64`. It is an +`intent(in)` argument. + +`nbits`: Shall be a scalar default integer expression with `0 < nbits < +64`. It is an `intent(in)` argument. + +##### Result + +The result is a scalar integer of kind `INT64` with at most the lowest +`nbits` nonzero. + +##### Note + +`FIBONACCI_HASH` is an implementation of the Fibonacci Hash of Donald +E. Knuth.
It multiplies the `KEY` by the odd valued approximation to +`2**64/phi`, where `phi` is the golden ratio 1.618..., and returns the +`nbits` upper bits of the product as the lowest bits of the result. + +##### Example + +```fortran + program demo_fibonacci_hash + use stdlib_64_bit_hash_codes, only: fibonacci_hash + use iso_fortran_env, only: int64 + implicit none + integer, allocatable :: array1(:) + integer(int64) :: hash, source + allocate( array1(0:2**6-1) ) + array1(:) = 0 + source = int(Z'1FFFFFFFF', int64) + hash = fibonacci_hash(source, 6) + array1(hash) = source + print *, hash + end program demo_fibonacci_hash +``` + +#### `FNV_1`- calculates a hash code from a key + +##### Status + +Experimental + +##### Description + +Calculates a 64 bit hash code from a rank 1 integer array or a default +character string. + +##### Syntax + +`code = [[stdlib_64_bit_hash_codes:fnv_1]]( key )` + +##### Class + +Pure function + +##### Argument + +`key`: Shall be a deferred length default character scalar expression +or a rank 1 integer array expression of kind `INT8`, `INT16`, +`INT32`, or `INT64`. +It is an `intent(in)` argument. + +##### Result + +The result is a scalar integer of kind `INT64`. + +##### Note + +`FNV_1` is an implementation of the original FNV-1 hash code of Glenn +Fowler, Landon Curt Noll, and Phong Vo. +It differs from typical implementations in that it also encodes the +size of the structure in the hash code. +This code is relatively fast on short keys, and is small enough that it +will often be retained in the instruction cache if hashing is +intermittent. +As a result it should give good performance for typical hash table +applications, although it is rare for them to need 64 bits. +This code does not pass any of the SMHasher tests, but the resulting +degradation in performance due to its larger number of collisions is +expected to be minor compared to its faster hashing rate.
+ + +##### Example + +```fortran + program demo_fnv_1_hash + use stdlib_64_bit_hash_codes, only: fnv_1_hash + use iso_fortran_env, only: int64 + implicit none + integer, allocatable :: array1(:) + integer(int64) :: hash + array1 = [ 5, 4, 3, 1, 10, 4, 9] + hash = fnv_1_hash(array1) + print *, hash + end program demo_fnv_1_hash +``` + + +#### `FNV_1A`- calculates a hash code from a key + +##### Status + +Experimental + +##### Description + +Calculates a 64 bit hash code from a rank 1 integer array or a default +character string. + +##### Syntax + +`code = [[stdlib_64_bit_hash_codes:fnv_1a]]( key )` + +##### Class + +Pure function + +##### Argument + +`key`: Shall be a deferred length default character scalar expression +or a rank 1 integer array expression of kind `INT8`, `INT16`, +`INT32`, or `INT64`. +It is an `intent(in)` argument. + +##### Result + +The result is a scalar integer of kind `INT64`. + +##### Note + +`FNV_1A` is an implementation of the alternative FNV-1a hash code of +Glenn Fowler, Landon Curt Noll, and Phong Vo. +It differs from typical implementations in that it also encodes the +size of the structure in the hash code. +This code is relatively fast on short keys, and is small enough that it +will often be retained in the instruction cache if hashing is +intermittent. +As a result it should give good performance for typical hash table +applications. +This code does not pass any of the SMHasher tests, but the resulting +degradation in performance due to its larger number of collisions is +expected to be minor compared to its faster hashing rate.
+ +##### Example + +```fortran + program demo_fnv_1a_hash + use stdlib_64_bit_hash_codes, only: fnv_1a_hash + use iso_fortran_env, only: int64 + implicit none + integer, allocatable :: array1(:) + integer(int64) :: hash + array1 = [ 5, 4, 3, 1, 10, 4, 9] + hash = fnv_1a_hash(array1) + print *, hash + end program demo_fnv_1a_hash +``` + + +#### `NEW_PENGY_HASH_SEED`- returns a valid input seed for `PENGY_HASH` + +##### Status + +Experimental + +##### Description + +Calculates a 32 bit "random" integer that is believed to be a valid +seed for `PENGY_HASH` and is also different from the input seed. + +##### Syntax + +`call [[stdlib_64_bit_hash_codes:new_pengy_hash_seed]]( seed )` + +##### Class + +Subroutine + +##### Argument + +`seed`: shall be a defined integer scalar variable of kind `INT32`. +It is an `intent(inout)` argument. On input `seed` should be defined, +and on output it will be different from the input `seed`. + +##### Note + +Currently there are no known bad seeds for `PENGY_HASH`, but if any are +identified the procedure will be revised so that they cannot be +returned. This subroutine uses Fortran's intrinsic + `RANDOM_NUMBER` and the values returned can be changed by calling the + intrinsic `RANDOM_INIT`. + +##### Example + +See the example for `PENGY_HASH`. + + +#### `NEW_SPOOKY_HASH_SEED`- returns a valid input seed for `SPOOKY_HASH` + +##### Status + +Experimental + +##### Description + +Calculates a 64 bit two element vector of "random" integer values that +is believed to be a valid seed for `SPOOKY_HASH` and is also different +from the input seed. + +##### Syntax + +`call [[stdlib_64_bit_hash_codes:new_spooky_hash_seed]]( seed )` + +##### Class + +Subroutine + +##### Argument + +`seed`: shall be a defined two element integer vector variable of kind +`INT64`. It is an `intent(inout)` argument. On input `seed` should be +defined, and on output it will be different from the input `seed`.
+ +##### Note + +Currently there are no known bad seeds for `SPOOKY_HASH`, but if any are +identified the procedure will be revised so that they cannot be +returned. This subroutine uses Fortran's intrinsic + `RANDOM_NUMBER` and the values returned can be changed by calling the + intrinsic `RANDOM_INIT`. + +##### Example + +See the example for `SPOOKY_HASH`. + + +#### `ODD_RANDOM_INTEGER` - returns odd integer + +##### Status + +Experimental + +##### Description + +Returns a random 64 bit integer distributed uniformly over the odd values. + +##### Syntax + +`call [[stdlib_64_bit_hash_codes:odd_random_integer]]( harvest )` + +##### Class + +Subroutine + +##### Argument + +`harvest`: Shall be an integer of kind `INT64`. It is an `intent(out)` +argument. + +##### Note + +`ODD_RANDOM_INTEGER` is intended to generate seeds for + `UNIVERSAL_MULT_HASH`. `ODD_RANDOM_INTEGER` uses Fortran's intrinsic + `RANDOM_NUMBER` and the values returned can be changed by calling the + intrinsic `RANDOM_INIT`. + +##### Example + +See `UNIVERSAL_MULT_HASH`. + + +#### `PENGY_HASH` - maps a character string or integer vector to an integer + +##### Status + +Experimental + +##### Description + +Maps a character string or integer vector to a 64 bit integer whose +value also depends on a scalar 32 bit integer, `seed`. + +##### Syntax + +`code = [[stdlib_64_bit_hash_codes:pengy_hash]]( key, seed )` + +##### Class + +Pure function + +##### Arguments + +`key`: shall be a scalar expression of type default character or a +rank 1 integer vector expression of kind `INT8`, `INT16`, `INT32`, or +`INT64`. It is an `intent(in)` argument. + +`seed`: shall be an integer expression of kind `INT32`. It is +an `intent(in)` argument. + +##### Result + +The result is an integer of kind `INT64`. + +##### Note + +`PENGY_HASH` is an implementation of the 64 bit `pengyhash` of Alberto +Fajardo. The hash has acceptable performance on small keys, and good +performance on long keys.
It passes all the SMHasher tests, and has +no known bad seeds. + +##### Example + +```fortran + program demo_pengy_hash + use stdlib_64_bit_hash_codes, only: new_pengy_hash_seed, pengy_hash + use iso_fortran_env, only: int32, int64 + implicit none + integer, allocatable :: key(:) + integer(int64) :: hash + integer(int32) :: seed + key = [ 0_int64, 1_int64, 2_int64, 3_int64 ] + seed = 0_int32 + call new_pengy_hash_seed( seed ) + hash = pengy_hash( key, seed ) + print *, seed, hash + end program demo_pengy_hash +``` + + +#### `SPOOKY_HASH` - maps a character string or integer vector to an integer + +##### Status + +Experimental + +##### Description + +Maps a character string or integer vector to a 64 bit integer whose +value also depends on a two element vector, `seed`. + +##### Syntax + +`code = [[stdlib_64_bit_hash_codes:spooky_hash]]( key, seed )` + +##### Class + +Pure function + +##### Arguments + +`key`: shall be a scalar expression of type default character or a +rank 1 integer vector expression of kind `INT8`, `INT16`, `INT32`, or +`INT64`. It is an `intent(in)` argument. + +`seed`: shall be a two element integer vector expression of kind +`INT64`. It is an `intent(in)` argument. + +##### Result + +The result is a two element integer vector of kind `INT64`. + +##### Note + +`SPOOKY_HASH` is an implementation of the 64 bit version 2 of +SpookyHash of Bob Jenkins. The code was designed for Little-Endian +processors. The output is different on Big Endian processors, but still +probably as good quality. It is often used as a 64 bit hash using the +first element of the returned value, but can be used as a 128 bit +hash. This version of `SPOOKY_HASH` has good performance on small keys +and excellent performance on long keys. It passes all the SMHasher tests +and has no known bad seeds.
+ +##### Example + +```fortran + program demo_spooky_hash + use stdlib_64_bit_hash_codes, only: new_spooky_hash_seed, & + spooky_hash + use iso_fortran_env, only: int64 + implicit none + integer, allocatable :: key(:) + integer(int64) :: hash(2), seed(2), source + key = [ 0_int64, 1_int64, 2_int64, 3_int64 ] + seed = [ 119_int64, 2_int64**41-1 ] + call new_spooky_hash_seed( seed ) + hash = spooky_hash( key, seed ) + print *, seed, hash + end program demo_spooky_hash +``` + +#### `UNIVERSAL_MULT_HASH` - maps an integer to a smaller number of bits + +##### Status + +Experimental + +##### Description + +Calculates an `nbits` hash code from a 64 bit integer. + +##### Syntax + +`code = [[stdlib_64_bit_hash_codes:universal_mult_hash]]( key, seed, nbits )` + +##### Class + +Pure function + +##### Arguments + +`key`: Shall be an integer of kind `INT64`. It is an `intent(in)` +argument. + +`seed`: Shall be an integer of kind `INT64`. It is an `intent(in)` +argument. It must have an odd value. + +`nbits`: Shall be a default integer with `0 < nbits < 64`. It is an +`intent(in)` argument. + +##### Result + +The result is an integer of kind `INT64` with at most the lowest +`nbits` nonzero. + +##### Note + +`UNIVERSAL_MULT_HASH` is an implementation of the Universal +Multiplicative Hash of M. Dietzfelbinger, et al. +It multiplies the `KEY` by `SEED`, and returns the +`NBITS` upper bits of the product as the lowest bits of the result.
+ +##### Example + + +```fortran + program demo_universal_mult_hash + use stdlib_64_bit_hash_codes, only: odd_random_integer, & + universal_mult_hash + use iso_fortran_env, only: int64 + implicit none + integer, allocatable :: array1(:) + integer(int64) :: hash, i, seed, source + seed = 0 + allocate( array1(0:2**6-1) ) + do i = 0, 2**6-1 + array1(i) = i + end do + call odd_random_integer( seed ) + source = int(Z'1FFFFFF', int64) + hash = universal_mult_hash(source, seed, 6) + array1(hash) = source + print *, seed, hash, array1 + end program demo_universal_mult_hash +``` + + +### Test Codes + +The Fortran Standard Library provides two test codes for the hash +functions of `stdlib_32_bit_hash_functions` and +`stdlib_64_bit_hash_functions`, `test_32_bit_hash_performance` and +`test_64_bit_hash_performance` respectively. These are primarily set +up to test runtime performance of the functions. They take a sample of +`2**18` integers of kind `INT8` and break it up into vectors of size +1, 2, 4, 8, 16, 64, 256, and 1024 elements, yielding `2**18`, +`2**17`, `2**16`, `2**15`, `2**14`, `2**12`, `2**10`, and `2**8` +vectors respectively. These are then processed by the hash functions +4 times, and the time for processing is reported. Testing so far has +been on a MacBook Pro with a 2.3 GHz Quad-Core Intel Core i5 and 8 GB +2133 MHz LPDDR3 of RAM, using GNU Fortran (GCC) 11.1.0 to compile the +code.
The results for `test_32_bit_hash_performance` is given by the +following table: + +| Algorithm | Key Size | Key # | Time (s) | +| | Bytes | | | +|------------|-----------|------------|----------| +| FNV-1 | 1 | 1048576 | 0.02949 | +| FNV-1 | 2 | 524288 | 0.02361 | +| FNV-1 | 4 | 262144 | 0.02016 | +| FNV-1 | 8 | 131072 | 0.01806 | +| FNV-1 | 16 | 65536 | 0.01867 | +| FNV-1 | 64 | 16384 | 0.01717 | +| FNV-1 | 256 | 4096 | 0.01759 | +| FNV-1 | 1024 | 1024 | 0.01659 | +| FNV-1a | 1 | 1048576 | 0.02897 | +| FNV-1a | 2 | 524288 | 0.02472 | +| FNV-1a | 4 | 262144 | 0.02025 | +| FNV-1a | 8 | 131072 | 0.01901 | +| FNV-1a | 16 | 65536 | 0.01898 | +| FNV-1a | 64 | 16384 | 0.01784 | +| FNV-1a | 256 | 4096 | 0.01723 | +| FNV-1a | 1024 | 1024 | 0.01673 | +| nmhash32 | 1 | 1048576 | 0.31092 | +| nmhash32 | 2 | 524288 | 0.16230 | +| nmhash32 | 4 | 262144 | 0.07815 | +| nmhash32 | 8 | 131072 | 0.04176 | +| nmhash32 | 16 | 65536 | 0.09261 | +| nmhash32 | 64 | 16384 | 0.04587 | +| nmhash32 | 256 | 4096 | 0.07238 | +| nmhash32 | 1024 | 1024 | 0.07263 | +| nmhash32x | 1 | 1048576 | 0.04294 | +| nmhash32x | 2 | 524288 | 0.02937 | +| nmhash32x | 4 | 262144 | 0.01096 | +| nmhash32x | 8 | 131072 | 0.00911 | +| nmhash32x | 16 | 65536 | 0.01291 | +| nmhash32x | 64 | 16384 | 0.00859 | +| nmhash32x | 256 | 4096 | 0.07373 | +| nmhash32x | 1024 | 1024 | 0.07618 | +| water | 1 | 1048576 | 0.12560 | +| water | 2 | 524288 | 0.06302 | +| water | 4 | 262144 | 0.04020 | +| water | 8 | 131072 | 0.01999 | +| water | 16 | 65536 | 0.01459 | +| water | 64 | 16384 | 0.00923 | +| water | 256 | 4096 | 0.00816 | +| water | 1024 | 1024 | 0.00792 | + +while for `test_64_bit_hash_performance` the results are: + +| Algorithm | Key Size | Key # | Time (s) | +| | Bytes | | | +|------------|-----------|------------|----------| +| FNV-1 | 1 | 1048576 | 0.02981 | +| FNV-1 | 2 | 524288 | 0.02697 | +| FNV-1 | 4 | 262144 | 0.02275 | +| FNV-1 | 8 | 131072 | 0.02431 | +| FNV-1 | 16 | 65536 | 0.02158 | +| FNV-1 | 64 | 
16384 | 0.02007 | +| FNV-1 | 256 | 4096 | 0.01932 | +| FNV-1 | 1024 | 1024 | 0.02089 | +| FNV-1a | 1 | 1048576 | 0.03226 | +| FNV-1a | 2 | 524288 | 0.03076 | +| FNV-1a | 4 | 262144 | 0.02359 | +| FNV-1a | 8 | 131072 | 0.02542 | +| FNV-1a | 16 | 65536 | 0.02364 | +| FNV-1a | 64 | 16384 | 0.02130 | +| FNV-1a | 256 | 4096 | 0.01962 | +| FNV-1a | 1024 | 1024 | 0.01966 | +| Pengy | 1 | 1048576 | 0.24294 | +| Pengy | 2 | 524288 | 0.12066 | +| Pengy | 4 | 262144 | 0.06205 | +| Pengy | 8 | 131072 | 0.03138 | +| Pengy | 16 | 65536 | 0.01608 | +| Pengy | 64 | 16384 | 0.00669 | +| Pengy | 256 | 4096 | 0.00387 | +| Pengy | 1024 | 1024 | 0.00295 | +| Spooky | 1 | 1048576 | 0.11920 | +| Spooky | 2 | 524288 | 0.07478 | +| Spooky | 4 | 262144 | 0.03185 | +| Spooky | 8 | 131072 | 0.01468 | +| Spooky | 16 | 65536 | 0.01503 | +| Spooky | 64 | 16384 | 0.00440 | +| Spooky | 256 | 4096 | 0.00290 | +| Spooky | 1024 | 1024 | 0.00177 | + +As the tested function will typically reside in the instruction cache +these results do not include the costs of reloading the procedure if +hashing is intermittent. If hashing is intermittent then that can more +severely impact the performance of `nmhash32`, `nmhash32x`, +`water_hash`, `pengy_hash`, and `spooky_hash` relative to +`fnv_1_hash` and `fnv_1a_hash`. 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index bb9fb4fd8..d0d20c492 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -2,6 +2,14 @@ # Create a list of the files to be preprocessed set(fppFiles + stdlib_32_bit_fnv_hashes.fypp + stdlib_32_bit_hash_functions.fypp + stdlib_32_bit_nmhashes.fypp + stdlib_32_bit_water_hashes.fypp + stdlib_64_bit_fnv_hashes.fypp + stdlib_64_bit_hash_functions.fypp + stdlib_64_bit_pengy_hashes.fypp + stdlib_64_bit_spookyv2_hashes.fypp stdlib_ascii.fypp stdlib_bitsets.fypp stdlib_bitsets_64.fypp diff --git a/src/Makefile.manual b/src/Makefile.manual index 179fc600f..78ea780fb 100644 --- a/src/Makefile.manual +++ b/src/Makefile.manual @@ -1,4 +1,12 @@ SRCFYPP = \ + stdlib_32_bit_fnv_hashes.fypp \ + stdlib_32_bit_hash_functions.fypp \ + stdlib_32_bit_nmhashes.fypp \ + stdlib_32_bit_water_hashes.fypp \ + stdlib_64_bit_fnv_hashes.fypp \ + stdlib_64_bit_hash_functions.fypp \ + stdlib_64_bit_pengy_hashes.fypp \ + stdlib_64_bit_spookyv2_hashes.fypp \ stdlib_ascii.fypp \ stdlib_bitsets_64.fypp \ stdlib_bitsets_large.fypp \ @@ -74,6 +82,22 @@ $(SRCGEN): %.f90: %.fypp common.fypp # Fortran module dependencies f18estop.o: stdlib_error.o +stdlib_32_bit_fnv_hashes.o: \ + stdlib_32_bit_hash_functions.o +stdlib_32_bit_hash_functions.o: \ + stdlib_kinds.o +stdlib_32_bit_nmhashes.o: \ + stdlib_32_bit_hash_functions.o +stdlib_32_bit_water_hashes.o: \ + stdlib_32_bit_hash_functions.o +stdlib_64_bit_fnv_hashes.o: \ + stdlib_64_bit_hash_functions.o +stdlib_64_bit_hash_functions.o: \ + stdlib_kinds.o +stdlib_64_bit_pengy_hashes.o: \ + stdlib_64_bit_hash_functions.o +stdlib_64_bit_spookyv2_hashes.o: \ + stdlib_64_bit_hash_functions.o stdlib_ascii.o: stdlib_kinds.o stdlib_bitsets.o: stdlib_kinds.o stdlib_bitsets_64.o: stdlib_bitsets.o diff --git a/src/stdlib_32_bit_fnv_hashes.fypp b/src/stdlib_32_bit_fnv_hashes.fypp new file mode 100644 index 000000000..562de2978 --- /dev/null +++ b/src/stdlib_32_bit_fnv_hashes.fypp @@ -0,0 +1,126 @@ 
+!!------------------------------------------------------------------------------ +!! `FNV_1_HASH` and `FNV_1A_Hash` are translations to Fortran 2008 of the +!! `FNV-1` and `FNV-1a` hash functions of Glenn Fowler, Landon Curt Noll, +!! and Phong Vo, that have been released into the public domain. Permission +!! has been granted, by Landon Curt Noll, for the use of these algorithms +!! in the Fortran Standard Library. A description of these functions is +!! available at https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function. +!!------------------------------------------------------------------------------ + +!#! Integer kinds to be considered during templating +#:set INT_KINDS = ["int16", "int32", "int64"] + +submodule(stdlib_32_bit_hash_functions) stdlib_32_bit_fnv_hashes +!! An implementation of the FNV hashes 1 and 1a of Glenn Fowler, Landon Curt +!! Noll, and Kiem-Phong Vo, +!! https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function + implicit none + + integer(int_hash), parameter :: & + offset_basis = int( z'811C9DC5', int_hash ), & + prime = int( z'01000193', int_hash ) + +contains + + pure module function int8_fnv_1( key ) result(hash_code) +!! The original FNV-1 8-bit key algorithm: multiply by the prime, then xor in each byte. + integer(int8), intent(in) :: key(:) + integer(int_hash) :: hash_code + + integer(int64) :: i + + hash_code = offset_basis + do i=1_int64, size(key, kind=int64) + hash_code = hash_code * prime + if ( little_endian ) then + hash_code = ieor( hash_code, & + transfer( [key(i), 0_int8, 0_int8, 0_int8], & + 0_int_hash ) ) + else + hash_code = ieor( hash_code, & + transfer( [0_int8, 0_int8, 0_int8, key(i)], & + 0_int_hash ) ) + end if + end do + + end function int8_fnv_1 + + +#:for k1 in INT_KINDS + pure module function ${k1}$_fnv_1( key ) result(hash_code) +! A ${k1}$ array key wrapper that hashes the key's bytes with the FNV-1 algorithm. + integer(${k1}$), intent(in) :: key(:) + integer(int_hash) :: hash_code + + hash_code = int8_fnv_1( transfer( key, 0_int8, & + bytes_${k1}$* & + size( key, kind=int64 ) ) ) + + end function ${k1}$_fnv_1 + +#:endfor + + + pure module function character_fnv_1( key ) result(hash_code) +! A default character key wrapper that hashes the key's bytes with the FNV-1 algorithm. + character(*), intent(in) :: key + integer(int_hash) :: hash_code + + hash_code = int8_fnv_1( transfer( key, & + 0_int8, & + bytes_char* & + len(key, kind=int64) ) ) + + end function character_fnv_1 + + + pure module function int8_fnv_1a( key ) result(hash_code) +!! The original FNV-1a 8-bit key algorithm: xor in each byte, then multiply by the prime. + integer(int8), intent(in) :: key(:) + integer(int_hash) :: hash_code + + integer(int64) :: i + + hash_code = offset_basis + do i=1_int64, size(key, kind=int64) + if ( little_endian ) then + hash_code = ieor( hash_code, & + transfer( [key(i), 0_int8, 0_int8, 0_int8], & + 0_int_hash ) ) + else + hash_code = ieor( hash_code, & + transfer( [0_int8, 0_int8, 0_int8, key(i)], & + 0_int_hash ) ) + end if + hash_code = hash_code * prime + end do + + end function int8_fnv_1a + + +#:for k1 in INT_KINDS + pure module function ${k1}$_fnv_1a( key ) result(hash_code) +! A ${k1}$ array key wrapper that hashes the key's bytes with the FNV-1a algorithm. + integer(${k1}$), intent(in) :: key(:) + integer(int_hash) :: hash_code + + hash_code = int8_fnv_1a( transfer( key, 0_int8, & + bytes_${k1}$* & + size(key, kind=int64)) ) + + end function ${k1}$_fnv_1a + +#:endfor + + pure module function character_fnv_1a( key ) result(hash_code) +! A default character key wrapper for the FNV-1a algorithm.
+ character(*), intent(in) :: key + integer(int_hash) :: hash_code + + hash_code = int8_fnv_1a( transfer( key, 0_int8, & + (bits_char/bits_int8)* & + len(key, kind=int64) ) ) + + end function character_fnv_1a + +end submodule stdlib_32_bit_fnv_hashes diff --git a/src/stdlib_32_bit_hash_functions.fypp b/src/stdlib_32_bit_hash_functions.fypp new file mode 100644 index 000000000..9425a2280 --- /dev/null +++ b/src/stdlib_32_bit_hash_functions.fypp @@ -0,0 +1,244 @@ +#! Integer kinds to be considered during templating +#:set INT_KINDS = ["int8", "int16", "int32", "int64"] + +module stdlib_32_bit_hash_functions + + use, intrinsic :: iso_fortran_env, only : & + character_storage_size + + use stdlib_kinds, only: & + dp, & + int8, & + int16, & + int32, & + int64 + + implicit none + + private + + integer, parameter, public :: & + int_hash = int32 +!! The integer kind of the output hash code + +! pow32_over_phi is the odd integer that most closely approximates 2**32/phi, +! where phi is the golden ratio 1.618... + integer(int32), parameter :: & + pow32_over_phi = int( z'9E3779B9', int32 ) + +! The number of bits used by each integer type + integer, parameter :: & +! Should be 8 + bits_int8 = bit_size(0_int8), & +! Should be 16 + bits_int16 = bit_size(0_int16), & +! Should be 32 + bits_int32 = bit_size(0_int32), & +! Should be 64 + bits_int64 = bit_size(0_int64) + + integer, parameter :: & +! Should be 1 + bytes_int8 = bits_int8/bits_int8, & +! Should be 2 + bytes_int16 = bits_int16/bits_int8, & +! Should be 4 + bytes_int32 = bits_int32/bits_int8, & +! Should be 8 + bytes_int64 = bits_int64/bits_int8 + + integer, parameter :: & + bits_char = character_storage_size, & + bytes_char = bits_char/bits_int8 + +!
Dealing with different endians + logical, parameter, public :: & + little_endian = ( 1 == transfer([1_int8, 0_int8], 0_int16) ) + + public :: & + fibonacci_hash, & + fnv_1_hash, & + fnv_1a_hash, & + new_nmhash32_seed, & + new_nmhash32x_seed, & + new_water_hash_seed,& + nmhash32, & + nmhash32x, & + odd_random_integer, & + universal_mult_hash,& + water_hash + + + interface fnv_1_hash +!! FNV_1 interfaces + + #:for k1 in INT_KINDS + pure module function ${k1}$_fnv_1( key ) result(hash_code) +!! FNV_1 hash function for rank 1 array keys of kind ${k1}$ + integer(${k1}$), intent(in) :: key(:) + integer(int_hash) :: hash_code + end function ${k1}$_fnv_1 + + #:endfor + + pure module function character_fnv_1( key ) result(hash_code) +!! FNV_1 hash function for default character string keys + character(*), intent(in) :: key + integer(int_hash) :: hash_code + end function character_fnv_1 + + end interface fnv_1_hash + + interface fnv_1a_hash +!! FNV_1A interfaces + #:for k1 in INT_KINDS + pure module function ${k1}$_fnv_1a( key ) result(hash_value) +!! FNV_1A hash function for rank 1 array keys of kind ${k1}$ + integer(${k1}$), intent(in) :: key(:) + integer(int_hash) :: hash_value + end function ${k1}$_fnv_1a + + #:endfor + + pure module function character_fnv_1a( key ) result(hash_value) +!! FNV_1A hash function for default character string keys + character(*), intent(in) :: key + integer(int_hash) :: hash_value + end function character_fnv_1a + + end interface fnv_1a_hash + + interface nmhash32 +!! NMHASH32 interfaces + + #:for k1 in INT_KINDS + pure module function ${k1}$_nmhash32( key, seed ) & + result(hash_value) +!! NMHASH32 hash function for rank 1 array keys of kind ${k1}$ + integer(${k1}$), intent(in) :: key(0:) + integer(int32), intent(in) :: seed + integer(int32) :: hash_value + end function ${k1}$_nmhash32 + + #:endfor + + pure module function character_nmhash32( key, seed ) & + result(hash_value) +!! 
NMHASH32 hash function for default character string keys + character(*), intent(in) :: key + integer(int32), intent(in) :: seed + integer(int32) :: hash_value + end function character_nmhash32 + + end interface nmhash32 + + interface nmhash32x +!! NMHASH32X interfaces + + #:for k1 in INT_KINDS + pure module function ${k1}$_nmhash32x( key, seed ) & + result(hash_value) +!! NMHASH32X hash function for rank 1 array keys of kind ${k1}$ + integer(${k1}$), intent(in) :: key(0:) + integer(int32), intent(in) :: seed + integer(int32) :: hash_value + end function ${k1}$_nmhash32x + + #:endfor + + pure module function character_nmhash32x( key, seed ) & + result(hash_value) +!! NMHASH32X hash function for default character string keys + character(*), intent(in) :: key + integer(int32), intent(in) :: seed + integer(int32) :: hash_value + end function character_nmhash32x + + end interface nmhash32x + + interface water_hash +!! WATER_HASH interfaces + + #:for k1 in INT_KINDS + pure module function ${k1}$_water_hash( key, seed ) & + result(hash_code) +!! WATER HASH function for rank 1 array keys of kind ${k1}$ + integer(${k1}$), intent(in) :: key(0:) + integer(int64), intent(in) :: seed + integer(int_hash) :: hash_code + end function ${k1}$_water_hash + #:endfor + + pure module function character_water_hash( key, seed ) & + result(hash_code) +!! 
WATER hash function for default character string keys + character(*), intent(in) :: key + integer(int64), intent(in) :: seed + integer(int_hash) :: hash_code + end function character_water_hash + + end interface water_hash + + interface new_water_hash_seed + + module subroutine new_water_hash_seed( seed ) + integer(int64), intent(inout) :: seed + end subroutine new_water_hash_seed + + end interface new_water_hash_seed + + interface new_nmhash32_seed + + module subroutine new_nmhash32_seed( seed ) + integer(int32), intent(inout) :: seed + end subroutine new_nmhash32_seed + + end interface new_nmhash32_seed + + interface new_nmhash32x_seed + + module subroutine new_nmhash32x_seed( seed ) + integer(int32), intent(inout) :: seed + end subroutine new_nmhash32x_seed + + end interface new_nmhash32x_seed + +contains + + pure function fibonacci_hash( key, nbits ) result( sample ) +!! Maps the 32 bit integer KEY to an unsigned integer value with only NBITS +!! bits where NBITS is less than 32 + integer(int32), intent(in) :: key + integer, intent(in) :: nbits + integer(int32) :: sample + + sample = ishft( key*pow32_over_phi, -32 + nbits ) + + end function fibonacci_hash + + pure function universal_mult_hash( key, seed, nbits ) result( sample ) +!! Uses the "random" odd 32 bit integer SEED to map the 32 bit integer KEY to +!! an unsigned integer value with only NBITS bits where NBITS is less than 32 + integer(int32), intent(in) :: key + integer(int32), intent(in) :: seed + integer, intent(in) :: nbits + integer(int32) :: sample + + sample = ishft( key*seed, -32 + nbits ) + + end function universal_mult_hash + + subroutine odd_random_integer( harvest ) +!! Returns a 32 bit pseudo random integer, HARVEST, distributed uniformly over +!! the odd integers of the INT32 kind. 
+ integer(int32), intent(out) :: harvest + real(dp) :: sample + + call random_number( sample ) + harvest = int( floor( sample * 2_int64**32, int64 ) - 2_int64**31, & + int32 ) + harvest = ishft( harvest, 1 ) + 1_int32 + + end subroutine odd_random_integer + +end module stdlib_32_bit_hash_functions diff --git a/src/stdlib_32_bit_nmhashes.fypp b/src/stdlib_32_bit_nmhashes.fypp new file mode 100644 index 000000000..ba1fcb504 --- /dev/null +++ b/src/stdlib_32_bit_nmhashes.fypp @@ -0,0 +1,801 @@ +!!------------------------------------------------------------------------------ +!! `NMHASH32` and `NMHASH32X` are translations to Fortran 2008 and signed +!! two's complement arithmetic of the `nmhash32` and `nmhash32x` V. 2 scalar +!! algorithms of James Z. M. Gao, copyright 2021. James Z. M. Gao's original +!! C++ code, `nmhash.h`, is available at the URL: +!! https://github.com/gzm55/hash-garage/blob/a8913138bdb3b7539c202edee30a7f0794bbd835/nmhash.h +!! under the BSD 2-Clause License: +!! https://github.com/gzm55/hash-garage/blob/a8913138bdb3b7539c202edee30a7f0794bbd835/LICENSE +!! The algorithms come in multiple versions, depending on whether the +!! vectorized instructions SSE2 or AVX2 are available. As neither instruction +!! is available in portable Fortran 2008, the algorithms that do not use these +!! instructions are the ones translated here. +!! +!! The BSD 2-Clause license is as follows: +!! +!! BSD 2-Clause License +!! +!! Copyright (c) 2021, water hash algorithm. James Z.M. Gao +!! All rights reserved. +!! +!! Redistribution and use in source and binary forms, with or without +!! modification, are permitted provided that the following conditions are met: +!! +!! 1. Redistributions of source code must retain the above copyright notice, +!! this list of conditions and the following disclaimer. +!! +!! 2. Redistributions in binary form must reproduce the above copyright notice, +!! this list of conditions and the following disclaimer in the documentation +!! 
and/or other materials provided with the distribution. +!! +!! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +!! AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +!! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +!! ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +!! LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!! POSSIBILITY OF SUCH DAMAGE. +!!------------------------------------------------------------------------------ + +#! Integer kinds to be considered during templating +#:set INT_KINDS = ["int16", "int32", "int64"] + +submodule(stdlib_32_bit_hash_functions) stdlib_32_bit_nmhashes + + implicit none + +! Primes from XXH + integer(int32), parameter :: nmh_prime32_1 = int( Z'9E3779B1', int32 ) + integer(int32), parameter :: nmh_prime32_2 = int( Z'85EBCA77', int32 ) + integer(int32), parameter :: nmh_prime32_3 = int( Z'C2B2AE3D', int32 ) + integer(int32), parameter :: nmh_prime32_4 = int( Z'27D4EB2F', int32 ) + + integer(int32), parameter :: nmh_m1 = int(z'F0D9649B', int32 ) + integer(int32), parameter :: nmh_m2 = int(z'29A7935D', int32 ) + integer(int32), parameter :: nmh_m3 = int(z'55D35831', int32 ) + + integer(int32), parameter :: nmh_m1_v(0:31) = nmh_m1 + integer(int32), parameter :: nmh_m2_v(0:31) = nmh_m2 + integer(int32), parameter :: nmh_m3_v(0:31) = nmh_m3 + + integer(int16), parameter :: nmh_m3_16(2) = transfer( nmh_m3, 0_int16, 2 ) + + logical, parameter :: nmh_short32_without_seed2=.false. + logical, parameter :: nmh_short32_with_seed2=.true. 
+ + integer, parameter :: init_size = 32 + +! Pseudorandom secrets taken directly from FARSH. + integer(int32), parameter :: nmh_acc_init(0:init_size-1) = [ & + int( z'B8FE6C39', int32 ), int( z'23A44BBE', int32 ), & + int( z'7C01812C', int32 ), int( z'F721AD1C', int32 ), & + int( z'DED46DE9', int32 ), int( z'839097DB', int32 ), & + int( z'7240A4A4', int32 ), int( z'B7B3671F', int32 ), & + int( z'CB79E64E', int32 ), int( z'CCC0E578', int32 ), & + int( z'825AD07D', int32 ), int( z'CCFF7221', int32 ), & + int( z'B8084674', int32 ), int( z'F743248E', int32 ), & + int( z'E03590E6', int32 ), int( z'813A264C', int32 ), & + + int( z'3C2852BB', int32 ), int( z'91C300CB', int32 ), & + int( z'88D0658B', int32 ), int( z'1B532EA3', int32 ), & + int( z'71644897', int32 ), int( z'A20DF94E', int32 ), & + int( z'3819EF46', int32 ), int( z'A9DEACD8', int32 ), & + int( z'A8FA763F', int32 ), int( z'E39C343F', int32 ), & + int( z'F9DCBBC7', int32 ), int( z'C70B4F1D', int32 ), & + int( z'8A51E04B', int32 ), int( z'CDB45931', int32 ), & + int( z'C89F7EC9', int32 ), int( z'D9787364', int32 ) ] + +contains + + pure function nmh_readle32( p ) result( v ) + integer(int32) :: v + integer(int8), intent(in) :: p(1:4) + + if ( little_endian ) then + v = transfer( p(1:4), 0_int32 ) + else + v = transfer( [ p(4), p(3), p(2), p(1) ], 0_int32 ) + end if + + end function nmh_readle32 + + pure function nmh_readle16( p ) result( v ) + integer(int16) :: v + integer(int8), intent(in) :: p(1:2) + + if ( little_endian ) then + v = transfer( p(1:2), 0_int16 ) + else + v = transfer( [ p(2), p(1) ], 0_int16 ) + end if + + end function nmh_readle16 + + pure function nmhash32_0to8( x, seed ) result( vx32 ) + integer(int32), intent(in) :: x + integer(int32), intent(in) :: seed + integer(int32) :: vx32 + ! base mixer: [-6 -12 776bf593 -19 11 3fb39c65 -15 -9 e9139917 -11 16] + ! 
= 0.027071104091278835 + integer(int32), parameter :: m1 = int(z'776BF593', int32) + integer(int32), parameter :: m2 = int(z'3FB39C65', int32) + integer(int32), parameter :: m3 = int(z'E9139917', int32) + + integer(int16) :: vx16(0:1) + integer(int16), parameter :: & + m116(0:1) = transfer( m1, 0_int16, 2 ), & + m216(0:1) = transfer( m2, 0_int16, 2 ), & + m316(0:1) = transfer( m3, 0_int16, 2 ) + + vx32 = x + vx32 = ieor( vx32, ieor( ishft( vx32, -12 ), ishft( vx32, -6 ) ) ) + vx16 = transfer( vx32, 0_int16, 2 ) + vx16 = vx16 * m116 + vx32 = transfer( vx16, 0_int32 ) + vx32 = ieor( vx32, ieor( ishft( vx32, 11 ), ishft( vx32, -19 ) ) ) + vx16 = transfer( vx32, 0_int16, 2 ) + vx16 = vx16 * m216 + vx32 = transfer( vx16, 0_int32 ) + vx32 = ieor( vx32, seed ) + vx32 = ieor( vx32, ieor( ishft( vx32, -15 ), ishft( vx32, -9 ) ) ) + vx16 = transfer( vx32, 0_int16, 2 ) + vx16 = vx16 * m316 + vx32 = transfer( vx16, 0_int32 ) + vx32 = ieor( vx32, ieor( ishft(vx32, 16), ishft(vx32, -11) ) ) + + end function nmhash32_0to8 + + pure function nmhash32_9to255( p, seed, full_avalanche ) result( hash ) + integer(int8), intent(in) :: p(0:) + integer(int32), intent(in) :: seed + logical, intent(in) :: full_avalanche + integer(int32) :: hash + + integer(int32) :: xu32(0:3), yu32(0:3) + integer(int16) :: xu16(0:1) + integer(int16), parameter :: & + nmh_m1_16(0:1) = transfer( nmh_m1, 0_int16, 2 ), & + nmh_m2_16(0:1) = transfer( nmh_m2, 0_int16, 2 ), & + nmh_m3_16(0:1) = transfer( nmh_m3, 0_int16, 2 ) + integer(int32) :: s1 + integer(int64) :: length + integer(int32) :: length32(0:1) + integer(int64) :: i, j, r + + ! base mixer: [f0d9649b 5 -13 29a7935d -9 11 55d35831 -20 -10 ] = + ! 
0.93495901789135362 + + length = size( p, kind=int64 ) + length32 = transfer(length, 0_int32, 2) + if (little_endian) then + s1 = seed + length32(0) + else + s1 = seed + length32(1) + end if + xu32(0) = nmh_prime32_1 + xu32(1) = nmh_prime32_2 + xu32(2) = nmh_prime32_3 + xu32(3) = nmh_prime32_4 + yu32(:) = s1 + + if (full_avalanche) then + ! 33 to 255 bytes + r = (length - 1 ) /32 + do i=0, r-1 + do j=0, 3 + xu32(j) = ieor( xu32(j), nmh_readle32( p(i*32 + j*4: ) ) ) + yu32(j) = ieor( yu32(j), & + nmh_readle32( p(i*32 + j*4 + 16: ) ) ) + xu32(j) = xu32(j) + yu32(j) + xu16 = transfer( xu32(j), 0_int16, 2 ) + xu16 = xu16 * nmh_m1_16 + xu32(j) = transfer( xu16, 0_int32 ) + xu32(j) = ieor( xu32(j), & + ieor( ishft(xu32(j), 5), & + ishft(xu32(j), -13)) ) + xu16 = transfer( xu32(j), 0_int16, 2 ) + xu16 = xu16 * nmh_m2_16 + xu32(j) = transfer( xu16, 0_int32 ) + xu32(j) = ieor( xu32(j), yu32(j) ) + xu32(j) = ieor( xu32(j), & + ieor( ishft(xu32(j), 11), & + ishft(xu32(j), -9) ) ) + xu16 = transfer( xu32(j), 0_int16, 2 ) + xu16 = xu16 * nmh_m3_16 + xu32(j) = transfer( xu16, 0_int32 ) + xu32(j) = ieor( xu32(j), & + ieor( ishft(xu32(j),-10), & + ishft(xu32(j), -20) ) ) + end do + end do + do j=0, 3 + xu32(j) = ieor( xu32(j), & + nmh_readle32( p(length - 32 + j*4: ) ) ) + yu32(j) = ieor( yu32(j), & + nmh_readle32( p(length - 16 + j*4: ) ) ) + end do + else + ! 
9 to 32 bytes + xu32(0) = ieor(xu32(0), nmh_readle32(p(0:))) + xu32(1) = ieor(xu32(1), nmh_readle32(p(ishft(ishft(length,-4),3):))) + xu32(2) = ieor(xu32(2), nmh_readle32(p(length-8:))) + xu32(3) = ieor(xu32(3), & + nmh_readle32(p(length-8-ishft(ishft(length,-4),3):))) + yu32(0) = ieor(yu32(0), nmh_readle32(p(4:))) + yu32(1) = ieor(yu32(1), & + nmh_readle32(p(ishft(ishft(length,-4),3)+4:))) + yu32(2) = ieor(yu32(2), nmh_readle32(p(length-8+4:))) + yu32(3) = ieor(yu32(3), & + nmh_readle32(p(length - 8 - & + ishft(ishft(length,-4),3)+4:))) + end if + do j=0, 3 + xu32(j) = xu32(j) + yu32(j) + yu32(j) = ieor( yu32(j), ieor(ishft(yu32(j), 17), & + ishft(yu32(j), -6) ) ) + xu16 = transfer( xu32(j), 0_int16, 2 ) + xu16 = xu16 * nmh_m1_16 + xu32(j) = transfer( xu16, 0_int32 ) + xu32(j) = ieor( xu32(j), ieor(ishft(xu32(j), 5), & + ishft(xu32(j), -13) ) ) + xu16 = transfer( xu32(j), 0_int16, 2 ) + xu16 = xu16 * nmh_m2_16 + xu32(j) = transfer( xu16, 0_int32 ) + xu32(j) = ieor( xu32(j), yu32(j) ) + xu32(j) = ieor( xu32(j), ieor(ishft(xu32(j), 11), & + ishft(xu32(j), -9) ) ) + xu16 = transfer( xu32(j), 0_int16, 2 ) + xu16 = xu16 * nmh_m3_16 + xu32(j) = transfer( xu16, 0_int32 ) + xu32(j) = ieor( xu32(j), ieor(ishft(xu32(j), -10), & + ishft(xu32(j), -20) ) ) + end do + xu32(0) = ieor( xu32(0), nmh_prime32_1 ) + xu32(1) = ieor( xu32(1), nmh_prime32_2 ) + xu32(2) = ieor( xu32(2), nmh_prime32_3 ) + xu32(3) = ieor( xu32(3), nmh_prime32_4 ) + do j=1, 3 + xu32(0) = xu32(0) + xu32(j) + end do + xu32(0) = ieor(xu32(0), s1 + ishft(s1, -5) ) + xu16 = transfer( xu32(0), 0_int16, 2 ) + xu16 = xu16 * nmh_m3_16 + xu32(0) = transfer( xu16, 0_int32 ) + xu32(0) = ieor(xu32(0), & + ieor(ishft(xu32(0), -10), ishft(xu32(0), -20) ) ) + hash = xu32(0) + + end function nmhash32_9to255 + + pure function nmhash32_9to32( p, seed ) result( result ) + integer(int8), intent(in) :: p(0:) + integer(int32), intent(in) :: seed + integer(int32) :: result + + result = nmhash32_9to255( p, seed, .false. 
) + + end function nmhash32_9to32 + + pure function nmhash32_33to255( p, seed ) result( result ) + integer(int8), intent(in) :: p(0:) + integer(int32), intent(in) :: seed + integer(int32) :: result + + result = nmhash32_9to255( p, seed, .true. ) + + end function nmhash32_33to255 + + pure subroutine nmhash32_long_round( accx, accy, p ) + integer(int32), intent(inout) :: accx(0:) + integer(int32), intent(inout) :: accy(0:) + integer(int8), intent(in) :: p(0:) + + integer(int64), parameter :: nbgroups = init_size + integer(int64) :: i + integer(int16) :: dummy1(0:1) + integer(int16) :: dummy2(0:1) + + do i = 0, nbgroups-1 + accx(i) = ieor( accx(i), nmh_readle32( p(i*4:) ) ) + accy(i) = ieor( accy(i), nmh_readle32( p(i*4+nbgroups*4:) ) ) + accx(i) = accx(i) + accy(i) + accy(i) = ieor( accy(i), ishft(accx(i), -1) ) + dummy1 = transfer( accx(i), 0_int16, 2 ) + dummy2 = transfer( nmh_m1_v(i), 0_int16, 2 ) + dummy1 = dummy1 * dummy2 + accx(i) = transfer( dummy1, 0_int32 ) + accx(i) = ieor( accx(i), ieor( ishft(accx(i), 5), & + ishft(accx(i),-13) ) ) + dummy1 = transfer( accx(i), 0_int16, 2 ) + dummy2 = transfer( nmh_m2_v(i), 0_int16, 2 ) + dummy1 = dummy1 * dummy2 + accx(i) = transfer( dummy1, 0_int32 ) + accx(i) = ieor( accx(i), accy(i) ) + accx(i) = ieor( accx(i), ieor( ishft(accx(i), 11), & + ishft(accx(i),-9) ) ) + dummy1 = transfer( accx(i), 0_int16, 2 ) + dummy2 = transfer( nmh_m3_v(i), 0_int16, 2 ) + dummy1 = dummy1 * dummy2 + accx(i) = transfer( dummy1, 0_int32 ) + accx(i) = ieor( accx(i), ieor( ishft(accx(i),-10), & + ishft(accx(i),-20) ) ) + end do + + end subroutine nmhash32_long_round + + pure function nmhash32_long( p, seed ) result( sum ) + integer(int32) :: sum + integer(int8), intent(in) :: p(0:) + integer(int32), intent(in) :: seed + + integer(int32) :: accx(0:size(nmh_acc_init)-1) + integer(int32) :: accy(0:size(nmh_acc_init)-1) + integer(int64) :: nbrounds + integer(int64) :: len + integer(int32) :: len32(0:1) + integer(int64) :: i + + len = size( p, 
kind=int64 ) + nbrounds = (len-1) / ( 4*size(accx, kind=int64) * 2 ) + sum = 0 + +! Init + do i=0_int64, size(nmh_acc_init, kind=int64)-1 + accx(i) = nmh_acc_init(i) + accy(i) = seed + end do + + ! init + do i=0_int64, nbrounds-1 + call nmhash32_long_round( accx, accy, & + p(i*8*size(accx, kind=int64):) ) + end do + call nmhash32_long_round( accx, accy, & + p(len-8*size(accx, kind=int64):) ) + + ! merge acc + do i=0, size( accx, kind=int64 )-1 + accx(i) = ieor( accx(i), nmh_acc_init(i) ) + sum = sum + accx(i) + end do + + len32 = transfer(len, 0_int32, 2) + if ( little_endian ) then + sum = sum + len32(1) + sum = ieor(sum, len32(0)) + else + sum = sum + len32(0) + sum = ieor(sum, len32(1)) + end if + + end function nmhash32_long + + pure function nmhash32_avalanche32( x ) result( u32 ) + integer(int32) :: u32 + integer(int32), intent(in) :: x + + integer(int16) :: u16(0:1) + integer(int32), parameter:: m1 = int(z'CCE5196D', int32) + integer(int32), parameter:: m2 = int(z'464BE229', int32) + integer(int16), parameter:: m1_16(0:1) = transfer(m1, 0_int16, 2) + integer(int16), parameter:: m2_16(0:1) = transfer(m2, 0_int16, 2) + ! [-21 -8 cce5196d 12 -7 464be229 -21 -8] = 3.2267098842182733 + + u32 = x + u32 = ieor( u32, ieor( ishft( u32, -8 ), ishft( u32, -21 ) ) ) + u16 = transfer( u32, 0_int16, 2 ) + u16 = u16 * m1_16 + u32 = transfer( u16, 0_int32 ) + u32 = ieor( u32, ieor( ishft( u32, 12 ), ishft( u32, -7 ) ) ) + u16 = transfer( u32, 0_int16, 2 ) + u16 = u16 * m2_16 + u32 = transfer( u16, 0_int32 ) + u32 = ieor( u32, ieor( ishft( u32, -8 ), ishft( u32, -21 ) ) ) + + end function nmhash32_avalanche32 + + pure module function int8_nmhash32( key, seed ) result( hash ) +!! 
NMHASH32 hash function for rank 1 array keys of kind INT8 + integer(int32) :: hash + integer(int8), intent(in) :: key(0:) + integer(int32), intent(in) :: seed + integer(int64) :: len + integer(int32) :: u32 + integer(int16) :: u16(0:1) + integer(int32) :: x, y + integer(int32) :: new_seed + + len = size( key, kind=int64 ) + if ( len <= 32 ) then + if ( len > 8 ) then + hash = nmhash32_9to32( key, seed ) + return + else if ( len > 4 ) then + x = nmh_readle32(key) + y = ieor( nmh_readle32(key(len-4:)), nmh_prime32_4 + 2 + seed ) + x = x + y + x = ieor( x, ishft(x, len + 7 ) ) + hash = nmhash32_0to8( x, ishftc(y, 5) ) + return + else + select case(len) + case(0) + new_seed = seed + nmh_prime32_2 + u32 = 0 + case(1) + new_seed = seed + nmh_prime32_2 + ishft(1_int32, 24) + & + 2_int32 + if ( little_endian ) then + u32 = transfer( [key(0), 0_int8, 0_int8, 0_int8], & + 0_int32 ) + else + u32 = transfer( [0_int8, 0_int8, 0_int8, key(0)], & + 0_int32 ) + end if + case(2) + new_seed = seed + nmh_prime32_2 + ishft(2_int32, 24) + & + 4_int32 + if (little_endian) then + u32 = transfer( [nmh_readle16(key), 0_int16], 0_int32 ) + else + u32 = transfer( [0_int16, nmh_readle16(key)], 0_int32 ) + end if + case(3) + new_seed = seed + nmh_prime32_2 + ishft(3_int32, 24) + & + 6_int32 + if ( little_endian ) then + u16(1) = transfer( [key(2), 0_int8], 0_int16 ) + u16(0) = nmh_readle16( key ) + else + u16(0) = transfer( [0_int8, key(2)], 0_int16 ) + u16(1) = nmh_readle16( key ) + end if + u32 = transfer( u16, 0_int32 ) + case(4) + new_seed = seed + nmh_prime32_3 + u32 = nmh_readle32(key) + case default + hash = 0 + return + end select + hash = nmhash32_0to8(u32+new_seed, ishftc(new_seed, 5) ) + return + end if + else if ( len < 256_int64 ) then + hash = nmhash32_33to255( key, seed ) + return + else + hash = nmhash32_avalanche32( nmhash32_long(key, seed )) + return + end if + + end function int8_nmhash32 + + pure function nmhash32x_0to4( x, seed ) result( hash ) + integer(int32), intent(in) 
:: x + integer(int32), intent(in) :: seed + integer(int32) :: hash + + ! [bdab1ea9 18 a7896a1b 12 83796a2d 16] = 0.092922873297662509 + + hash = x + hash = ieor( hash, seed ) + hash = hash * int(z'BDAB1EA9', int32) + hash = hash + ishftc(seed, 31) + hash = ieor( hash, ishft(hash, -18) ) + hash = hash * int(z'A7896A1B', int32) + hash = ieor( hash, ishft(hash, -12) ) + hash = hash * int(z'83796A2D', int32) + hash = ieor( hash, ishft(hash, -16) ) + + end function nmhash32x_0to4 + + pure function nmhash32x_5to8( p, seed ) result( x ) + integer(int8), intent(in) :: p(0:) + integer(int32), intent(in) :: seed + integer(int32) :: x + + integer(int64) :: len + integer(int32) :: y + + ! 5 to 9 bytes + ! mixer: [11049a7d 23 bcccdc7b 12 065e9dad 12] = 0.16577596555667246 + + len = size(p, kind=int64) + x = ieor( nmh_readle32(p), nmh_prime32_3 ) + y = ieor( nmh_readle32(p(len-4:)), seed ) + x = x + y + x = ieor( x, ishft(x, -len) ) + x = x * int(z'11049A7D', int32) + x = ieor( x, ishft(x, -23) ) + x = x * int(z'BCCCDC7B', int32) + x = ieor( x, ishftc(y, 3) ) + x = ieor( x, ishft(x, -12) ) + x = x * int(z'065E9DAD', int32) + x = ieor( x, ishft(x, -12) ) + + end function nmhash32x_5to8 + + pure function nmhash32x_9to255( p, seed ) result( x ) + integer(int8), intent(in) :: p(0:) + integer(int32), intent(in) :: seed + integer(int32) :: x + + integer(int64) :: len + integer(int32) :: len32(0:1) + integer(int8) :: len8(0:7) + integer(int32) :: len_base + integer(int32) :: y + integer(int32) :: a, b + integer(int64) :: i, r + + ! - at least 9 bytes + ! base mixer: [11049a7d 23 bcccdc7b 12 065e9dad 12] = 0.16577596555667246 + ! 
tail mixer: [16 a52fb2cd 15 551e4d49 16] = 0.17162579707098322 + + len = size(p, kind=int64) + len8 = transfer(len, 0_int8, 8) + len32 = transfer(len, 0_int32, 2) + if (little_endian) then + len_base = transfer( [len8(0), 0_int8, 0_int8, 0_int8], 0_int32 ) + else + len_base = transfer( [0_int8, 0_int8, 0_int8, len8(7)], 0_int32 ) + end if + x = nmh_prime32_3 + y = seed + a = nmh_prime32_4 + b = seed + r = (len - 1)/16 + + do i=0, r-1 + x = ieor(x, nmh_readle32( p(i*16 + 0:) ) ) + y = ieor(y, nmh_readle32( p(i*16 + 4:) ) ) + x = ieor(x, y) + x = x * int(z'11049A7D', int32) + x = ieor(x, ishft(x, -23) ) + x = x * int(z'BCCCDC7B', int32) + y = ishftc(y, 4) + x = ieor(x, y) + x = ieor(x, ishft(x, -12) ) + x = x * int(z'065E9DAD', int32) + x = ieor(x, ishft(x, -12) ) + + a = ieor(a, nmh_readle32(p(i*16 + 8:))) + b = ieor(b, nmh_readle32(p(i*16 + 12:))) + a = ieor(a, b) + a = a * int(z'11049A7D', int32) + a = ieor(a, ishft(a, -23) ) + a = a * int(z'BCCCDC7B', int32) + b = ishftc(b, 3) + a = ieor(a, b) + a = ieor(a, ishft(a, -12) ) + a = a * int(z'065E9DAD', int32) + a = ieor(a, ishft(a, -12) ) + end do + + if ( iand(len_base-1_int32, 8_int32) /= 0 ) then + if ( iand(len_base-1_int32, 4_int32) /= 0 ) then + a = ieor( a, nmh_readle32( p(r*16 + 0:) ) ) + b = ieor( b, nmh_readle32( p(r*16 + 4:) ) ) + a = ieor(a, b) + a = a * int(z'11049A7D', int32) + a = ieor(a, ishft(a, -23) ) + a = a * int(z'BCCCDC7B', int32) + a = ieor(a, ishftc(b, 4)) + a = ieor(a, ishft(a, -12)) + a = a * int(z'065E9DAD', int32) + else + a = ieor( a, nmh_readle32( p(r*16:) ) + b ) + a = ieor( a, ishft(a, -16) ) + a = a * int(z'A52FB2CD', int32) + a = ieor( a, ishft(a, -15) ) + a = a * int(z'551E4D49', int32) + end if + x = ieor( x, nmh_readle32( p(len - 8:) ) ) + y = ieor( y, nmh_readle32( p(len - 4:) ) ) + x = ieor( x, y ) + x = x * int(z'11049A7D', int32) + x = ieor( x, ishft(x, -23) ) + x = x * int(z'BCCCDC7B', int32); + x = ieor( x, ishftc(y, 3) ) + x = ieor( x, ishft(x, -12) ) + x = x * 
int(z'065E9DAD', int32) + else + if ( iand(len_base-1_int32, 4_int32) /= 0) then + a = ieor(a, nmh_readle32(p( r * 16:) ) + b ) + a = ieor( a, ishft(a,-16) ) + a = a * int(z'A52FB2CD', int32) + a = ieor( a, ishft(a,-15) ) + a = a * int(z'551E4D49', int32) + end if + x = ieor( x, nmh_readle32(p( len - 4:) ) + y ) + x = ieor( x, ishft(x,-16) ) + x = x * int(z'A52FB2CD', int32) + x = ieor( x, ishft(x,-15) ) + x = x * int(z'551E4D49', int32) + end if + + if ( little_endian ) then + x = ieor(x, len32(0) ) + else + x = ieor(x, len32(1) ) + end if + x = ieor(x, ishftc(a, 27)) ! rotate one lane to pass Diff test + x = ieor(x, ishft(x,-14)) + x = x * int(z'141CC535', int32 ) + + end function nmhash32x_9to255 + + pure function nmhash32x_avalanche32( x ) result(hash) + integer(int32) :: hash + integer(int32), intent(in) :: x +! Mixer with 2 mul from skeeto/hash-prospector: +! [15 d168aaad 15 af723597 15] = 0.15983776156606694 + + hash = x + hash = ieor( hash, ishft( hash, -15 ) ) + hash = hash * int( z'D168AAAD', int32 ) + hash = ieor( hash, ishft( hash, -15 ) ) + hash = hash * int( z'AF723597', int32 ) + hash = ieor( hash, ishft( hash, -15 ) ) + + end function nmhash32x_avalanche32 + + pure module function int8_nmhash32x( key, seed ) result(hash) +!! NMHASH32x hash function for rank 1 array keys of kind INT8 + integer(int32) :: hash + integer(int8), intent(in) :: key(0:) + integer(int32), intent(in) :: seed + + integer(int64) :: len + integer(int32) :: seed2 + integer(int32) :: u32 + integer(int16) :: u16(0:1) + + len = size( key, kind=int64 ) + if ( len <= 8 ) then + if ( len > 4 ) then + hash = nmhash32x_5to8( key, seed ) + return + else ! 
0 to 4 bytes + select case (len) + case(0) + seed2 = seed + nmh_prime32_2 + u32 = 0 + case(1) + seed2 = seed + nmh_prime32_2 + ishft(1_int32, 24) + & + ishft(1_int32, 1) + if (little_endian) then + u32 = transfer( [key(0), 0_int8, 0_int8, 0_int8], & + 0_int32 ) + else + u32 = transfer( [0_int8, 0_int8, 0_int8, key(0)], & + 0_int32 ) + end if + case(2) + seed2 = seed + nmh_prime32_2 + ishft(2_int32, 24) + & + ishft(2_int32, 1) + if (little_endian) then + u32 = transfer( [nmh_readle16(key), 0_int16], 0_int32 ) + else + u32 = transfer( [0_int16, nmh_readle16(key)], 0_int32 ) + end if + case(3) + seed2 = seed + nmh_prime32_2 + ishft(3_int32, 24) + & + ishft(3_int32, 1) + if (little_endian ) then + u16(1) = transfer( [ key(2), 0_int8 ], 0_int16 ) + u16(0) = nmh_readle16(key) + else + u16(0) = transfer( [ 0_int8, key(2) ], 0_int16 ) + u16(1) = nmh_readle16(key) + end if + u32 = transfer( u16, 0_int32 ) + case(4) + seed2 = seed + nmh_prime32_1 + u32 = nmh_readle32(key) + case default + hash = 0 + return + end select + hash = nmhash32x_0to4(u32, seed2) + return + end if + end if + if (len < 256) then + hash = nmhash32x_9to255(key, seed) + return + end if + hash = nmhash32x_avalanche32(nmhash32_long(key, seed)) + + end function int8_nmhash32x + +#:for k1 in INT_KINDS + pure module function ${k1}$_nmhash32( key, seed ) result(hash_code) +!! NMHASH32 hash function for rank 1 array keys of kind ${k1}$ + integer(${k1}$), intent(in) :: key(:) + integer(int32), intent(in) :: seed + integer(int32) :: hash_code + + hash_code = int8_nmhash32( transfer( key, 0_int8, & + bytes_${k1}$*size(key, kind=int64) ), seed) + + end function ${k1}$_nmhash32 + +#:endfor + + pure module function character_nmhash32( key, seed ) result(hash_code) +!! 
NMHASH32 hash function for default character keys + character(*), intent(in) :: key + integer(int32), intent(in) :: seed + integer(int32) :: hash_code + + hash_code = int8_nmhash32( transfer( key, 0_int8, & + bytes_char*len(key, kind=int64) ), seed) + + end function character_nmhash32 + +#:for k1 in INT_KINDS + pure module function ${k1}$_nmhash32x( key, seed ) result(hash_code) +!! NMHASH32X hash function for rank 1 array keys of kind ${k1}$ + integer(${k1}$), intent(in) :: key(:) + integer(int32), intent(in) :: seed + integer(int32) :: hash_code + + hash_code = int8_nmhash32x( transfer( key, 0_int8, & + bytes_${k1}$*size(key, kind=int64) ), seed) + + end function ${k1}$_nmhash32x + +#:endfor + + pure module function character_nmhash32x( key, seed ) result(hash_code) +!! NMHASH32X hash function for default character keys + character(*), intent(in) :: key + integer(int32), intent(in) :: seed + integer(int32) :: hash_code + + hash_code = int8_nmhash32x( transfer( key, 0_int8, & + bytes_char*len(key, kind=int64) ), seed) + + end function character_nmhash32x + + module subroutine new_nmhash32_seed( seed ) +! Random SEED generator for NMHASH32 + integer(int32), intent(inout) :: seed + + integer(int32) :: old_seed + real(dp) :: sample + + old_seed = seed + find_seed:do + call random_number( sample ) + seed = int( floor( sample * 2_int64**32, int64 ) - 2_int64**31, & + int32 ) + if ( seed /= old_seed ) return + end do find_seed + + end subroutine new_nmhash32_seed + + module subroutine new_nmhash32x_seed( seed ) +! 
Random SEED generator for NMHASH32X + integer(int32), intent(inout) :: seed + + integer(int32) :: old_seed + real(dp) :: sample + + old_seed = seed + find_seed:do + call random_number( sample ) + seed = int( floor( sample * 2_int64**32, int64 ) - 2_int64**31, & + int32 ) + if ( seed /= old_seed ) return + end do find_seed + + end subroutine new_nmhash32x_seed + +end submodule stdlib_32_bit_nmhashes diff --git a/src/stdlib_32_bit_water_hashes.fypp b/src/stdlib_32_bit_water_hashes.fypp new file mode 100644 index 000000000..33181ab3f --- /dev/null +++ b/src/stdlib_32_bit_water_hashes.fypp @@ -0,0 +1,282 @@ +!!------------------------------------------------------------------------------ +!! `WATER_HASH` is a translation to Fortran 2008 of the `waterhash` algorithm +!! of Tommy Ettinger. Tommy Ettinger's original C++ code, `waterhash.h`, is +!! available at the URL: https://github.com/tommyettinger/waterhash under the +!! `unlicense`, https://github.com/tommyettinger/waterhash/blob/master/LICENSE. +!! "`waterhash` is a variant on Wang Yi's `wyhash`, with 32 bit output, +!! using at most 64 bit arithmetic. `wyhash` is available at the URL: +!! `https://github.com/wangyi-fudan/wyhash` also under the unlicense: +!! `https://github.com/wangyi-fudan/wyhash/blob/master/LICENSE`. +!! Original Author: Wang Yi +!! Waterhash Variant Author: Tommy Ettinger +!! +!! The `unlicense` reads as follows: +!! This is free and unencumbered software released into the public domain. +!! +!! Anyone is free to copy, modify, publish, use, compile, sell, or +!! distribute this software, either in source code form or as a compiled +!! binary, for any purpose, commercial or non-commercial, and by any +!! means. +!! +!! In jurisdictions that recognize copyright laws, the author or authors +!! of this software dedicate any and all copyright interest in the +!! software to the public domain. We make this dedication for the benefit +!! of the public at large and to the detriment of our heirs and +!! 
successors. We intend this dedication to be an overt act of +!! relinquishment in perpetuity of all present and future rights to this +!! software under copyright law. +!! +!! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +!! EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +!! MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +!! IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +!! OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +!! ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +!! OTHER DEALINGS IN THE SOFTWARE. +!! +!! For more information, please refer to https://unlicense.org +!! +!! `WATER_HASH` is distributed as part of the `stdlib_32_bit_hash_functions.f90` +!! module and its `stdlib_32_bit_water_hashes.f90` submodule with the Fortran +!! Standard Library at URL: https://github.com/fortran-lang/stdlib. +!! The Fortran Standard Library, including this code, is distributed under the +!! MIT License as described in the `LICENSE` file distributed with the library. +!! `WATER_HASH` differs from `waterhash.h` not only in its use of Fortran, +!! but also in its use of signed two's complement arithmetic in contrast to +!! the unsigned arithmetic of Ettinger and Wang Yi, and in making some of the +!! uses of `TRANSFER` endian dependent, in an attempt to make the quality of +!! the hash endian independent. The use of signed arithmetic may change with +!! the planned introduction of the unsigned BITS datatype in what is currently +!! known as Fortran 202X. +!! +!! To be useful this code must be processed by a processor that implements two +!! Fortran 2008 extensions to Fortran 2003: submodules, and 64 bit (`INT64`) +!! integers. The processor must also use two's complement integers +!! (all Fortran 95+ processors use two's complement arithmetic) with +!! wrap around overflow at runtime and for BOZ constants. The latest releases +!! 
of the following processors are known to implement the required Fortran
+!! 2008 extensions and default to runtime wrap around overflow: FLANG,
+!! gfortran, ifort, and NAG Fortran. Older versions of gfortran will require
+!! the compiler flag, `-fno-range-check`, to ensure wrap around semantics
+!! for BOZ constants, and only versions of the NAG compiler starting with
+!! version 17, have implemented submodules. The latest releases of Cray
+!! Fortran and IBM Fortran are known to implement the Fortran 2008 extensions,
+!! but whether they also implement wrap around overflow is unknown.
+!!
+!! This implementation has only been tested on little endian processors. It
+!! will generate different hashes on big endian processors, but they are
+!! believed to be of comparable quality to those generated for little endian
+!! processors.
+!!
+!! No version of this hash is suitable as a cryptographic hash.
+!!------------------------------------------------------------------------------
+
+#! Integer kinds to be considered during templating
+#:set INT_KINDS = ["int16", "int32", "int64"]
+
+submodule(stdlib_32_bit_hash_functions) stdlib_32_bit_water_hashes
+    implicit none
+
+contains
+
+    pure module function int8_water_hash( key, seed ) result(hash_code)
+!! WATER_HASH for rank 1 `int8` keys. The bytes of KEY are folded, starting
+!! from SEED, into the 64 bit accumulator `h`; 32 bits of the final value of
+!! `h` are returned as HASH_CODE.
+        integer(int32) :: hash_code
+        integer(int8), intent(in) :: key(0:)
+        integer(int64), intent(in) :: seed
+
+! Receives the two 32 bit halves of the final 64 bit accumulator
+        integer(int32) :: dummy(2)
+        integer(int64) :: h
+        integer(int64) :: i
+        integer(int64) :: len
+! Mixing constants: 32 bit patterns stored in 64 bit integers
+        integer(int64), parameter :: &
+            waterp0 = int(z'a0761d65', kind=int64), &
+            waterp1 = int(z'e7037ed1', kind=int64), &
+            waterp2 = int(z'8ebc6af1', kind=int64), &
+            waterp3 = int(z'589965cd', kind=int64), &
+            waterp4 = int(z'1d8e4e27', kind=int64), &
+            waterp5 = int(z'eb44accb', kind=int64)
+
+        len = size(key, kind=int64)
+        h = seed
+! Bulk phase: each iteration consumes 16 bytes as four 32 bit words
+        do i = 0_int64, len-16, 16
+            h = watermum(watermum(ieor(waterr32(key(i:)),waterp1), &
+                                  ieor(waterr32(key(i+4:)),waterp2)) + h, &
+                         watermum(ieor(waterr32(key(i+8:)),waterp3), &
+                                  ieor(waterr32(key(i+12:)),waterp4)))
+        end do
+        h = h + waterp5
+
+! Tail phase: on loop exit `i` is the offset of the first unprocessed byte.
+! The 1..15 remaining bytes are read as a combination of 32, 16 and 8 bit
+! pieces. When the length is a multiple of 16 no case matches and `h` is
+! left unchanged.
+        select case( iand(len, 15_int64) )
+        case(1)
+            h = watermum(ieor(waterp2, h), &
+                         ieor(waterr08(key(i:)), waterp1))
+        case(2)
+            h = watermum(ieor(waterp3, h), &
+                         ieor(waterr16(key(i:)), waterp4))
+        case(3)
+            h = watermum(ieor(waterr16(key(i:)), h), &
+                         ieor(waterr08(key(i+2:)), waterp2))
+        case(4)
+            h = watermum(ieor(waterr16(key(i:)), h), &
+                         ieor(waterr16(key(i+2:)), waterp3))
+        case(5)
+            h = watermum(ieor(waterr32(key(i:)), h), &
+                         ieor(waterr08(key(i+4:)), waterp1))
+        case(6)
+            h = watermum(ieor(waterr32(key(i:)), h), &
+                         ieor(waterr16(key(i+4:)), waterp1))
+        case(7)
+            h = watermum(ieor(waterr32(key(i:)), h), &
+                         ieor(ior(ishft(waterr16(key(i+4:)), 8), &
+                                  waterr08(key(i+6:))), waterp1))
+        case(8)
+            h = watermum(ieor(waterr32(key(i:)), h), &
+                         ieor(waterr32(key(i+4:)), waterp0))
+        case(9)
+            h = ieor(watermum(ieor(waterr32(key(i:)), h), &
+                              ieor(waterr32(key(i+4:)), waterp2)), &
+                     watermum(ieor(h, waterp4), &
+                              ieor(waterr08(key(i+8:)), waterp3)))
+        case(10)
+            h = ieor(watermum(ieor(waterr32(key(i:)), h), &
+                              ieor(waterr32(key(i+4:)), waterp2)), &
+                     watermum(h, ieor(waterr16(key(i+8:)), waterp3)))
+        case(11)
+            h = ieor(watermum(ieor(waterr32(key(i:)), h), &
+                              ieor(waterr32(key(i+4:)), waterp2)), &
+                     watermum(h, &
+                              ieor(ior(ishft(waterr16(key(i+8:)),8), &
+                                       waterr08(key(i+10:))), &
+                                   waterp3)))
+        case(12)
+            h = ieor(watermum(ieor(waterr32(key(i:)), h), &
+                              ieor(waterr32(key(i+4:)), waterp2)), &
+                     watermum(ieor(h, waterr32(key(i+8:))), &
+                              waterp4))
+        case(13)
+            h = ieor(watermum(ieor(waterr32(key(i:)), h), &
+                              ieor(waterr32(key(i+4:)), waterp2)), &
+                     watermum(ieor(h, waterr32(key(i+8:))), &
+                              ieor(waterr08(key(i+12:)), waterp4)))
+        case(14)
+            h = ieor(watermum(ieor(waterr32(key(i:)), h), &
+                              ieor(waterr32(key(i+4:)), waterp2)), &
+                     watermum(ieor(h, waterr32(key(i+8:))), &
+                              ieor(waterr16(key(i+12:)), waterp4)))
+        case(15)
+            h = ieor(watermum(ieor(waterr32(key(i:)), h), &
+                              ieor(waterr32(key(i+4:)), waterp2)), &
+                     watermum(ieor(h, waterr32(key(i+8:))), &
+                              ieor(ior(ishft(waterr16(key(i+12:)),8), &
+                                       waterr08(key(i+14:))), &
+                                   waterp4)))
+        end select
+
+! Final avalanche; the key length is mixed in here
+        h = ieor( h, ishft(h,16) ) * ieor( len, waterp0 )
+        h = h - ishft( h, -32 )
+! Split `h` into two 32 bit words and return the one selected by the
+! endianness flag (presumably the word holding the low order bits;
+! `little_endian` is defined in the parent module - confirm there)
+        dummy(1:2) = transfer(h, dummy, 2)
+        if (little_endian) then
+            hash_code = dummy(1)
+        else
+            hash_code = dummy(2)
+        end if
+
+    contains
+
+        pure function watermum( a, b ) result(r)
+!! Multiplies A by B and subtracts the upper 32 bits of the product
+!! (logical shift right by 32) from the product.
+            integer(int64) :: r
+            integer(int64), intent(in) :: a, b
+
+            r = a * b
+            r = r - ishft(r, -32)
+
+        end function watermum
+
+        pure function waterr08( p ) result(v)
+!! Reads P(1) into a 64 bit integer with all other bytes zero, placing it
+!! first or last in the byte image according to `little_endian`.
+            integer(int64) :: v
+            integer(int8), intent(in) :: p(:)
+
+            if (little_endian) then
+                v = transfer( [ p(1), 0_int8, 0_int8, 0_int8, &
+                                0_int8, 0_int8, 0_int8, 0_int8 ], v )
+            else
+                v = transfer( [ 0_int8, 0_int8, 0_int8, 0_int8, &
+                                0_int8, 0_int8, 0_int8, p(1) ], v )
+            end if
+
+        end function waterr08
+
+        pure function waterr16( p ) result(v)
+!! Reads P(1:2) into a 64 bit integer with the remaining six bytes zero;
+!! the byte order in the image is reversed on big endian processors.
+            integer(int64) :: v
+            integer(int8), intent(in) :: p(:)
+
+            if (little_endian) then
+                v = transfer( [ p(1), p(2), 0_int8, 0_int8, &
+                                0_int8, 0_int8, 0_int8, 0_int8 ], v )
+            else
+                v = transfer( [ 0_int8, 0_int8, 0_int8, 0_int8, &
+                                0_int8, 0_int8, p(2), p(1) ], v )
+            end if
+
+        end function waterr16
+
+        pure function waterr32( p ) result(v)
+!! Reads P(1:4) into a 64 bit integer with the remaining four bytes zero;
+!! the byte order in the image is reversed on big endian processors.
+            integer(int64) :: v
+            integer(int8), intent(in) :: p(:)
+
+            if (little_endian) then
+                v = transfer( [ p(1), p(2), p(3), p(4), &
+                                0_int8, 0_int8, 0_int8, 0_int8 ], v )
+            else
+                v = transfer( [ 0_int8, 0_int8, 0_int8, 0_int8, &
+                                p(4), p(3), p(2), p(1) ], v )
+            end if
+
+        end function waterr32
+
+    end function int8_water_hash
+
+
+#:for k1 in INT_KINDS
+    pure module function ${k1}$_water_hash( key, seed ) result(hash_code)
+!! WATER_HASH for rank 1 keys of kind ${k1}$: the key is reinterpreted as a
+!! sequence of `int8` values and passed to `int8_water_hash`.
+        integer(${k1}$), intent(in) :: key(:)
+        integer(int64), intent(in) :: seed
+        integer(int_hash) :: hash_code
+
+        hash_code = int8_water_hash( transfer( key, 0_int8, &
+                        bytes_${k1}$*size(key, kind=int64) ), seed)
+
+    end function ${k1}$_water_hash
+
+#:endfor
+
+
pure module function character_water_hash( key, seed ) result(hash_code)
+!! WATER_HASH for default character keys: the string is reinterpreted as a
+!! sequence of `int8` values and hashed with `int8_water_hash`.
+        character(*), intent(in) :: key
+        integer(int64), intent(in) :: seed
+        integer(int_hash) :: hash_code
+
+        integer(int64) :: nbytes
+
+! Number of `int8` elements needed to hold every character of KEY
+        nbytes = bytes_char*len(key, kind=int64)
+        hash_code = int8_water_hash( transfer( key, 0_int8, nbytes ), seed )
+
+    end function character_water_hash
+
+    module subroutine new_water_hash_seed( seed )
+!! Replaces SEED with a pseudo random 64 bit value that is guaranteed to
+!! differ from the value SEED had on entry.
+        integer(int64), intent(inout) :: seed
+
+        integer(int64) :: previous
+        integer(int32) :: halves(2)
+        real(dp)       :: draws(2)
+
+        previous = seed
+        do
+            call random_number( draws )
+! Map each draw from [0,1) onto the full range of a signed 32 bit integer
+            halves = int( floor( draws * 2_int64**32, int64 ) - 2_int64**31, &
+                          int32 )
+! The two 32 bit halves together form the candidate 64 bit seed
+            seed = transfer( halves, seed )
+            if ( seed /= previous ) exit
+        end do
+
+    end subroutine new_water_hash_seed
+
+end submodule stdlib_32_bit_water_hashes
diff --git a/src/stdlib_64_bit_fnv_hashes.fypp b/src/stdlib_64_bit_fnv_hashes.fypp
new file mode 100644
index 000000000..1eefdb886
--- /dev/null
+++ b/src/stdlib_64_bit_fnv_hashes.fypp
@@ -0,0 +1,125 @@
+!!------------------------------------------------------------------------------
+!! `FNV_1_HASH` and `FNV_1A_HASH` are translations to Fortran 2008 of the
+!! `FNV-1` and `FNV-1a` hash functions of Glenn Fowler, Landon Curt Noll,
+!! and Phong Vo, that has been released into the public domain. Permission
+!! has been granted, by Landon Curt Noll, for the use of these algorithms
+!! in the Fortran Standard Library. A description of these functions is
+!! available at https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function.
+!! The functions have been modified from their normal form to also encode
+!! the size of the structure in the hash.
+!! NOTE(review): the `int8` kernels in this submodule appear to mix only the
+!! key bytes; confirm whether the sentence about encoding the size still
+!! applies.
+!!------------------------------------------------------------------------------
+
+#! Integer kinds to be considered during templating
+#:set INT_KINDS = ["int16", "int32", "int64"]
+
+submodule(stdlib_64_bit_hash_functions) stdlib_64_bit_fnv_hashes
+! An implementation of the FNV hashes 1 and 1a of Glenn Fowler, Landon Curt
+!
Noll, and Kiem-Phong-Vo,
+! https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function
+    implicit none
+
+! 64 bit FNV offset basis and FNV prime
+    integer(int_hash), parameter :: &
+        offset_basis = int( z'CBF29CE484222325', int_hash ), &
+        prime = int( z'100000001B3', int_hash )
+
+contains
+
+    pure module function int8_fnv_1( key ) result(hash_code)
+!! FNV-1 hash of a rank 1 `int8` key: for every byte the running hash is
+!! first multiplied by the FNV prime and then XORed with the byte.
+        integer(int8), intent(in) :: key(:)
+        integer(int_hash) :: hash_code
+
+        integer(int64) :: i
+
+        hash_code = offset_basis
+        do i=1_int64, size(key, kind=int64)
+            hash_code = hash_code * prime
+! Widen the byte to 64 bits without sign extension: it is placed at the
+! first or last position of the byte image depending on endianness
+            if ( little_endian ) then
+                hash_code = ieor( hash_code, &
+                                  transfer( [key(i), 0_int8, 0_int8, 0_int8, &
+                                             0_int8, 0_int8, 0_int8, 0_int8], &
+                                            0_int_hash ) )
+            else
+                hash_code = ieor( hash_code, &
+                                  transfer( [0_int8, 0_int8, 0_int8, 0_int8, &
+                                             0_int8, 0_int8, 0_int8, key(i)], &
+                                            0_int_hash ) )
+            end if
+        end do
+
+    end function int8_fnv_1
+
+
+#:for k1 in INT_KINDS
+    pure module function ${k1}$_fnv_1( key ) result(hash_code)
+!! FNV-1 hash of a rank 1 key of kind ${k1}$, computed on its `int8` image.
+        integer(${k1}$), intent(in) :: key(:)
+        integer(int_hash) :: hash_code
+
+        hash_code = int8_fnv_1( transfer( key, 0_int8, &
+                                          bytes_${k1}$* &
+                                          size( key, kind=int64 ) ) )
+
+    end function ${k1}$_fnv_1
+
+#:endfor
+
+    pure module function character_fnv_1( key ) result(hash_code)
+!! FNV-1 hash of a default character key, computed on its `int8` image.
+        character(*), intent(in) :: key
+        integer(int_hash) :: hash_code
+
+        hash_code = int8_fnv_1( transfer( key, &
+                                          0_int8, &
+                                          bytes_char* &
+                                          len(key, kind=int64) ) )
+
+    end function character_fnv_1
+
+
+    pure module function int8_fnv_1a( key ) result(hash_code)
+!! FNV-1a hash of a rank 1 `int8` key: for every byte the running hash is
+!! first XORed with the byte and then multiplied by the FNV prime.
+        integer(int8), intent(in) :: key(:)
+        integer(int_hash) :: hash_code
+
+        integer(int64) :: i
+
+        hash_code = offset_basis
+        do i=1_int64, size(key, kind=int64)
+! Widen the byte to 64 bits without sign extension (see int8_fnv_1)
+            if ( little_endian ) then
+                hash_code = ieor( hash_code, &
+                                  transfer( [key(i), 0_int8, 0_int8, 0_int8, &
+                                             0_int8, 0_int8, 0_int8, 0_int8], &
+                                            0_int_hash ) )
+            else
+                hash_code = ieor( hash_code, &
+                                  transfer( [0_int8, 0_int8, 0_int8, 0_int8, &
+                                             0_int8, 0_int8, 0_int8, key(i)], &
+                                            0_int_hash ) )
+            end if
+            hash_code = hash_code * prime
+        end do
+
+    end function int8_fnv_1a
+
+
+#:for k1 in INT_KINDS
+    pure module function ${k1}$_fnv_1a( key ) result(hash_code)
+!! FNV-1a hash of a rank 1 key of kind ${k1}$, computed on its `int8` image.
+        integer(${k1}$), intent(in) :: key(:)
+        integer(int_hash) :: hash_code
+
+        hash_code = int8_fnv_1a( transfer( key, 0_int8, &
+                                           bytes_${k1}$* &
+                                           size(key, kind=int64)))
+
+    end function ${k1}$_fnv_1a
+
+#:endfor
+
+    pure module function character_fnv_1a( key ) result(hash_code)
+!! FNV-1a hash of a default character key, computed on its `int8` image.
+        character(*), intent(in) :: key
+        integer(int_hash) :: hash_code
+
+! `bytes_char` is the same quantity as `bits_char/bits_int8`; it is used
+! here so that this wrapper reads like its FNV-1 counterpart
+        hash_code = int8_fnv_1a( transfer( key, 0_int8, &
+                                           bytes_char* &
+                                           len(key, kind=int64) ) )
+
+    end function character_fnv_1a
+
+end submodule stdlib_64_bit_fnv_hashes
diff --git a/src/stdlib_64_bit_hash_functions.fypp b/src/stdlib_64_bit_hash_functions.fypp
new file mode 100644
index 000000000..0f31a0d26
--- /dev/null
+++ b/src/stdlib_64_bit_hash_functions.fypp
@@ -0,0 +1,308 @@
+#! Integer kinds to be considered during templating
+#:set INT_KINDS = ["int8", "int16", "int32", "int64"]
+
+module stdlib_64_bit_hash_functions
+
+    use, intrinsic :: iso_fortran_env, only : &
+        character_storage_size
+
+    use stdlib_kinds, only: &
+        dp, &
+        int8, &
+        int16, &
+        int32, &
+        int64
+
+    implicit none
+
+    private
+
+    integer, parameter, public :: &
+        int_hash = int64
+!! The integer kind of the hash codes produced by this module (`int64`)
+
+! The number of bits used by each integer type
+    integer, parameter, public :: &
+! Should be 8
+        bits_int8 = bit_size(0_int8), &
+! Should be 16
+        bits_int16 = bit_size(0_int16), &
+! Should be 32
+        bits_int32 = bit_size(0_int32), &
+! Should be 64
+        bits_int64 = bit_size(0_int64)
+
+! The number of `int8` elements (bytes) occupied by each integer type
+    integer, parameter, public :: &
+! Should be 1
+        bytes_int8 = bits_int8/bits_int8, &
+! Should be 2
+        bytes_int16 = bits_int16/bits_int8, &
+! Should be 4
+        bytes_int32 = bits_int32/bits_int8, &
+! Should be 8
+        bytes_int64 = bits_int64/bits_int8
+
+! Storage size of a default character in bits and in `int8` elements
+    integer, parameter, public :: &
+        bits_char = character_storage_size, &
+        bytes_char = bits_char/bits_int8
+
+!
Dealing with different endians
+! `little_endian` is true when the first byte of a two byte integer is its
+! least significant byte
+    logical, parameter, public :: &
+        little_endian = ( 1 == transfer( [1_int8, 0_int8], 0_int16) )
+
+    public :: &
+        fibonacci_hash, &
+        fnv_1_hash, &
+        fnv_1a_hash, &
+        new_pengy_hash_seed, &
+        new_spooky_hash_seed, &
+        odd_random_integer, &
+        pengy_hash, &
+        spooky_hash, &
+        spookyhash_128, &
+        universal_mult_hash
+
+! pow64_over_phi is the odd number that most closely approximates 2**64/phi,
+! where phi is the golden ratio 1.618...
+    integer(int64), parameter :: &
+        pow64_over_phi = int(z'9E3779B97F4A7C15', int64)
+
+    integer(int_hash), parameter :: &
+        two_32 = 2_int_hash**32
+
+! constants used by Bob Jenkins' SpookyHash
+! sc_numvars   - number of 64 bit words in the internal state
+! sc_blocksize - bytes in one internal state (12*8 = 96)
+! sc_buffsize  - bytes in two internal states (2*96 = 192)
+    integer(int32), parameter :: &
+        sc_numvars = 12, &
+        sc_blocksize = sc_numvars*8, &
+        sc_buffsize = 2*sc_blocksize, &
+        sc_constsub = int(z'deadbeef', int32)
+    ! twos complement "deadbeef"
+
+! The 32 bit pattern `deadbeef` repeated in both halves of a 64 bit integer
+    integer(int64), parameter :: &
+        sc_const = transfer( [sc_constsub, sc_constsub], 0_int64 )
+
+! State carried between the spooky_init/spooky_update/spooky_final calls
+    type :: spooky_subhash
+        integer(int8) :: data(0:2*sc_blocksize-1)
+        integer(int64) :: state(0:sc_numvars-1)
+        integer(int64) :: length
+        integer(int16) :: remainder
+    end type spooky_subhash
+
+    interface fnv_1_hash
+!! FNV_1 interfaces
+
+        #:for k1 in INT_KINDS
+        pure module function ${k1}$_fnv_1( key ) result(hash_code)
+!! FNV_1 hash function for rank 1 arrays of kind ${k1}$
+            integer(${k1}$), intent(in) :: key(:)
+            integer(int_hash) :: hash_code
+        end function ${k1}$_fnv_1
+        #:endfor
+
+        pure module function character_fnv_1( key ) result(hash_code)
+!! FNV_1 hash function for character strings
+            character(*), intent(in) :: key
+            integer(int_hash) :: hash_code
+        end function character_fnv_1
+
+    end interface fnv_1_hash
+
+
+    interface fnv_1a_hash
+!! FNV_1A interfaces
+        #:for k1 in INT_KINDS
+        pure module function ${k1}$_fnv_1a( key ) result(hash_code)
+!! FNV_1A hash function for rank 1 arrays of kind ${k1}$
+            integer(${k1}$), intent(in) :: key(:)
+            integer(int_hash) :: hash_code
+        end function ${k1}$_fnv_1a
+        #:endfor
+
+        pure module function character_fnv_1a( key ) result(hash_code)
+!! FNV_1A hash function for character strings
+            character(*), intent(in) :: key
+            integer(int_hash) :: hash_code
+        end function character_fnv_1a
+
+    end interface fnv_1a_hash
+
+
+    interface murmur2_hash
+!! MURMUR2_HASHES interfaces
+!! NOTE(review): `murmur2_hash` is not named in the PUBLIC list of this
+!! module, so with the module-wide PRIVATE it is not accessible to users;
+!! confirm whether that is intended.
+
+        #:for k1 in INT_KINDS
+        pure module function ${k1}$_murmur2_hash( key, seed ) &
+            result(hash_code)
+!! MURMUR2 hash function for rank 1 arrays of kind ${k1}$
+            integer(${k1}$), intent(in) :: key(0:)
+            integer(int_hash), intent(in) :: seed
+            integer(int_hash) :: hash_code
+        end function ${k1}$_murmur2_hash
+        #:endfor
+
+        pure module function character_murmur2_hash( key, seed ) &
+            result(hash_code)
+!! MURMUR2 hash function for character strings
+            character(*), intent(in) :: key
+            integer(int_hash), intent(in) :: seed
+            integer(int_hash) :: hash_code
+        end function character_murmur2_hash
+
+    end interface murmur2_hash
+
+
+    interface spooky_hash
+!! SPOOKY_HASH interfaces
+
+        #:for k1 in INT_KINDS
+        module function ${k1}$_spooky_hash( key, seed ) &
+            result(hash_code)
+!! SPOOKY HASH function for rank 1 arrays of kind ${k1}$
+            integer(${k1}$), intent(in) :: key(0:)
+            integer(int_hash), intent(in) :: seed(2)
+            integer(int_hash) :: hash_code(2)
+        end function ${k1}$_spooky_hash
+        #:endfor
+
+        module function character_spooky_hash( key, seed ) &
+            result(hash_code)
+!! SPOOKY hash function for character strings
+            character(*), intent(in) :: key
+            integer(int_hash), intent(in) :: seed(2)
+            integer(int_hash) :: hash_code(2)
+        end function character_spooky_hash
+
+    end interface spooky_hash
+
+    interface
+
+        module subroutine spookyHash_128( key, hash_inout )
+!! Hashes the `int8` array KEY. HASH_INOUT supplies the two seed words on
+!! entry and receives the two hash words on return.
+            integer(int8), intent(in), target :: key(0:)
+            integer(int_hash), intent(inout) :: hash_inout(2)
+        end subroutine spookyHash_128
+
+    end interface
+
+
+    interface spooky_init
+
+        module subroutine spookysubhash_init( self, seed )
+            type(spooky_subhash), intent(out) :: self
+            integer(int_hash), intent(in) :: seed(2)
+        end subroutine spookysubhash_init
+
+    end interface spooky_init
+
+
+    interface spooky_update
+
+        module subroutine spookyhash_update( spooky, key )
+! NOTE(review): SPOOKY is INTENT(OUT) here although SPOOKY_FINAL takes it as
+! INTENT(INOUT); an update step would normally need to read the existing
+! state - confirm against the implementation.
+            type(spooky_subhash), intent(out) :: spooky
+            integer(int8), intent(in) :: key(0:)
+        end subroutine spookyhash_update
+
+    end interface spooky_update
+
+
+    interface spooky_final
+
+        module subroutine spookyhash_final(spooky, hash_code)
+            type(spooky_subhash), intent(inout) :: spooky
+            integer(int_hash), intent(inout) :: hash_code(2)
+        end subroutine spookyhash_final
+
+    end interface spooky_final
+
+interface
+
+        module subroutine new_spooky_hash_seed( seed )
+! Random SEED generator for SPOOKY_HASH
+            integer(int64), intent(inout) :: seed(2)
+        end subroutine new_spooky_hash_seed
+
+    end interface
+
+    interface pengy_hash
+!! PENGY_HASH interfaces
+
+        #:for k1 in INT_KINDS
+        pure module function ${k1}$_pengy_hash( key, seed ) result(hash_code)
+!! PENGY_HASH hash function for rank 1 array keys of kind ${k1}$
+            integer(${k1}$), intent(in) :: key(:)
+            integer(int32), intent(in) :: seed
+            integer(int64) :: hash_code
+        end function ${k1}$_pengy_hash
+        #:endfor
+
+        pure module function character_pengy_hash( key, seed ) &
+            result(hash_code)
+!! PENGY_HASH hash function for character strings
+            character(*), intent(in) :: key
+            integer(int32), intent(in) :: seed
+            integer(int64) :: hash_code
+        end function character_pengy_hash
+
+    end interface pengy_hash
+
+    interface
+
+        module subroutine new_pengy_hash_seed( seed )
+! Random SEED generator for PENGY_HASH
+            integer(int32), intent(inout) :: seed
+        end subroutine new_pengy_hash_seed
+
+    end interface
+
+contains
+
+    pure function fibonacci_hash( key, nbits ) result( sample )
+!! Maps the 64 bit integer KEY to an unsigned integer value with only NBITS
+!! bits where NBITS is less than 64
+        integer(int64), intent(in) :: key
+        integer, intent(in) :: nbits
+        integer(int64) :: sample
+
+! Multiply by 2**64/phi, then shift right by 64-NBITS so that only the top
+! NBITS bits of the product remain
+        sample = ishft( key*pow64_over_phi, -64 + nbits )
+
+    end function fibonacci_hash
+
+    pure function universal_mult_hash( key, seed, nbits ) result( sample )
+!! Uses the "random" odd 64 bit integer SEED to map the 64 bit integer KEY to
+!! an unsigned integer value with only NBITS bits where NBITS is less than 64.
+        integer(int64), intent(in) :: key
+        integer(int64), intent(in) :: seed
+        integer, intent(in) :: nbits
+        integer(int64) :: sample
+
+! Same construction as FIBONACCI_HASH with SEED as the multiplier
+        sample = ishft( key*seed, -64 + nbits )
+
+    end function universal_mult_hash
+
+    subroutine odd_random_integer( harvest )
+!! Returns a 64 bit pseudo random integer, HARVEST, distributed uniformly over
+!! the odd integers of the 64 bit kind.
+        integer(int64), intent(out) :: harvest
+        real(dp) :: sample(2)
+        integer(int32) :: part(2)
+
+        call random_number( sample )
+! Map each sample from [0,1) onto the full signed 32 bit range
+        part = int( floor( sample * 2_int64**32, int64 ) - 2_int64**31, int32 )
+        harvest = transfer( part, harvest )
+! Shift left one bit and set the lowest bit so that the result is odd
+        harvest = ishft( harvest, 1 ) + 1_int64
+
+    end subroutine odd_random_integer
+
+    subroutine random_integer( harvest )
+!! Returns a 64 bit pseudo random integer, HARVEST, distributed uniformly over
+!! the values of the 64 bit kind.
+! This procedure is not named in the PUBLIC list of the module.
+        integer(int64), intent(out) :: harvest
+        real(dp) :: sample(2)
+        integer(int32) :: part(2)
+
+        call random_number( sample )
+! Map each sample from [0,1) onto the full signed 32 bit range
+        part = int( floor( sample * 2_int64**32, int64 ) - 2_int64**31, int32 )
+        harvest = transfer( part, harvest )
+
+    end subroutine random_integer
+
+end module stdlib_64_bit_hash_functions
diff --git a/src/stdlib_64_bit_pengy_hashes.fypp b/src/stdlib_64_bit_pengy_hashes.fypp
new file mode 100644
index 000000000..ca1f14791
--- /dev/null
+++ b/src/stdlib_64_bit_pengy_hashes.fypp
@@ -0,0 +1,148 @@
+!!------------------------------------------------------------------------------
+!! `PENGY_HASH` is a translation to Fortran 2008 and signed two's complement
+!! arithmetic of the `pengyhash` algorithm of Alberto Fajardo, copyright 2020.
+!! Alberto Fajardo's original C code, `pengyhash.c`, is available at the URL:
+!! https://github.com/tinypeng/pengyhash/blob/master/pengyhash.c
+!! under the BSD 2-Clause License:
+!! https://github.com/tinypeng/pengyhash/blob/master/LICENSE
+!!
+!! The BSD 2-Clause license is as follows:
+!!
+!! BSD 2-Clause License
+!!
+!! pengyhash
+!! Copyright (c) 2020 Alberto Fajardo
+!! All rights reserved.
+!!
+!! Redistribution and use in source and binary forms, with or without
+!! modification, are permitted provided that the following conditions are met:
+!!
+!! 1. Redistributions of source code must retain the above copyright notice,
+!! this list of conditions and the following disclaimer.
+!!
+!! 2. Redistributions in binary form must reproduce the above copyright notice,
+!! this list of conditions and the following disclaimer in the documentation
+!! and/or other materials provided with the distribution.
+!!
+!! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+!! AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+!! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+!! ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+!! LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!! POSSIBILITY OF SUCH DAMAGE.
+!!------------------------------------------------------------------------------
+
+#! Integer kinds to be considered during templating
+#:set INT_KINDS = ["int16", "int32", "int64"]
+
+submodule(stdlib_64_bit_hash_functions) stdlib_64_bit_pengy_hashes
+
+    implicit none
+
+contains
+
+    pure module function int8_pengy_hash( key, seed ) result(hash_code)
+!! PENGY_HASH for rank 1 `int8` keys. KEY is consumed in 32 byte blocks that
+!! are mixed into the four word state `s`; the remaining bytes and SEED are
+!! then mixed in by six further rounds and the sum of the state words is
+!! returned.
+        integer(int64) :: hash_code
+        integer(int8), intent(in) :: key(0:)
+        integer(int32), intent(in) :: seed
+
+        integer(int64) :: b(0:3)
+        integer(int64) :: i
+        integer(int64) :: index
+        integer(int64) :: len
+        integer(int64) :: s(0:3)
+        integer(int64) :: seed2
+        integer(int8) :: dummy(0:31)
+
+        b(0:3) = 0_int64
+        len = size( key, kind=int64 )
+! The key length is part of the initial state
+        s(0:3) = [ 0_int64, 0_int64, 0_int64, len ]
+
+! Bulk phase: `index` is the offset of the current block and `len` counts
+! the bytes still to be processed
+        index = 0_int64
+        do while ( len >= 32 )
+            b(0:3) = transfer( key( index:index+31 ), 0_int64, 4 )
+
+            s(0) = s(0) + s(1) + b(3)
+            s(1) = s(0) + ishftc( s(1), 14 )
+            s(2) = s(2) + s(3) + b(2)
+            s(3) = s(2) + ishftc( s(3), 23 )
+            s(0) = s(0) + s(3) + b(1)
+            s(3) = ieor( s(0), ishftc( s(3), 16 ) )
+            s(2) = s(2) + s(1) + b(0)
+            s(1) = ieor( s(2), ishftc( s(1), 40 ) )
+
+            len = len - 32
+            index = index + 32
+        end do
+
+! Tail: overlay the remaining 0..31 bytes on the byte image of the last
+! block (all zero if no full block was read), leaving the other bytes of
+! that image in place
+        dummy(0:31) = transfer( b, 0_int8, 32 )
+        dummy(0:len-1) = key(index:index+len-1)
+        b(0:3) = transfer( dummy, 0_int64, 4)
+! Widen the 32 bit SEED to 64 bits without sign extension
+        if ( little_endian ) then
+            seed2 = transfer( [ seed, 0_int32 ], 0_int64 )
+        else
+            seed2 = transfer( [ 0_int32, seed ], 0_int64 )
+        end if
+
+! Six finalisation rounds; these differ from the bulk rounds only in adding
+! `seed2` in the second statement
+        do i = 0, 5
+            s(0) = s(0) + s(1) + b(3)
+            s(1) = s(0) + ishftc( s(1), 14 ) + seed2
+            s(2) = s(2) + s(3) + b(2)
+            s(3) = s(2) + ishftc( s(3), 23 )
+            s(0) = s(0) + s(3) + b(1)
+            s(3) = ieor( s(0), ishftc( s(3), 16 ) )
+            s(2) = s(2) + s(1) + b(0)
+            s(1) = ieor( s(2), ishftc( s(1), 40 ) )
+        end do
+
+        hash_code = s(0) + s(1) + s(2) + s(3)
+
+    end function int8_pengy_hash
+
+#:for k1 in INT_KINDS
+    pure module function ${k1}$_pengy_hash( key, seed ) result(hash_code)
+!! PENGY_HASH hash function for rank 1 array keys of kind ${k1}$
+!! The key is reinterpreted as `int8` values and passed to `int8_pengy_hash`.
+        integer(${k1}$), intent(in) :: key(:)
+        integer(int32), intent(in) :: seed
+        integer(int64) :: hash_code
+
+        hash_code = int8_pengy_hash( transfer( key, 0_int8, &
+                        bytes_${k1}$*size(key, kind=int64) ), seed)
+
+    end function ${k1}$_pengy_hash
+
+#:endfor
+
+    pure module function character_pengy_hash( key, seed ) result(hash_code)
+!! PENGY_HASH hash function for default character keys
+!! The key is reinterpreted as `int8` values and passed to `int8_pengy_hash`.
+        character(*), intent(in) :: key
+        integer(int32), intent(in) :: seed
+        integer(int64) :: hash_code
+
+        hash_code = int8_pengy_hash( transfer( key, 0_int8, &
+                        bytes_char*len(key, kind=int64) ), seed)
+
+    end function character_pengy_hash
+
+    module subroutine new_pengy_hash_seed( seed )
+! Random SEED generator for PENGY_HASH
+! Draws until the new value differs from the value SEED had on entry
+        integer(int32), intent(inout) :: seed
+        real(dp) :: sample
+        integer(int32) :: old_seed
+
+        old_seed = seed
+        find_seed: do
+            call random_number( sample )
+! Map the sample from [0,1) onto the full signed 32 bit range
+            seed = int( floor( sample * 2_int64**32, int64 ) - 2_int64**31, &
+                        int32 )
+            if ( seed /= old_seed ) return
+        end do find_seed
+
+    end subroutine new_pengy_hash_seed
+
+end submodule stdlib_64_bit_pengy_hashes
diff --git a/src/stdlib_64_bit_spookyv2_hashes.fypp b/src/stdlib_64_bit_spookyv2_hashes.fypp
new file mode 100644
index 000000000..eaaccff4d
--- /dev/null
+++ b/src/stdlib_64_bit_spookyv2_hashes.fypp
@@ -0,0 +1,718 @@
+!!------------------------------------------------------------------------------
+!! `SPOOKY_HASH` is a translation to Fortran 2008 of the unsigned 64 bit
+!!
`SpookyHash` V2 function of Bob Jenkins
+!! https://burtleburtle.net/bob/hash/spooky.html to signed 64 bit
+!! operations. Bob Jenkins has put his code in the public domain and has
+!! given permission to treat this code as public domain in the USA,
+!! provided the code can be used under other licenses and he is given
+!! appropriate credit.
+!! The code was designed for Little-Endian processors. The output is
+!! different on Big Endian processors, but still probably as good quality.
+!!------------------------------------------------------------------------------
+
+#! Integer kinds to be considered during templating
+#:set INT_KINDS = ["int16", "int32", "int64"]
+
+submodule(stdlib_64_bit_hash_functions) stdlib_64_bit_spookyv2_hashes
+
+! I have tried to make this portable while retaining efficiency. I assume
+! processors with two's complement integers from 8, 16, 32, and 64 bits.
+! The code is a transliteration of the 64 bit SpookyHash V2 of Bob Jenkins
+!
+! The code was designed for Little-Endian processors. The output is
+! different on Big Endian processors, but still probably as good quality.
+
+
+    implicit none
+
+contains
+
+
+    module function int8_spooky_hash( key, seed ) result(hash_code)
+!! SPOOKY_HASH of a rank 1 `int8` key. SEED supplies the two 64 bit seed
+!! words; the two 64 bit hash words are returned in HASH_CODE.
+        integer(int8), intent(in) :: key(:)
+        integer(int64), intent(in) :: seed(2)
+        integer(int64) :: hash_code(2)
+
+        integer(int64) :: hash2(2)
+
+! spookyhash_128 takes the seed and returns the hash in the same argument
+        hash2(:) = seed
+        call spookyhash_128( key, hash2 )
+        hash_code = hash2
+
+    end function int8_spooky_hash
+
+
+#:for k1 in INT_KINDS
+    module function ${k1}$_spooky_hash( key, seed ) result(hash_code)
+!! SPOOKY_HASH of a rank 1 key of kind ${k1}$, computed on its `int8` image.
+        integer(${k1}$), intent(in) :: key(:)
+        integer(int64), intent(in) :: seed(2)
+        integer(int64) :: hash_code(2)
+
+        integer(int64) :: hash2(2)
+
+        hash2(:) = seed
+        call spookyhash_128( transfer( key, 0_int8, &
+                 bytes_${k1}$*size(key, kind=int64) ), hash2 )
+        hash_code = hash2
+
+    end function ${k1}$_spooky_hash
+
+#:endfor
+
+    module function character_spooky_hash( key, seed ) result(hash_code)
+!! SPOOKY_HASH of a default character key, computed on its `int8` image.
+        character(*), intent(in) :: key
+        integer(int64), intent(in) :: seed(2)
+        integer(int64) :: hash_code(2)
+
+        integer(int64) :: hash2(2)
+
+        hash2(:) = seed
+        call spookyhash_128( transfer( key, 0_int8, &
+                 bytes_char*len(key, kind=int64) ), hash2 )
+        hash_code = hash2
+
+    end function character_spooky_hash
+
+!
+! short hash ... it could be used on any message,
+! but it's used by Spooky just for short messages.
+!
+    subroutine spookyhash_short( key, hash_inout )
+! KEY        - the bytes to be hashed; the local buffer holds at most
+!              2*sc_numVars 64 bit words, so KEY must be shorter than
+!              sc_buffsize bytes
+! HASH_INOUT - the two seed words on entry, the two hash words on return
+!
+! This procedure performs no input/output: it only reads KEY and updates
+! HASH_INOUT.
+        integer(int8), intent(in), target :: key(0:)
+        integer(int64), intent(inout) :: hash_inout(2)
+
+        integer(int64) :: a, b, c, d
+        integer(int64) :: length, p8, remainder
+
+! p8 is the byte offset in KEY of the first byte not yet consumed
+        p8 = 0
+        length = size( key, kind=int64 )
+
+        ! The number of bytes after all the INT256s
+        remainder = iand( length, 31_int64 )
+        a = hash_inout(1)
+        b = hash_inout(2)
+        c = sc_const
+        d = sc_const
+
+        if ( length > 15 ) then
+            block
+                integer(int64) :: bend, step
+                integer(int64) :: buf(0:2*sc_numVars-1)
+                bend = ishft(length, -4) ! The number of complete INT128s
+                buf(0:2*bend-1) = transfer( key(0:16*bend-1), 0_int64, 2*bend )
+                ! Number of Int64's in number of complete INT256s
+                bend = ishft(ishft(length, -5), 2)
+
+                ! handle all complete sets of 32 bytes
+                do step = 0_int64, bend-1, 4
+                    c = c + buf(step)
+                    d = d + buf(step+1)
+                    call shortmix( a, b, c, d )
+                    a = a + buf(step+2)
+                    b = b + buf(step+3)
+                end do
+                ! Completed all INT64s in complete INT256s
+                p8 = p8 + 8*bend ! Number of INT8s in complete INT256s
+
+                ! Handle the case of 16+ remaining bytes.
+                ! On loop exit STEP equals BEND, the index of the first
+                ! 64 bit word that has not been consumed
+                if (remainder >= 16) then
+                    c = c + buf(step)
+                    d = d + buf(step+1)
+                    call shortmix( a, b, c, d )
+                    p8 = p8 + 16
+                    remainder = remainder - 16
+                end if
+
+            end block
+        end if
+
+        ! Handle the last 0..15 bytes, and its length V2
+        d = d + shiftl( length, 56_int64 )
+
+! Jump to the label 100+remainder; the statements between the labels fall
+! through like the cases of the switch statement this is translated from
+        select case(remainder)
+        case(15)
+            go to 115
+        case(14)
+            go to 114
+        case(13)
+            go to 113
+        case(12)
+            go to 112
+        case(11)
+            go to 111
+        case(10)
+            go to 110
+        case(9)
+            go to 109
+        case(8)
+            go to 108
+        case(7)
+            go to 107
+        case(6)
+            go to 106
+        case(5)
+            go to 105
+        case(4)
+            go to 104
+        case(3)
+            go to 103
+        case(2)
+            go to 102
+        case(1)
+            go to 101
+        case(0)
+            go to 100
+        end select
+
+115     d = d + shiftl( map_to_64( key(p8+14) ), 48_int64 )
+114     d = d + shiftl( map_to_64( key(p8+13) ), 40_int64 )
+113     d = d + shiftl( map_to_64( key(p8+12) ), 32_int64 )
+112     if ( little_endian) then
+            d = d + transfer( [ transfer(key(p8+8:p8+11), 0_int32), &
+                                0_int32 ], 0_int64)
+        else
+            d = d + transfer( [ 0_int32, &
+                                transfer(key(p8+8:p8+11), 0_int32) ], &
+                              0_int64)
+        end if
+        c = c + transfer( key(p8+0:p8+7), 0_int64 )
+        go to 888
+
+111     d = d + shiftl( map_to_64( key(p8+10) ), 16_int64 )
+110     d = d + shiftl( map_to_64( key(p8+9) ), 8_int64 )
+109     d = d + map_to_64( key(p8+8) )
+108     c = c + transfer( key(p8+0:p8+7), 0_int64 )
+        go to 888
+
+107     c = c + shiftl( map_to_64( key(p8+6) ), 48_int64 )
+106     c = c + shiftl( map_to_64( key(p8+5) ), 40_int64 )
+105     c = c + shiftl( map_to_64( key(p8+4) ), 32_int64 )
+104     if ( little_endian) then
+            c = c + transfer( [ transfer( key(p8+0:p8+3), 0_int32 ), &
+                                0_int32 ], 0_int64 )
+        else
+            c = c + transfer( [ 0_int32, &
+                                transfer( key(p8+0:p8+3), 0_int32 ) ], 0_int64 )
+        end if
+
+        go to 888
+
+103     c = c + shiftl( map_to_64( key(p8+2) ), 16_int64 )
+102     c = c + shiftl( map_to_64( key(p8+1) ), 8_int64 )
+101     c = c + map_to_64( key(p8+0) )
+        go to 888
+
+100     c = c + sc_const
+        d = d + sc_const
+
+888     call short_end( a, b, c, d )
+
+        hash_inout(1) = a
+        hash_inout(2) = b
+
+    contains
+
+        pure function map_to_64( key )
+! Widens one byte to 64 bits without sign extension: the byte is placed at
+! the first or last position of the byte image depending on endianness
+            integer(int8), intent(in) :: key
+            integer(int64) :: map_to_64
+
+            if ( little_endian ) then
+                map_to_64 = transfer( [ key, 0_int8, 0_int8, 0_int8, &
+                                        0_int8, 0_int8, 0_int8, 0_int8 ], &
+                                      0_int64 )
+            else
+                map_to_64 = transfer( [ 0_int8, 0_int8, 0_int8, 0_int8, &
+                                        0_int8, 0_int8, 0_int8, key ], &
+                                      0_int64 )
+            end if
+
+        end function map_to_64
+
+        pure subroutine shortmix( h0, h1, h2, h3 )
+            !
+            ! The goal is for each bit of the input to expand into 128 bits of
+            ! apparent entropy before it is fully overwritten.
+            ! n trials both set and cleared at least m bits of h0 h1 h2 h3
+            ! n: 2 m: 29
+            ! n: 3 m: 46
+            ! n: 4 m: 57
+            ! n: 5 m: 107
+            ! n: 6 m: 146
+            ! n: 7 m: 152
+            ! when run forwards or backwards
+            ! for all 1-bit and 2-bit diffs
+            ! with diffs defined by either xor or subtraction
+            ! with a base of all zeros plus a counter, or plus another bit, or random
+            !
+            integer(int64), intent(inout) :: h0, h1, h2, h3
+
+            h2 = ishftc( h2, 50 )
+            h2 = h2 + h3
+            h0 = ieor( h0, h2 )
+            h3 = ishftc( h3, 52 )
+            h3 = h3 + h0
+            h1 = ieor( h1, h3 )
+            h0 = ishftc( h0, 30 )
+            h0 = h0 + h1
+            h2 = ieor( h2, h0 )
+            h1 = ishftc( h1, 41 )
+            h1 = h1 + h2
+            h3 = ieor( h3, h1 )
+            h2 = ishftc( h2, 54 )
+            h2 = h2 + h3
+            h0 = ieor( h0, h2 )
+            h3 = ishftc( h3, 48 )
+            h3 = h3 + h0
+            h1 = ieor( h1, h3 )
+            h0 = ishftc( h0, 38 )
+            h0 = h0 + h1
+            h2 = ieor( h2, h0 )
+            h1 = ishftc( h1, 37 )
+            h1 = h1 + h2
+            h3 = ieor( h3, h1 )
+            h2 = ishftc( h2, 62 )
+            h2 = h2 + h3
+            h0 = ieor( h0, h2 )
+            h3 = ishftc( h3, 34 )
+            h3 = h3 + h0
+            h1 = ieor( h1, h3 )
+            h0 = ishftc( h0, 5 )
+            h0 = h0 + h1
+            h2 = ieor( h2, h0 )
+            h1 = ishftc( h1, 36 )
+            h1 = h1 + h2
+            h3 = ieor( h3, h1 )
+
+        end subroutine shortmix
+
+        pure subroutine short_end( h0, h1, h2, h3 )
+            !
+            ! Mix all 4 inputs together so that h0, h1 are a hash of them all.
+            !
+            ! For two inputs differing in just the input bits
+            ! Where "differ" means xor or subtraction
+            ! And the base value is random, or a counting value starting at that bit
+            ! The final result will have each bit of h0, h1 flip
+            ! For every input bit,
+            ! with probability 50 +- .3% (it is probably better than that)
+            ! For every pair of input bits,
+            ! with probability 50 +- .75% (the worst case is approximately that)
+            !
+            integer(int64), intent(inout) :: h0, h1, h2, h3
+
+            h3 = ieor( h3, h2 )
+            h2 = ishftc( h2, 15 )
+            h3 = h3 + h2
+            h0 = ieor( h0, h3 )
+            h3 = ishftc( h3, 52 )
+            h0 = h0 + h3
+            h1 = ieor( h1, h0 )
+            h0 = ishftc( h0, 26 )
+            h1 = h1 + h0
+            h2 = ieor( h2, h1 )
+            h1 = ishftc( h1, 51 )
+            h2 = h2 + h1
+            h3 = ieor( h3, h2 )
+            h2 = ishftc( h2, 28 )
+            h3 = h3 + h2
+            h0 = ieor( h0, h3 )
+            h3 = ishftc( h3, 9 )
+            h0 = h0 + h3
+            h1 = ieor( h1, h0 )
+            h0 = ishftc( h0, 47 )
+            h1 = h1 + h0
+            h2 = ieor( h2, h1 )
+            h1 = ishftc( h1, 54 )
+            h2 = h2 + h1
+            h3 = ieor( h3, h2 )
+            h2 = ishftc( h2, 32 )
+            h3 = h3 + h2
+            h0 = ieor( h0, h3 )
+            h3 = ishftc( h3, 25 )
+            h0 = h0 + h3
+            h1 = ieor( h1, h0 )
+            h0 = ishftc( h0, 63 )
+            h1 = h1 + h0
+
+        end subroutine short_end
+
+    end subroutine spookyhash_short
+
+
+! do the whole hash in one call
+    module subroutine spookyHash_128( key, hash_inout )
+        integer(int8), intent(in), target :: key(0:)
+        integer(int64), intent(inout) :: hash_inout(2)
+
+        integer(int64) :: buf(sc_numvars)
+        integer(int64) :: h(0:11)
+        integer(int64) :: bend, i, length, p8, remain, remainder, tail
+        integer(int8) :: buf8(8)
+
+        length = size(key, kind=int64)
+
+        if ( length < sc_buffsize ) then
+            call spookyhash_short( key, hash_inout )
+            return
+        end if
+
+        h( [ 0, 3, 6, 9 ] ) = hash_inout(1)
+        h( [ 1, 4, 7, 10 ] ) = hash_inout(2)
+        h( [ 2, 5, 8, 11 ] ) = sc_const
+
+        ! Number of bytes in number of complete internal states
+        bend = (length/sc_blocksize)*sc_blocksize
+
+        ! Handle all SC_BLOCKSIZE blocks of bytes
+        do i=0, bend-1, sc_blocksize
+            buf(:) = transfer( key(i:i+sc_blocksize-1), 0_int64, sc_numVars )
+            call spookyhash_mix( buf, h )
+        end do ! all complete internal states processed
+
+        ! handle the last partial block of sc_blocksize bytes
+        remainder = ( length - bend ) ! 0 <= remainder < sc_blocksize == 96
+        remain = remainder / 8 ! 
Number of INT64's in partial block + buf(1:remain) = transfer( key(bend:bend+remain*8-1), 0_int64, remain ) + buf(remain+1:sc_numvars) = 0_int64 + tail = remainder - 8 * remain ! Number of INT8s after INT64s + p8 = bend + remain * 8 ! # of bytes until tail start + buf8(1:tail) = key(p8:p8+tail-1) + buf8(tail+1:8) = 0_int8 + buf(remain+1) = transfer( buf8, 0_int64 ) + buf8(1:7) = 0_int8 + buf8(8) = int( remainder, kind=int8 ) ! 0 <= remainder < 96 + buf(sc_numvars) = ieor( buf(sc_numvars), transfer( buf8, 0_int64 ) ) + + ! do some final mixing + call spookyhash_end( buf, h ) + hash_inout(1:2) = h(0:1) + + end subroutine spookyHash_128 + + ! + ! This is used if the input is 96 bytes long or longer. + ! + ! The internal state is fully overwritten every 96 bytes. + ! Every input bit appears to cause at least 128 bits of entropy + ! before 96 other bytes are combined, when run forward or backward + ! For every input bit, + ! Two inputs differing in just that input bit + ! Where "differ" means xor or subtraction + ! And the base value is random + ! When run forward or backwards one Mix + ! I tried 3 pairs of each; they all differed by at least 212 bits. + ! 
+    ! Fold one block of twelve 64-bit words, data(0:11), into the twelve-word
+    ! state s, in place. Step i adds data(i) to s(i), xors state words into
+    ! their neighbours and rotates one word with ishftc. The statement order
+    ! is significant: each step reads words written by the previous steps.
+    pure subroutine spookyhash_mix( data, s )
+        integer(int64), intent(in) :: data(0:)      ! only data(0:11) is read
+        integer(int64), intent(inout) :: s(0:11)    ! running internal state
+
+        s(0) = s(0) + data(0)
+        s(2) = ieor( s(2), s(10) )
+        s(11) = ieor( s(11), s(0) )
+        s(0) = ishftc( s(0), 11 )
+        s(11) = s(11) + s(1)
+        s(1) = s(1) + data(1)
+        s(3) = ieor( s(3), s(11) )
+        s(0) = ieor( s(0), s(1) )
+        s(1) = ishftc( s(1), 32 )
+        s(0) = s(0) + s(2)
+        s(2) = s(2) + data(2)
+        s(4) = ieor( s(4), s(0) )
+        s(1) = ieor( s(1), s(2) )
+        s(2) = ishftc( s(2), 43 )
+        s(1) = s(1) + s(3)
+        s(3) = s(3) + data(3)
+        s(5) = ieor( s(5), s(1) )
+        s(2) = ieor( s(2), s(3) )
+        s(3) = ishftc( s(3), 31 )
+        s(2) = s(2) + s(4)
+        s(4) = s(4) + data(4)
+        s(6) = ieor( s(6), s(2) )
+        s(3) = ieor( s(3), s(4) )
+        s(4) = ishftc( s(4), 17 )
+        s(3) = s(3) + s(5)
+        s(5) = s(5) + data(5)
+        s(7) = ieor( s(7), s(3) )
+        s(4) = ieor( s(4), s(5) )
+        s(5) = ishftc( s(5), 28 )
+        s(4) = s(4) + s(6)
+        s(6) = s(6) + data(6)
+        s(8) = ieor( s(8), s(4) )
+        s(5) = ieor( s(5), s(6) )
+        s(6) = ishftc( s(6), 39 )
+        s(5) = s(5) + s(7)
+        s(7) = s(7) + data(7)
+        s(9) = ieor( s(9), s(5) )
+        s(6) = ieor( s(6), s(7) )
+        s(7) = ishftc( s(7), 57 )
+        s(6) = s(6) + s(8)
+        s(8) = s(8) + data(8)
+        s(10) = ieor( s(10), s(6) )
+        s(7) = ieor( s(7), s(8) )
+        s(8) = ishftc( s(8), 55 )
+        s(7) = s(7) + s(9)
+        s(9) = s(9) + data(9)
+        s(11) = ieor( s(11), s(7) )
+        s(8) = ieor( s(8), s(9) )
+        s(9) = ishftc( s(9), 54 )
+        s(8) = s(8) + s(10)
+        s(10) = s(10) + data(10)
+        s(0) = ieor( s(0), s(8) )
+        s(9) = ieor( s(9), s(10) )
+        s(10) = ishftc( s(10), 22 )
+        s(9) = s(9) + s(11)
+        s(11) = s(11) + data(11)
+        s(1) = ieor( s(1), s(9) )
+        s(10) = ieor( s(10), s(11) )
+        s(11) = ishftc( s(11), 46 )
+        s(10) = s(10) + s(0)
+
+    end subroutine spookyhash_mix
+
+
+    ! Final mixing step: add the last block data(0:11) to the state h
+    ! element-wise, then apply endpartial three times. Callers in this file
+    ! take h(0:1) as the 128-bit result.
+    pure subroutine spookyhash_end( data, h)
+        integer(int64), intent(in) :: data(0:)      ! only data(0:11) is read
+        integer(int64), intent(inout) :: h(0:11)    ! state, updated in place
+
+        h = h + data(0:11)
+        call endpartial( h )
+        call endpartial( h )
+        call endpartial( h )
+
+    contains
+        !
+        ! Mix all 12 inputs together so that h0, h1 are a hash of them all.
+        !
+        ! For two inputs differing in just the input bits
+        ! Where "differ" means xor or subtraction
+        ! And the base value is random, or a counting value starting at that bit
+        ! The final result will have each bit of h0, h1 flip
+        ! For every input bit,
+        ! with probability 50 +- .3%
+        ! For every pair of input bits,
+        ! with probability 50 +- 3%
+        !
+        ! This does not rely on the last Mix() call having already mixed some.
+        ! Two iterations was almost good enough for a 64-bit result, but a
+        ! 128-bit result is reported, so End() does three iterations.
+        !
+        pure subroutine endpartial( h )
+            integer(int64), intent(inout) :: h(0:11)
+
+            ! One pass of add / xor / rotate over all twelve state words.
+            h(11) = h(11) + h(1)
+            h(2) = ieor( h(2), h(11) )
+            h(1) = ishftc( h(1), 44 )
+            h(0) = h(0) + h(2)
+            h(3) = ieor( h(3), h(0) )
+            h(2) = ishftc( h(2), 15 )
+            h(1) = h(1) + h(3)
+            h(4) = ieor( h(4), h(1) )
+            h(3) = ishftc( h(3), 34 )
+            h(2) = h(2) + h(4)
+            h(5) = ieor( h(5), h(2) )
+            h(4) = ishftc( h(4), 21 )
+            h(3) = h(3) + h(5)
+            h(6) = ieor( h(6), h(3) )
+            h(5) = ishftc( h(5), 38 )
+            h(4) = h(4) + h(6)
+            h(7) = ieor( h(7), h(4) )
+            h(6) = ishftc( h(6), 33 )
+            h(5) = h(5) + h(7)
+            h(8) = ieor( h(8), h(5) )
+            h(7) = ishftc( h(7), 10 )
+            h(6) = h(6) + h(8)
+            h(9) = ieor( h(9), h(6) )
+            h(8) = ishftc( h(8), 13 )
+            h(7) = h(7) + h(9)
+            h(10) = ieor( h(10), h(7) )
+            h(9) = ishftc( h(9), 38 )
+            h(8) = h(8) + h(10)
+            h(11) = ieor( h(11), h(8) )
+            h(10) = ishftc( h(10), 53 )
+            h(9) = h(9) + h(11)
+            h(0) = ieor( h(0), h(9) )
+            h(11) = ishftc( h(11), 42 )
+            h(10) = h(10) + h(0)
+            h(1) = ieor( h(1), h(10) )
+            h(0) = ishftc( h(0), 54 )
+
+        end subroutine endpartial
+
+    end subroutine spookyhash_end
+
+
+    ! Start an incremental hash: store the two seed words in state(0:1) and
+    ! reset the byte counters. The other state words are not assigned here.
+    module subroutine spookysubhash_init( self, seed )
+        type(spooky_subhash), intent(out) :: self
+        integer(int64), intent(in) :: seed(2)
+
+        self % state(0:1) = seed
+        self % length = 0
+        ! NOTE(review): spookyhash_update compares remainder with 0_int16, so
+        ! the component is presumably int16; the int8 literal is converted on
+        ! assignment and the stored value is the same - confirm.
+        self % remainder = 0_int8
+
+    end subroutine spookysubhash_init
+
+
+! 
add a message fragment to the state
+! NOTE(review): `spooky` is declared intent(out), yet its remainder, length,
+! data and state components are read below before being assigned;
+! intent(inout) looks intended. The declaration has to change together with
+! the interface in the parent module, which is not visible here - confirm.
+    module subroutine spookyhash_update( spooky, key )
+        type(spooky_subhash), intent(out) :: spooky
+        integer(int8), intent(in) :: key(0:)
+
+        integer(int8) :: dummy(0:7)
+        integer(int64) :: h(0:11)
+        integer(int64) :: bend, &
+            length, &
+            new_length, &
+            p8, &
+            remainder
+
+        length = size(key, kind=int64)
+        new_length = length + spooky % remainder
+
+        ! Is this message fragment too short? If it is, stuff it away.
+        if ( new_Length < sc_buffsize ) then
+            remainder = spooky % remainder
+            spooky % data( remainder:remainder+length-1 ) = key
+            spooky % length = length + spooky % length
+            ! keep only the low-order byte of new_length as the new remainder
+            dummy = transfer( new_length, 0_int8, 8 )
+            if ( little_endian ) then
+                spooky % remainder = transfer( [ dummy(0), 0_int8 ], 0_int16 )
+            else
+                spooky % remainder = transfer( [ 0_int8, dummy(7) ], 0_int16 )
+            end if
+            return
+        end if
+
+        ! init the variables
+        if ( spooky % length < sc_buffsize ) then
+            h( [ 0, 3, 6, 9 ] ) = spooky % state(0)
+            h( [ 1, 4, 7, 10 ] ) = spooky % state(1)
+            h( [ 2, 5, 8, 11 ] ) = sc_const
+        else
+            h(0:11) = spooky % state(0:11)
+        end if
+
+        spooky % length = length + spooky % length
+
+        ! if we've got anything stuffed away, use it now
+        if ( spooky % remainder /= 0_int16 ) then
+            block
+                integer(int16) :: prefix
+                ! number of key bytes needed to fill the two-block buffer
+                prefix = sc_buffsize - spooky % remainder
+                remainder = spooky % remainder
+                spooky % data(remainder:remainder+prefix-1) = key(0:prefix-1)
+                call spookyhash_mix( transfer(spooky % data(0:sc_blocksize-1), &
+                    0_int64, sc_numvars), h )
+                call spookyhash_mix( &
+                    transfer(spooky % data(sc_blocksize:2*sc_blocksize-1), &
+                    0_int64, sc_numvars), h )
+                p8 = prefix
+                length = length - prefix
+            end block
+        else
+            p8 = 0
+        end if
+
+        ! handle all whole blocks of sc_blocksize bytes requiring aligned bytes
+        bend = p8 + 8*(length/sc_blocksize)*sc_numVars
+        remainder = length - ( bend - p8 )
+        do while( p8 < bend )
+            spooky % data(0:sc_blocksize-1) = key( p8:p8+sc_blocksize-1 )
+            call spookyhash_mix( transfer( spooky % data(0:sc_blocksize-1), &
+                0_int64, sc_numvars), h )
+            p8 = p8 + sc_blocksize
+        end do
+
+        ! stuff away the last few bytes
+        spooky % remainder = remainder
+
+        if ( remainder > 0 ) then
+            spooky % data(0:remainder-1) = &
+                key(bend:bend+remainder-1)
+        end if
+
+        ! stuff away the variables
+        spooky % state(0:11) = h(0:11)
+
+    end subroutine spookyhash_update
+
+
+! report the hash for the concatenation of all message fragments so far
+!
+! On return hash_code(1:2) holds the 128-bit hash. The buffered tail in
+! spooky % data is zero padded and tagged with its length in place.
+    module subroutine spookyhash_final(spooky, hash_code)
+        type(spooky_subhash), intent(inout) :: spooky
+        integer(int64), intent(inout) :: hash_code(2)
+
+        integer(int64) :: h(0:11)
+        integer(int64) :: index, remainder
+
+        ! init the variables
+        if ( spooky % length < sc_buffsize ) then
+            hash_code = spooky % state(0:1)
+            call spookyhash_short( spooky % data(0:spooky % length-1), &
+                hash_code )
+            return
+        end if
+
+        remainder = spooky % remainder
+
+        h(0:11) = spooky % state(0:11)
+
+        if ( remainder >= sc_blocksize ) then
+            ! m_data can contain two blocks; handle any whole first block
+            call spookyhash_mix( transfer( spooky % data, 0_int64, &
+                2*sc_numvars), h )
+            index = sc_blocksize
+            remainder = remainder - sc_blocksize
+        else
+            index = 0
+        end if
+
+        ! mix in the last partial block, and the length mod sc_blocksize
+        ! `index` is the byte offset of the block that still has to be
+        ! finalised. The padding, the length byte and the block handed to
+        ! spookyhash_end must all use it, so that the block after a whole
+        ! first block is the one finalised.
+        spooky % data(index+remainder:index+sc_blocksize-1) = 0_int8
+        ! 0 <= remainder < sc_blocksize here, so it fits in one byte
+        spooky % data(index+sc_blocksize-1) = int( remainder, kind=int8 )
+
+        ! do some final mixing
+        call spookyhash_end( &
+            transfer( spooky % data(index:index+sc_blocksize-1), &
+            0_int64, sc_numvars ), h )
+
+        hash_code(1:2) = h(0:1)
+
+    end subroutine spookyhash_final
+
+
+    ! Combine a shifted left by k bits with a shifted right by 32-k bits and
+    ! mask the result with two_32-1.
+    ! NOTE(review): two_32 is defined outside this view, presumably 2**32,
+    ! which would make this a rotation of the low 32 bits - confirm.
+    function rot_64_32( a, k )
+        integer(int64) :: rot_64_32
+        integer(int64), intent(in) :: a
+        integer, intent(in) :: k
+
+        rot_64_32 = iand( ior( shiftl( a, k ), shiftr( a, 32-k ) ), two_32-1 )
+
+    end function rot_64_32
+
+
+    module subroutine new_spooky_hash_seed( seed )
+! Random SEED generator: replaces SEED with two pseudo-random 64-bit words
+! that are guaranteed to differ from the values passed in.
+        integer(int64), intent(inout) :: seed(2)
+
+        integer(int64) :: old_seed(2)
+        real(dp) :: sample(4)
+        integer(int32) :: part(4)
+
+        old_seed = seed
+        find_seed: do
+            call random_number( sample )
+            ! scale each sample in [0,1) to the int32 range, then pack the
+            ! four 32-bit parts into two 64-bit words
+            part = int( floor( sample * 2_int64**32, int64 ) - 2_int64**31, &
+                int32 )
+            seed = transfer( part, seed, 2 )
+            if ( seed(1) /= old_seed(1) .or. seed(2) /= old_seed(2) ) return
+        end do find_seed
+
+    end subroutine new_spooky_hash_seed
+
+
+end submodule stdlib_64_bit_spookyv2_hashes
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 01df5d678..30a240beb 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -8,6 +8,7 @@ endmacro(ADDTEST) add_subdirectory(ascii) add_subdirectory(bitsets) +add_subdirectory(hash_functions) add_subdirectory(io) add_subdirectory(linalg) add_subdirectory(logger) diff --git a/src/tests/Makefile.manual b/src/tests/Makefile.manual index 3e801ad4b..7726b8adb 100644 --- a/src/tests/Makefile.manual +++ b/src/tests/Makefile.manual @@ -3,6 +3,7 @@ all test clean: $(MAKE) -f Makefile.manual --directory=ascii $@ $(MAKE) -f Makefile.manual --directory=bitsets $@ + $(MAKE) -f Makefile.manual --directory=hash_functions $@ $(MAKE) -f Makefile.manual --directory=io $@ $(MAKE) -f Makefile.manual --directory=logger $@ $(MAKE) -f Makefile.manual --directory=optval $@ diff --git a/src/tests/hash_functions/CMakeLists.txt b/src/tests/hash_functions/CMakeLists.txt new file mode 100644 index 000000000..459719c32 --- /dev/null +++
b/src/tests/hash_functions/CMakeLists.txt @@ -0,0 +1,2 @@ +ADDTEST(32_bit_hash_performance) +ADDTEST(64_bit_hash_performance) diff --git a/src/tests/hash_functions/Makefile.manual b/src/tests/hash_functions/Makefile.manual new file mode 100644 index 000000000..d3e59bd18 --- /dev/null +++ b/src/tests/hash_functions/Makefile.manual @@ -0,0 +1,3 @@ +PROGS_SRC = test_64_bit_hash_performance.f90 test_32_bit_hash_performance.f90 + +include ../Makefile.manual.test.mk diff --git a/src/tests/hash_functions/test_32_bit_hash_performance.f90 b/src/tests/hash_functions/test_32_bit_hash_performance.f90 new file mode 100644 index 000000000..acee5e36b --- /dev/null +++ b/src/tests/hash_functions/test_32_bit_hash_performance.f90 @@ -0,0 +1,190 @@ +program test_32_bit_hash_performance +!! Program to compare the relative performance of different 32 bit hash +!! functions + + use stdlib_kinds, only: & + dp, & + int8, & + int32, & + int64 + + use stdlib_32_bit_hash_functions + + implicit none + + integer, parameter :: & + block_size(8) = [ 1, 2, 4, 8, 16, 64, 256, 1024 ] + integer(int32), parameter :: huge32 = huge(0_int32) + real(dp), parameter :: hugep1 = real(huge32, dp) + 1.0_dp + integer, parameter :: rand_power = 16 + integer, parameter :: rand_size = 2**rand_power + integer, parameter :: test_size = rand_size * 4 + integer, parameter :: test_block = 2**10 + integer, parameter :: repeat = 4 + integer :: index, k + integer :: lun + real(dp) :: rand(2) + integer(int32) :: rand_object(rand_size) + integer(int8) :: test_object(test_size) + + open( newunit=lun, file="32_bit_hash_performance.txt", & + access="sequential", action="write", form="formatted", & + position="rewind" ) + + do index=1, rand_size + call random_number(rand) + if (rand(1) < 0.5_dp) then + rand_object(index) = ceiling(-rand(2)*hugep1, int32) - 1 + else + rand_object(index) = floor(rand(2)*hugep1, int32) + end if + end do + + test_object(:) = transfer( rand_object, 0_int8, test_size ) + + write(lun, '("| Algorithm 
| Key Size | Key # | Time (s) |")') + write(lun, '("| | Bytes | | |")') + write(lun, '("|------------|-----------|------------|----------|")') + + call test_fnv_1() + + call test_fnv_1a() + + call test_nmhash32() + + call test_nmhash32x() + + call test_water() + +contains + + subroutine test_fnv_1() + integer :: index2 + integer(int_hash) :: hash + real :: t1, t2, tdiff + integer(int_hash) :: summary(repeat) + + do k=1, size(block_size) + call cpu_time(t1) + do index=1, repeat + do index2=1, test_size, block_size(k) + hash = fnv_1_hash( test_object( index2: & + index2+block_size(k)-1 ) ) + if (index2 == index) summary(index) = hash + end do + end do + call cpu_time(t2) + tdiff = t2-t1 + write(lun, '("|", a10, 2x, "|", i8, 3x, "|", 1x, i10, 1x, ' // & + '"|", f9.5, 1x, "|")') 'FNV-1', & + block_size(k), repeat*(test_size/block_size(k)), tdiff + end do + + end subroutine test_fnv_1 + + subroutine test_fnv_1a() + integer :: index2 + integer(int_hash) :: hash + real :: t1, t2, tdiff + integer(int_hash) :: summary(repeat) + + do k=1, size(block_size) + call cpu_time(t1) + do index=1, repeat + do index2=1, test_size, block_size(k) + hash = fnv_1a_hash( test_object( index2: & + index2+block_size(k)-1 ) ) + if (index2 == index) summary(index) = hash + end do + end do + call cpu_time(t2) + tdiff = t2-t1 + write(lun, '("|", a10, 2x, "|", i8, 3x, "|", 1x, i10, 1x, ' // & + '"|", f9.5, 1x, "|")') 'FNV-1a', & + block_size(k), repeat*(test_size/block_size(k)), tdiff + end do + + end subroutine test_fnv_1a + + subroutine test_nmhash32() + integer :: index2 + integer(int_hash) :: hash + integer(int32) :: seed = 0_int32 + real :: t1, t2, tdiff + integer(int_hash) :: summary(repeat) + + call new_nmhash32_seed( seed ) + do k=1, size(block_size) + call cpu_time(t1) + do index=1, repeat + do index2=1, test_size, block_size(k) + hash = nmhash32( test_object( index2: & + index2+block_size(k)-1 ),& + seed ) + if (index2 == index) summary(index) = hash + end do + end do + call cpu_time(t2) 
+ tdiff = t2-t1 + write(lun, '("|", a10, 2x, "|", i8, 3x, "|", 1x, i10, 1x, ' // & + '"|", f9.5, 1x, "|")') 'nmhash32', & + block_size(k), repeat*(test_size/block_size(k)), tdiff + end do + + end subroutine test_nmhash32 + + subroutine test_nmhash32x() + integer :: index2 + integer(int_hash) :: hash + integer(int32) :: seed = 0_int32 + real :: t1, t2, tdiff + integer(int_hash) :: summary(repeat) + + call new_nmhash32x_seed( seed ) + do k=1, size(block_size) + call cpu_time(t1) + do index=1, repeat + do index2=1, test_size, block_size(k) + hash = nmhash32x( test_object( index2: & + index2+block_size(k)-1 ),& + seed ) + if (index2 == index) summary(index) = hash + end do + end do + call cpu_time(t2) + tdiff = t2-t1 + write(lun, '("|", a10, 2x, "|", i8, 3x, "|", 1x, i10, 1x, ' // & + '"|", f9.5, 1x, "|")') 'nmhash32x', & + block_size(k), repeat*(test_size/block_size(k)), tdiff + end do + + end subroutine test_nmhash32x + + subroutine test_water() + integer :: index2 + integer(int_hash) :: hash + integer(int64) :: seed = 0_int64 + real :: t1, t2, tdiff + integer(int_hash) :: summary(repeat) + + call new_water_hash_seed( seed ) + do k=1, size(block_size) + call cpu_time(t1) + do index=1, repeat + do index2=1, test_size, block_size(k) + hash = water_hash( test_object( index2: & + index2+block_size(k)-1 ),& + seed ) + if (index2 == index) summary(index) = hash + end do + end do + call cpu_time(t2) + tdiff = t2-t1 + write(lun, '("|", a10, 2x, "|", i8, 3x, "|", 1x, i10, 1x, ' // & + '"|", f9.5, 1x, "|")') 'water', & + block_size(k), repeat*(test_size/block_size(k)), tdiff + end do + + end subroutine test_water + +end program test_32_bit_hash_performance diff --git a/src/tests/hash_functions/test_64_bit_hash_performance.f90 b/src/tests/hash_functions/test_64_bit_hash_performance.f90 new file mode 100644 index 000000000..6c445f781 --- /dev/null +++ b/src/tests/hash_functions/test_64_bit_hash_performance.f90 @@ -0,0 +1,161 @@ +program test_64_bit_hash_performance +!! 
Program to compare the relative performance of different 64 bit hash +!! functions + + use stdlib_kinds, only: & + dp, & + int8, & + int32, & + int64 + + use stdlib_64_bit_hash_functions + + implicit none + + integer, parameter :: & + block_size(8) = [ 1, 2, 4, 8, 16, 64, 256, 1024 ] + integer(int32), parameter :: huge32 = huge(0_int32) + real(dp), parameter :: hugep1 = real(huge32, dp) + 1.0_dp + integer, parameter :: rand_power = 16 + integer, parameter :: rand_size = 2**rand_power + integer, parameter :: test_size = rand_size * 4 + integer, parameter :: repeat = 4 + integer :: index, k + integer :: lun + real(dp) :: rand(2) + integer(int32) :: rand_object(rand_size) + integer(int8) :: test_object(test_size) + + + open( newunit=lun, file="64_bit_hash_performance.txt", & + access="sequential", action="write", form="formatted", & + position="rewind" ) + + do index=1, rand_size + call random_number(rand) + if (rand(1) < 0.5_dp) then + rand_object(index) = ceiling(-rand(2)*hugep1, int32) - 1 + else + rand_object(index) = floor(rand(2)*hugep1, int32) + end if + end do + + test_object(:) = transfer( rand_object, 0_int8, test_size ) + + write(lun, '("| Algorithm | Key Size | Key # | Time (s) |")') + write(lun, '("| | Bytes | | |")') + write(lun, '("|------------|-----------|------------|----------|")') + + call test_fnv_1() + + call test_fnv_1a() + + call test_pengy() + + call test_spooky() + +contains + + subroutine test_fnv_1() + integer :: index2 + integer(int64) :: hash + real :: t1, t2, tdiff + integer(int64) :: summary(repeat) + + do k=1, size(block_size) + call cpu_time(t1) + do index=1, repeat + do index2=1, test_size, block_size(k) + hash = fnv_1_hash( test_object( index2: & + index2+block_size(k)-1 ) ) + if (index2 == index) summary(index) = hash + end do + end do + call cpu_time(t2) + tdiff = t2-t1 + write(lun, '("|", a10, 2x, "|", i8, 3x, "|", 1x, i10, 1x, ' // & + '"|", f9.5, 1x, "|")') 'FNV-1', & + block_size(k), repeat*(test_size/block_size(k)), tdiff + 
end do + + end subroutine test_fnv_1 + + subroutine test_fnv_1a() + integer :: index2 + integer(int64) :: hash + real :: t1, t2, tdiff + integer(int64) :: summary(repeat) + + do k=1, size(block_size) + call cpu_time(t1) + do index=1, repeat + do index2=1, test_size, block_size(k) + hash = fnv_1a_hash( test_object( index2: & + index2+block_size(k)-1 ) ) + if (index2 == index) summary(index) = hash + end do + end do + call cpu_time(t2) + tdiff = t2-t1 + write(lun, '("|", a10, 2x, "|", i8, 3x, "|", 1x, i10, 1x, ' // & + '"|", f9.5, 1x, "|")') 'FNV-1a', & + block_size(k), repeat*(test_size/block_size(k)), tdiff + end do + + end subroutine test_fnv_1a + + subroutine test_spooky() + integer :: index2 + integer(int64) :: hash(2) + integer(int64) :: seed(2) = [ 0_int64, 0_int64 ] + real :: t1, t2, tdiff + integer(int64) :: summary(repeat) + + call new_spooky_hash_seed( seed ) + do k=1, size(block_size) + call cpu_time(t1) + do index=1, repeat + do index2=1, test_size, block_size(k) + hash = spooky_hash( test_object( index2: & + index2+block_size(k)-1 ), & + seed ) + if (index2 == index) summary(index) = hash(1) + end do + end do + call cpu_time(t2) + tdiff = t2-t1 + write(lun, '("|", a10, 2x, "|", i8, 3x, "|", 1x, i10, 1x, ' // & + '"|", f9.5, 1x, "|")') 'Spooky', & + block_size(k), repeat*(test_size/block_size(k)), tdiff + end do + + end subroutine test_spooky + + subroutine test_pengy() + integer :: index2 + integer(int64) :: hash + integer(int32) :: seed = int( z'DEADBEEF', int32 ) + real :: t1, t2, tdiff + integer(int64) :: summary(repeat) + + call new_pengy_hash_seed( seed ) + do k=1, size(block_size) + call cpu_time(t1) + do index=1, repeat + do index2=1, test_size, block_size(k) + hash = pengy_hash( test_object( index2: & + index2+block_size(k)-1 ), & + seed ) + if (index2 == index) summary(index) = hash + end do + end do + call cpu_time(t2) + tdiff = t2-t1 + write(lun, '("|", a10, 2x, "|", i8, 3x, "|", 1x, i10, 1x, ' // & + '"|", f9.5, 1x, "|")') 'Pengy', & + 
block_size(k), repeat*(test_size/block_size(k)), tdiff + end do + + end subroutine test_pengy + +end program test_64_bit_hash_performance diff --git a/src/tests/hash_functions/validation/Makefile.validation b/src/tests/hash_functions/validation/Makefile.validation new file mode 100644 index 000000000..3f8e8ebcf --- /dev/null +++ b/src/tests/hash_functions/validation/Makefile.validation @@ -0,0 +1,50 @@ +MOD_PATH = -I../../../ + +FFLAGS = -O3 +CFLAGS = -O3 +CXXFLAGS = -O3 +LIBDIRS = -L./ +LIBS = -lc_hash +INCLUDE_DIRS = -I./ + +all: generate_hash_arrays generate_key_array hash_validity_test + +generate_key_array: generate_key_array.f90 + $(FC) $(FFLAGS) generate_key_array.f90 -o generate_key_array + +hash_validity_test: hash_validity_test.f90 + $(FC) $(FFLAGS) -L../../../ -lstdlib $(MOD_PATH) \ + hash_validity_test.f90 -o hash_validity_test + +generate_hash_arrays: generate_hash_arrays.o ./libc_hash.a + $(CXX) $(CXXFLAGS) $(LIBDIRS) generate_hash_arrays.o \ +$(LIBS) -o generate_hash_arrays + +generate_hash_arrays.o: generate_hash_arrays.cpp libc_hash.a + $(CXX) $(CXXFLAGS) -c generate_hash_arrays.cpp -o generate_hash_arrays.o + +libc_hash.a: SpookyV2.o SpookyV2Test.o pengyhash.o nmhash_scalar.o waterhash.o + ar rcs libc_hash.a SpookyV2.o SpookyV2Test.o pengyhash.o \ + nmhash_scalar.o waterhash.o + +pengyhash.o: pengyhash.c pengyhash.h + $(CC) $(CFLAGS) $(INCLUDE_DIRS) -c pengyhash.c -o pengyhash.o + +waterhash.o: waterhash.c waterhash.h + $(CC) $(CFLAGS) $(INCLUDE_DIRS) -c waterhash.c -o waterhash.o + +SpookyV2.o: SpookyV2.cpp SpookyV2.h + $(CXX) $(CXXFLAGS) $(INCLUDE_DIRS) -c SpookyV2.cpp -o SpookyV2.o + +SpookyV2Test.o: SpookyV2Test.cpp SpookyV2.h + $(CXX) $(CXXFLAGS) $(INCLUDE_DIRS) -c SpookyV2Test.cpp -o SpookyV2Test.o + +nmhash_scalar.o: nmhash_scalar.c nmhash_scalar.h + $(CC) $(CXXFLAGS) $(INCLUDE_DIRS) -c nmhash_scalar.c -o nmhash_scalar.o + +clean: + rm nmhash_scalar.o SpookyV2Test.o SpookyV2.o waterhash.o pengyhash.o \ + libc_hash.a generate_hash_arrays.o 
generate_hash_arrays \ + hash_validity_test generate_key_array + + diff --git a/src/tests/hash_functions/validation/README.md b/src/tests/hash_functions/validation/README.md new file mode 100644 index 000000000..61836b0dd --- /dev/null +++ b/src/tests/hash_functions/validation/README.md @@ -0,0 +1,24 @@ +# The `validation` subdirectory + +This directory contains source code and a makefile, `Makefile.validation`, +for generating applications from the source code intended to test the more +complicated hash functions in `libstdlib.a` against the original C and C++ +hash procedures. At least two of the hash procedures, nmhash32 and +nmhash32x, assume that the C processor is either gcc or MSVC so that +currently the tests can only use gcc. + +The makefile generates three applications: +`generate_key_array`, `generate_hash_arrays`, and `hash_validity_test`. +* `generate_key_array` generates a file, `key_array.bin`, that contains a +random sequence of 2048 eight-bit integers. +* `generate_hash_arrays` generates five files, `c_nmhash32_array.bin`, `c_nmhash32x_array.bin`, `c_pengy_hash_array.bin`, `c_spooky_hash_array.bin`, +and `c_water_hash_array.bin`, that, in turn, represent the results of applying +the corresponding C/C++ hash functions on subsequences of the data in +`key_array.bin`. +* `hash_validity_test` compares the contents of the files generated by +`generate_hash_arrays` against the results of applying the corresponding +Fortran based hash functions in `libstdlib.a` on the same subsequences +of `key_array.bin`, and reports whether the comparisons match. + +The applications should be run in the sequence: first, `generate_key_array`, +then `generate_hash_arrays`, and finally `hash_validity_test`.
diff --git a/src/tests/hash_functions/validation/SpookyV2.cpp b/src/tests/hash_functions/validation/SpookyV2.cpp new file mode 100644 index 000000000..735bd5629 --- /dev/null +++ b/src/tests/hash_functions/validation/SpookyV2.cpp @@ -0,0 +1,351 @@ +// Spooky Hash +// A 128-bit noncryptographic hash, for checksums and table lookup +// By Bob Jenkins. Public domain. +// Oct 31 2010: published framework, disclaimer ShortHash isn't right +// Nov 7 2010: disabled ShortHash +// Oct 31 2011: replace End, ShortMix, ShortEnd, enable ShortHash again +// April 10 2012: buffer overflow on platforms without unaligned reads +// July 12 2012: was passing out variables in final to in/out in short +// July 30 2012: I reintroduced the buffer overflow +// August 5 2012: SpookyV2: d = should be d += in short hash, and remove extra mix from long hash + +#include +#include "SpookyV2.h" + +#define ALLOW_UNALIGNED_READS 1 + +// +// short hash ... it could be used on any message, +// but it's used by Spooky just for short messages. +// +void SpookyHash::Short( + const void *message, + size_t length, + uint64 *hash1, + uint64 *hash2) +{ + uint64 buf[2*sc_numVars]; + union + { + const uint8 *p8; + uint32 *p32; + uint64 *p64; + size_t i; + } u; + + u.p8 = (const uint8 *)message; + + if (!ALLOW_UNALIGNED_READS && (u.i & 0x7)) + { + memcpy(buf, message, length); + u.p64 = buf; + } + + size_t remainder = length%32; + uint64 a=*hash1; + uint64 b=*hash2; + uint64 c=sc_const; + uint64 d=sc_const; + + if (length > 15) + { + const uint64 *end = u.p64 + (length/32)*4; + + // handle all complete sets of 32 bytes + for (; u.p64 < end; u.p64 += 4) + { + c += u.p64[0]; + d += u.p64[1]; + ShortMix(a,b,c,d); + a += u.p64[2]; + b += u.p64[3]; + } + + //Handle the case of 16+ remaining bytes. 
+ if (remainder >= 16) + { + c += u.p64[0]; + d += u.p64[1]; + ShortMix(a,b,c,d); + u.p64 += 2; + remainder -= 16; + } + } + + // Handle the last 0..15 bytes, and its length + d += ((uint64)length) << 56; + switch (remainder) + { + case 15: + d += ((uint64)u.p8[14]) << 48; + case 14: + d += ((uint64)u.p8[13]) << 40; + case 13: + d += ((uint64)u.p8[12]) << 32; + case 12: + d += u.p32[2]; + c += u.p64[0]; + break; + case 11: + d += ((uint64)u.p8[10]) << 16; + case 10: + d += ((uint64)u.p8[9]) << 8; + case 9: + d += (uint64)u.p8[8]; + case 8: + c += u.p64[0]; + break; + case 7: + c += ((uint64)u.p8[6]) << 48; + case 6: + c += ((uint64)u.p8[5]) << 40; + case 5: + c += ((uint64)u.p8[4]) << 32; + case 4: + c += u.p32[0]; + break; + case 3: + c += ((uint64)u.p8[2]) << 16; + case 2: + c += ((uint64)u.p8[1]) << 8; + case 1: + c += (uint64)u.p8[0]; + break; + case 0: + c += sc_const; + d += sc_const; + } + ShortEnd(a,b,c,d); + *hash1 = a; + *hash2 = b; +} + + + + +// do the whole hash in one call +void SpookyHash::Hash128( + const void *message, + size_t length, + uint64 *hash1, + uint64 *hash2) +{ + if (length < sc_bufSize) + { + Short(message, length, hash1, hash2); + return; + } + + uint64 h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11; + uint64 buf[sc_numVars]; + uint64 *end; + union + { + const uint8 *p8; + uint64 *p64; + size_t i; + } u; + size_t remainder; + + h0=h3=h6=h9 = *hash1; + h1=h4=h7=h10 = *hash2; + h2=h5=h8=h11 = sc_const; + + u.p8 = (const uint8 *)message; + end = u.p64 + (length/sc_blockSize)*sc_numVars; + + // handle all whole sc_blockSize blocks of bytes + if (ALLOW_UNALIGNED_READS || ((u.i & 0x7) == 0)) + { + while (u.p64 < end) + { + Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + u.p64 += sc_numVars; + } + } + else + { + while (u.p64 < end) + { + memcpy(buf, u.p64, sc_blockSize); + Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + u.p64 += sc_numVars; + } + } + + // handle the last partial block of sc_blockSize bytes + remainder = (length - ((const uint8 
*)end-(const uint8 *)message)); + memcpy(buf, end, remainder); + memset(((uint8 *)buf)+remainder, 0, sc_blockSize-remainder); + ((uint8 *)buf)[sc_blockSize-1] = remainder; + + // do some final mixing + End(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + *hash1 = h0; + *hash2 = h1; +} + + + +// init spooky state +void SpookyHash::Init(uint64 seed1, uint64 seed2) +{ + m_length = 0; + m_remainder = 0; + m_state[0] = seed1; + m_state[1] = seed2; +} + + +// add a message fragment to the state +void SpookyHash::Update(const void *message, size_t length) +{ + uint64 h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11; + size_t newLength = length + m_remainder; + uint8 remainder; + union + { + const uint8 *p8; + uint64 *p64; + size_t i; + } u; + const uint64 *end; + + // Is this message fragment too short? If it is, stuff it away. + if (newLength < sc_bufSize) + { + memcpy(&((uint8 *)m_data)[m_remainder], message, length); + m_length = length + m_length; + m_remainder = (uint8)newLength; + return; + } + + // init the variables + if (m_length < sc_bufSize) + { + h0=h3=h6=h9 = m_state[0]; + h1=h4=h7=h10 = m_state[1]; + h2=h5=h8=h11 = sc_const; + } + else + { + h0 = m_state[0]; + h1 = m_state[1]; + h2 = m_state[2]; + h3 = m_state[3]; + h4 = m_state[4]; + h5 = m_state[5]; + h6 = m_state[6]; + h7 = m_state[7]; + h8 = m_state[8]; + h9 = m_state[9]; + h10 = m_state[10]; + h11 = m_state[11]; + } + m_length = length + m_length; + + // if we've got anything stuffed away, use it now + if (m_remainder) + { + uint8 prefix = sc_bufSize-m_remainder; + memcpy(&(((uint8 *)m_data)[m_remainder]), message, prefix); + u.p64 = m_data; + Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + Mix(&u.p64[sc_numVars], h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + u.p8 = ((const uint8 *)message) + prefix; + length -= prefix; + } + else + { + u.p8 = (const uint8 *)message; + } + + // handle all whole blocks of sc_blockSize bytes + end = u.p64 + (length/sc_blockSize)*sc_numVars; + remainder = (uint8)(length-((const uint8 
*)end-u.p8)); + if (ALLOW_UNALIGNED_READS || (u.i & 0x7) == 0) + { + while (u.p64 < end) + { + Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + u.p64 += sc_numVars; + } + } + else + { + while (u.p64 < end) + { + memcpy(m_data, u.p8, sc_blockSize); + Mix(m_data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + u.p64 += sc_numVars; + } + } + + // stuff away the last few bytes + m_remainder = remainder; + memcpy(m_data, end, remainder); + + // stuff away the variables + m_state[0] = h0; + m_state[1] = h1; + m_state[2] = h2; + m_state[3] = h3; + m_state[4] = h4; + m_state[5] = h5; + m_state[6] = h6; + m_state[7] = h7; + m_state[8] = h8; + m_state[9] = h9; + m_state[10] = h10; + m_state[11] = h11; +} + + +// report the hash for the concatenation of all message fragments so far +void SpookyHash::Final(uint64 *hash1, uint64 *hash2) +{ + // init the variables + if (m_length < sc_bufSize) + { + *hash1 = m_state[0]; + *hash2 = m_state[1]; + Short( m_data, m_length, hash1, hash2); + return; + } + + const uint64 *data = (const uint64 *)m_data; + uint8 remainder = m_remainder; + + uint64 h0 = m_state[0]; + uint64 h1 = m_state[1]; + uint64 h2 = m_state[2]; + uint64 h3 = m_state[3]; + uint64 h4 = m_state[4]; + uint64 h5 = m_state[5]; + uint64 h6 = m_state[6]; + uint64 h7 = m_state[7]; + uint64 h8 = m_state[8]; + uint64 h9 = m_state[9]; + uint64 h10 = m_state[10]; + uint64 h11 = m_state[11]; + + if (remainder >= sc_blockSize) + { + // m_data can contain two blocks; handle any whole first block + Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + data += sc_numVars; + remainder -= sc_blockSize; + } + + // mix in the last partial block, and the length mod sc_blockSize + memset(&((uint8 *)data)[remainder], 0, (sc_blockSize-remainder)); + + ((uint8 *)data)[sc_blockSize-1] = remainder; + + // do some final mixing + End(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + + *hash1 = h0; + *hash2 = h1; +} + diff --git a/src/tests/hash_functions/validation/SpookyV2.h 
b/src/tests/hash_functions/validation/SpookyV2.h new file mode 100644 index 000000000..4ccc0d523 --- /dev/null +++ b/src/tests/hash_functions/validation/SpookyV2.h @@ -0,0 +1,299 @@ +// +// SpookyHash: a 128-bit noncryptographic hash function +// By Bob Jenkins, public domain +// Oct 31 2010: alpha, framework + SpookyHash::Mix appears right +// Oct 31 2011: alpha again, Mix only good to 2^^69 but rest appears right +// Dec 31 2011: beta, improved Mix, tested it for 2-bit deltas +// Feb 2 2012: production, same bits as beta +// Feb 5 2012: adjusted definitions of uint* to be more portable +// Mar 30 2012: 3 bytes/cycle, not 4. Alpha was 4 but wasn't thorough enough. +// August 5 2012: SpookyV2 (different results) +// +// Up to 3 bytes/cycle for long messages. Reasonably fast for short messages. +// All 1 or 2 bit deltas achieve avalanche within 1% bias per output bit. +// +// This was developed for and tested on 64-bit x86-compatible processors. +// It assumes the processor is little-endian. There is a macro +// controlling whether unaligned reads are allowed (by default they are). +// This should be an equally good hash on big-endian machines, but it will +// compute different results on them than on little-endian machines. +// +// Google's CityHash has similar specs to SpookyHash, and CityHash is faster +// on new Intel boxes. MD4 and MD5 also have similar specs, but they are orders +// of magnitude slower. CRCs are two or more times slower, but unlike +// SpookyHash, they have nice math for combining the CRCs of pieces to form +// the CRCs of wholes. There are also cryptographic hashes, but those are even +// slower than MD5. 
+// + +#include <stddef.h> // size_t + +#ifdef _MSC_VER +# define INLINE __forceinline + typedef unsigned __int64 uint64; + typedef unsigned __int32 uint32; + typedef unsigned __int16 uint16; + typedef unsigned __int8 uint8; +#else +# include <stdint.h> // uint64_t, uint32_t, uint16_t, uint8_t +# define INLINE inline + typedef uint64_t uint64; + typedef uint32_t uint32; + typedef uint16_t uint16; + typedef uint8_t uint8; +#endif + + +class SpookyHash +{ +public: + // + // SpookyHash: hash a single message in one call, produce 128-bit output + // + static void Hash128( + const void *message, // message to hash + size_t length, // length of message in bytes + uint64 *hash1, // in/out: in seed 1, out hash value 1 + uint64 *hash2); // in/out: in seed 2, out hash value 2 + + // + // Hash64: hash a single message in one call, return 64-bit output + // + static uint64 Hash64( + const void *message, // message to hash + size_t length, // length of message in bytes + uint64 seed) // seed + { + uint64 hash1 = seed; + Hash128(message, length, &hash1, &seed); + return hash1; + } + + // + // Hash32: hash a single message in one call, produce 32-bit output + // + static uint32 Hash32( + const void *message, // message to hash + size_t length, // length of message in bytes + uint32 seed) // seed + { + uint64 hash1 = seed, hash2 = seed; + Hash128(message, length, &hash1, &hash2); + return (uint32)hash1; + } + + // + // Init: initialize the context of a SpookyHash + // + void Init( + uint64 seed1, // any 64-bit value will do, including 0 + uint64 seed2); // different seeds produce independent hashes + + // + // Update: add a piece of a message to a SpookyHash state + // + void Update( + const void *message, // message fragment + size_t length); // length of message fragment in bytes + + + // + // Final: compute the hash for the current SpookyHash state + // + // This does not modify the state; you can keep updating it afterward + // + // The result is the same as if SpookyHash() had been called with + // all the pieces concatenated into one message. 
+ // + void Final( + uint64 *hash1, // out only: first 64 bits of hash value. + uint64 *hash2); // out only: second 64 bits of hash value. + + // + // left rotate a 64-bit value by k bits (k must be in 1..63: k == 0 would shift right by 64, which is undefined for a 64-bit type) + // + static INLINE uint64 Rot64(uint64 x, int k) + { + return (x << k) | (x >> (64 - k)); + } + + // + // This is used if the input is 96 bytes long or longer. + // + // The internal state is fully overwritten every 96 bytes. + // Every input bit appears to cause at least 128 bits of entropy + // before 96 other bytes are combined, when run forward or backward + // For every input bit, + // Two inputs differing in just that input bit + // Where "differ" means xor or subtraction + // And the base value is random + // When run forward or backwards one Mix + // I tried 3 pairs of each; they all differed by at least 212 bits. + // + static INLINE void Mix( + const uint64 *data, + uint64 &s0, uint64 &s1, uint64 &s2, uint64 &s3, + uint64 &s4, uint64 &s5, uint64 &s6, uint64 &s7, + uint64 &s8, uint64 &s9, uint64 &s10,uint64 &s11) + { + s0 += data[0]; s2 ^= s10; s11 ^= s0; s0 = Rot64(s0,11); s11 += s1; + s1 += data[1]; s3 ^= s11; s0 ^= s1; s1 = Rot64(s1,32); s0 += s2; + s2 += data[2]; s4 ^= s0; s1 ^= s2; s2 = Rot64(s2,43); s1 += s3; + s3 += data[3]; s5 ^= s1; s2 ^= s3; s3 = Rot64(s3,31); s2 += s4; + s4 += data[4]; s6 ^= s2; s3 ^= s4; s4 = Rot64(s4,17); s3 += s5; + s5 += data[5]; s7 ^= s3; s4 ^= s5; s5 = Rot64(s5,28); s4 += s6; + s6 += data[6]; s8 ^= s4; s5 ^= s6; s6 = Rot64(s6,39); s5 += s7; + s7 += data[7]; s9 ^= s5; s6 ^= s7; s7 = Rot64(s7,57); s6 += s8; + s8 += data[8]; s10 ^= s6; s7 ^= s8; s8 = Rot64(s8,55); s7 += s9; + s9 += data[9]; s11 ^= s7; s8 ^= s9; s9 = Rot64(s9,54); s8 += s10; + s10 += data[10]; s0 ^= s8; s9 ^= s10; s10 = Rot64(s10,22); s9 += s11; + s11 += data[11]; s1 ^= s9; s10 ^= s11; s11 = Rot64(s11,46); s10 += s0; + } + + // + // Mix all 12 inputs together so that h0, h1 are a hash of them all. 
+ // + // For two inputs differing in just the input bits + // Where "differ" means xor or subtraction + // And the base value is random, or a counting value starting at that bit + // The final result will have each bit of h0, h1 flip + // For every input bit, + // with probability 50 +- .3% + // For every pair of input bits, + // with probability 50 +- 3% + // + // This does not rely on the last Mix() call having already mixed some. + // Two iterations was almost good enough for a 64-bit result, but a + // 128-bit result is reported, so End() does three iterations. + // + static INLINE void EndPartial( + uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3, + uint64 &h4, uint64 &h5, uint64 &h6, uint64 &h7, + uint64 &h8, uint64 &h9, uint64 &h10,uint64 &h11) + { + h11+= h1; h2 ^= h11; h1 = Rot64(h1,44); + h0 += h2; h3 ^= h0; h2 = Rot64(h2,15); + h1 += h3; h4 ^= h1; h3 = Rot64(h3,34); + h2 += h4; h5 ^= h2; h4 = Rot64(h4,21); + h3 += h5; h6 ^= h3; h5 = Rot64(h5,38); + h4 += h6; h7 ^= h4; h6 = Rot64(h6,33); + h5 += h7; h8 ^= h5; h7 = Rot64(h7,10); + h6 += h8; h9 ^= h6; h8 = Rot64(h8,13); + h7 += h9; h10^= h7; h9 = Rot64(h9,38); + h8 += h10; h11^= h8; h10= Rot64(h10,53); + h9 += h11; h0 ^= h9; h11= Rot64(h11,42); + h10+= h0; h1 ^= h10; h0 = Rot64(h0,54); + } + + static INLINE void End( + const uint64 *data, + uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3, + uint64 &h4, uint64 &h5, uint64 &h6, uint64 &h7, + uint64 &h8, uint64 &h9, uint64 &h10,uint64 &h11) + { + h0 += data[0]; h1 += data[1]; h2 += data[2]; h3 += data[3]; + h4 += data[4]; h5 += data[5]; h6 += data[6]; h7 += data[7]; + h8 += data[8]; h9 += data[9]; h10 += data[10]; h11 += data[11]; + EndPartial(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + EndPartial(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + EndPartial(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + } + + // + // The goal is for each bit of the input to expand into 128 bits of + // apparent entropy before it is fully overwritten. 
+ // n trials both set and cleared at least m bits of h0 h1 h2 h3 + // n: 2 m: 29 + // n: 3 m: 46 + // n: 4 m: 57 + // n: 5 m: 107 + // n: 6 m: 146 + // n: 7 m: 152 + // when run forwards or backwards + // for all 1-bit and 2-bit diffs + // with diffs defined by either xor or subtraction + // with a base of all zeros plus a counter, or plus another bit, or random + // + static INLINE void ShortMix(uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3) + { + h2 = Rot64(h2,50); h2 += h3; h0 ^= h2; + h3 = Rot64(h3,52); h3 += h0; h1 ^= h3; + h0 = Rot64(h0,30); h0 += h1; h2 ^= h0; + h1 = Rot64(h1,41); h1 += h2; h3 ^= h1; + h2 = Rot64(h2,54); h2 += h3; h0 ^= h2; + h3 = Rot64(h3,48); h3 += h0; h1 ^= h3; + h0 = Rot64(h0,38); h0 += h1; h2 ^= h0; + h1 = Rot64(h1,37); h1 += h2; h3 ^= h1; + h2 = Rot64(h2,62); h2 += h3; h0 ^= h2; + h3 = Rot64(h3,34); h3 += h0; h1 ^= h3; + h0 = Rot64(h0,5); h0 += h1; h2 ^= h0; + h1 = Rot64(h1,36); h1 += h2; h3 ^= h1; + } + + // + // Mix all 4 inputs together so that h0, h1 are a hash of them all. 
+ // + // For two inputs differing in just the input bits + // Where "differ" means xor or subtraction + // And the base value is random, or a counting value starting at that bit + // The final result will have each bit of h0, h1 flip + // For every input bit, + // with probability 50 +- .3% (it is probably better than that) + // For every pair of input bits, + // with probability 50 +- .75% (the worst case is approximately that) + // + static INLINE void ShortEnd(uint64 &h0, uint64 &h1, uint64 &h2, uint64 &h3) + { + h3 ^= h2; h2 = Rot64(h2,15); h3 += h2; + h0 ^= h3; h3 = Rot64(h3,52); h0 += h3; + h1 ^= h0; h0 = Rot64(h0,26); h1 += h0; + h2 ^= h1; h1 = Rot64(h1,51); h2 += h1; + h3 ^= h2; h2 = Rot64(h2,28); h3 += h2; + h0 ^= h3; h3 = Rot64(h3,9); h0 += h3; + h1 ^= h0; h0 = Rot64(h0,47); h1 += h0; + h2 ^= h1; h1 = Rot64(h1,54); h2 += h1; + h3 ^= h2; h2 = Rot64(h2,32); h3 += h2; + h0 ^= h3; h3 = Rot64(h3,25); h0 += h3; + h1 ^= h0; h0 = Rot64(h0,63); h1 += h0; + } + +private: + + // + // Short is used for messages under 192 bytes in length + // Short has a low startup cost, the normal mode is good for long + // keys, the cost crossover is at about 192 bytes. The two modes were + // held to the same quality bar. 
+ // + static void Short( + const void *message, // message (array of bytes, not necessarily aligned) + size_t length, // length of message (in bytes) + uint64 *hash1, // in/out: in the seed, out the hash value + uint64 *hash2); // in/out: in the seed, out the hash value + + // number of uint64's in internal state + static const size_t sc_numVars = 12; + + // size of the internal state + static const size_t sc_blockSize = sc_numVars*8; + + // size of buffer of unhashed data, in bytes + static const size_t sc_bufSize = 2*sc_blockSize; + + // + // sc_const: a constant which: + // * is not zero + // * is odd + // * is a not-very-regular mix of 1's and 0's + // * does not need any other special mathematical properties + // + static const uint64 sc_const = 0xdeadbeefdeadbeefLL; + + uint64 m_data[2*sc_numVars]; // unhashed data, for partial messages + uint64 m_state[sc_numVars]; // internal state of the hash + size_t m_length; // total length of the input so far + uint8 m_remainder; // length of unhashed data stashed in m_data +}; + + + diff --git a/src/tests/hash_functions/validation/SpookyV2Test.cpp b/src/tests/hash_functions/validation/SpookyV2Test.cpp new file mode 100644 index 000000000..3b9e6826f --- /dev/null +++ b/src/tests/hash_functions/validation/SpookyV2Test.cpp @@ -0,0 +1,52 @@ +#include "SpookyV2.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void SpookyHash32_with_state_test(const void *key, size_t len, const void *state, void *out) { + uint64_t *state64= (uint64_t *)state; + uint64_t s0 = state64[0]; + uint64_t s1 = state64[1]; + SpookyHash::Hash128(key, len, &s0, &s1); + ((uint32_t *)out)[0]= (uint32_t)s0; +} + +void SpookyHash64_with_state_test(const void *key, size_t len, const void *state, void *out) { + uint64_t *state64= (uint64_t *)state; + uint64_t *out64= (uint64_t *)out; + out64[0] = state64[0]; + uint64_t s1 = state64[1]; + SpookyHash::Hash128(key, len, out64, &s1); +} + +void SpookyHash128_with_state_test(const void *key, size_t len, const 
void *state, void *out) { + uint64_t *state64= (uint64_t *)state; + uint64_t *out64= (uint64_t *)out; + out64[0] = state64[0]; + out64[1] = state64[1]; + SpookyHash::Hash128(key, len, out64, out64+1); +} + +void SpookyHash_seed_state_test(int in_bits, const void *seed, void *state) { + uint64_t *state64= (uint64_t *)state; + if (in_bits == 32) { + state64[0]= state64[1]= ((uint32_t*)seed)[0]; + } + else { + uint64_t *seed64= (uint64_t *)seed; + if (in_bits == 64) { + state64[0]= state64[1]= seed64[0]; + } + else + if (in_bits == 128) { + state64[0]= seed64[0]; + state64[1]= seed64[1]; + } + } +} + + +#ifdef __cplusplus +} +#endif diff --git a/src/tests/hash_functions/validation/generate_hash_arrays.cpp b/src/tests/hash_functions/validation/generate_hash_arrays.cpp new file mode 100644 index 000000000..bcd95dfb3 --- /dev/null +++ b/src/tests/hash_functions/validation/generate_hash_arrays.cpp @@ -0,0 +1,174 @@ +#include <iostream> // cout, endl +#include <fstream> // std::ifstream, std::ofstream + +extern "C" { + #include "nmhash.h" + #include "nmhash_scalar.h" + #include "pengyhash.h" + #include "waterhash.h" +} + +#include "SpookyV2.h" + +void SpookyHash32_with_state_test(const void *key, size_t len, const void *state, void *out) { + uint64_t *state64= (uint64_t *)state; + uint64_t s0 = state64[0]; + uint64_t s1 = state64[1]; + SpookyHash::Hash128(key, len, &s0, &s1); + ((uint32_t *)out)[0]= (uint32_t)s0; +} + +void SpookyHash64_with_state_test(const void *key, size_t len, const void *state, void *out) { + uint64_t *state64= (uint64_t *)state; + uint64_t *out64= (uint64_t *)out; + out64[0] = state64[0]; + uint64_t s1 = state64[1]; + SpookyHash::Hash128(key, len, out64, &s1); +} + +void SpookyHash128_with_state_test(const void *key, size_t len, const void *state, void *out) { + uint64_t *state64= (uint64_t *)state; + uint64_t *out64= (uint64_t *)out; + out64[0] = state64[0]; + out64[1] = state64[1]; + SpookyHash::Hash128(key, len, out64, out64+1); +} + +void SpookyHash_seed_state_test(int in_bits, const void *seed, void *state) { + 
uint64_t *state64= (uint64_t *)state; + if (in_bits == 32) { + state64[0]= state64[1]= ((uint32_t*)seed)[0]; + } + else { + uint64_t *seed64= (uint64_t *)seed; + if (in_bits == 64) { + state64[0]= state64[1]= seed64[0]; + } + else + if (in_bits == 128) { + state64[0]= seed64[0]; + state64[1]= seed64[1]; + } + } +} + +using namespace std; + +static const int SIZE = 2048; +char * key_array = new char[SIZE]; +static const uint32_t NM_SEED = 0xdeadbeef; +static const uint64_t WATER_SEED = 0xdeadbeef1eadbeef; +static const uint32_t PENGY_SEED = 0xdeadbeef; +static const uint64_t SPOOKY_SEED[2] = { WATER_SEED, WATER_SEED }; + +int read_keys(){ + string inFileName = "key_array.bin"; + std::ifstream fin( inFileName, ios::in | ios::binary ); + if (!fin){ + cout << "Cannot open key_array.bin!" << endl; + return 1; + } + if (!fin.read(key_array, SIZE)){ cout << "Cannot read key_array.bin!" << endl; return 1; } // a short read would leave key_array partly uninitialized + fin.close(); + return 0; +} + +int write_nmhash32(){ + size_t i; + uint32_t hash; + string outFileName = "c_nmhash32_array.bin"; + std::ofstream fout( outFileName, ios::out | ios::binary ); + + if (!fout){ + cout << "Cannot open c_nmhash32_array.bin!" << endl; + return 1; + } + for( i=0; i<=SIZE; i+=1 ){ + hash = NMHASH32((void *) key_array, i, NM_SEED); + fout.write((char *) &hash, 4); + } + fout.close(); + return 0; +} + +int write_nmhash32x(){ + size_t i; + uint32_t hash; + string outFileName = "c_nmhash32x_array.bin"; + std::ofstream fout( outFileName, ios::out | ios::binary ); + + if (!fout){ + cout << "Cannot open c_nmhash32x_array.bin!" << endl; + return 1; + } + for( i=0; i<=SIZE; i+=1 ){ + hash = NMHASH32X((void *) key_array, i, NM_SEED); + fout.write((char *) &hash, 4); + } + fout.close(); + return 0; +} + +int write_water(){ + uint32_t i; + uint32_t hash; + string outFileName = "c_water_hash_array.bin"; + std::ofstream fout( outFileName, ios::out | ios::binary ); + + if (!fout){ + cout << "Cannot open c_water_hash_array.bin!" 
<< endl; + return 1; + } + for( i=0; i<=SIZE; i+=1 ){ + hash = waterhash((void *) key_array, i, WATER_SEED); + fout.write((char *) &hash, 4); + } + fout.close(); + return 0; +} + +int write_pengy(){ + size_t i; + uint64_t hash; + string outFileName = "c_pengy_hash_array.bin"; + std::ofstream fout( outFileName, ios::out | ios::binary ); + + if (!fout){ + cout << "Cannot open c_pengy_hash_array.bin!" << endl; + return 1; + } + for( i=0; i<=SIZE; i+=1 ){ + hash = pengyhash((void *) key_array, i, PENGY_SEED); + fout.write((char *) &hash, 8); + } + fout.close(); + return 0; +} + +int write_spooky(){ + size_t i; + uint64_t hash[2]; + string outFileName = "c_spooky_hash_array.bin"; + std::ofstream fout( outFileName, ios::out | ios::binary ); + + if (!fout){ + cout << "Cannot open c_spooky_hash_array.bin!" << endl; + return 1; + } + for( i=0; i<=SIZE; i+=1 ){ + SpookyHash128_with_state_test((void *) key_array, i, (void *) SPOOKY_SEED, (void *) hash); + fout.write((char *) hash, 16); + } + fout.close(); + return 0; +} + +int main(){ + if (read_keys()==1){return 1;}; + if (write_nmhash32()==1){return 1;}; + if (write_nmhash32x()==1){return 1;}; + if (write_water()==1){return 1;}; + if (write_pengy()==1){return 1;}; + if (write_spooky()==1){return 1;}; + return 0; +} diff --git a/src/tests/hash_functions/validation/generate_key_array.f90 b/src/tests/hash_functions/validation/generate_key_array.f90 new file mode 100644 index 000000000..40b43a043 --- /dev/null +++ b/src/tests/hash_functions/validation/generate_key_array.f90 @@ -0,0 +1,22 @@ +program generate_key_array + + use, intrinsic :: iso_fortran_env, only: int8, int32, int64, real64 + + integer :: i, lun ! i: loop index (must be declared explicitly under -fimplicit-none) + integer(int8) :: key_array(2048) + integer(int32) :: dummy(512) + real(real64) :: rand(512) + +! 
Create key array + call random_number( rand ) + do i=1, 512 + dummy(i) = floor( rand(i) * 2_int64**32 - 2_int64**31, kind=int32 ) + end do + key_array = transfer( dummy, 0_int8, 2048 ) + + open(newunit=lun, file="key_array.bin", form="unformatted", & + access="stream", status="new", action="write") + write(lun) key_array + close(lun) + +end program generate_key_array diff --git a/src/tests/hash_functions/validation/hash_validity_test.f90 b/src/tests/hash_functions/validation/hash_validity_test.f90 new file mode 100644 index 000000000..86d2cc3df --- /dev/null +++ b/src/tests/hash_functions/validation/hash_validity_test.f90 @@ -0,0 +1,123 @@ +!! HASH_VALIDITY_TEST processes a vector of eight bit integers, +!! extracting subvectors of length 0, 1, 2, ... 2048 from the beginning, +!! hashing each subvector and comparing the resulting hash with the +!! corresponding hash produced by the original C/C++ code, stopping if +!! they are different. As the original C/C++ code was typically developed +!! for Little-Endian machines the testing should only be done on such +!! machines. The Fortran codes also assume two's complement integers. +!! The code also assumes that C's int32_t and int64_t have the same +!! representation as Fortran's int32 and int64 respectively. 
+ +program hash_validity_test + + use, intrinsic :: iso_fortran_env, only: int8, int32, int64, real64 + use stdlib_32_bit_hash_functions, only: & + little_endian, & + nmhash32, & + nmhash32x, & + water_hash + use stdlib_64_bit_hash_functions, only: & + pengy_hash, & + spooky_hash + + integer(int32), parameter :: nm_seed = int( z'deadbeef', int32 ) + integer(int64), parameter :: water_seed = int( z'deadbeef1eadbeef', int64 ) + integer(int32), parameter :: pengy_seed = int( z'deadbeef', int32 ) + integer(int64), parameter :: spooky_seed(2) = [ water_seed, water_seed ] + integer :: index + integer :: lun + integer(int8) :: key_array(2048) + integer(int32) :: c_nmhash32(0:2048) + integer(int32) :: c_nmhash32x(0:2048) + integer(int32) :: c_water_hash(0:2048) + integer(int64) :: c_pengy_hash(0:2048) + integer(int64) :: c_spooky_hash(0:1, 0:2048) + + + ! Test for endianness + if ( .not. little_endian ) then + stop "The processor is not Little-Endian" + end if + + ! Read key array used to generate hash array + open(newunit=lun, file="key_array.bin", form="unformatted", & + access="stream", status="old", action="read") + read(lun) key_array + close(lun) + + ! Read hash array generated from key array by the C version of nmhash32 + open(newunit=lun, file="c_nmhash32_array.bin", form="unformatted", & + access="stream", status="old", action="read") + read(lun) c_nmhash32 + close(lun) + + do index=0, 2048 + if ( c_nmhash32(index) /= nmhash32(key_array(1:index), nm_seed) ) then + write(*,'("NMHASH32 failed for KEY_ARRAY(1:", I0, ")")') index + stop "NMHASH32 is invalid." + end if + end do + write(*,*) "NMHASH32 is valid." + + ! 
Read hash array generated from key array by the C version of nmhash32x + open(newunit=lun, file="c_nmhash32x_array.bin", form="unformatted", & + access="stream", status="old", action="read") + read(lun) c_nmhash32x + close(lun) + + do index=0, 2048 + if ( c_nmhash32x(index) /= nmhash32x(key_array(1:index), nm_seed) ) then + write(*,'("NMHASH32X failed for KEY_ARRAY(1:", I0, ")")') index + stop "NMHASH32X is invalid." + end if + end do + write(*,*) "NMHASH32X is valid." + + ! Read hash array generated from key array by the C version of water hash + open(newunit=lun, file="c_water_hash_array.bin", form="unformatted", & + access="stream", status="old", action="read") + read(lun) c_water_hash + close(lun) + + do index=0, 2048 + if ( c_water_hash(index) /= & + water_hash(key_array(1:index), water_seed) ) then + write(*,'("WATER_HASH failed for KEY_ARRAY(1:", I0, ")")') index + stop "WATER_HASH is invalid." + end if + end do + write(*,*) "WATER_HASH is valid." + + ! Read hash array generated from key array by the C version of pengy hash + open(newunit=lun, file="c_pengy_hash_array.bin", form="unformatted", & + access="stream", status="old", action="read") + read(lun) c_pengy_hash + close(lun) + + do index=0, 2048 + if ( c_pengy_hash(index) /= & + pengy_hash(key_array(1:index), pengy_seed) ) then + write(*,'("PENGY_HASH failed for KEY_ARRAY(1:", I0, ")")') index + stop "PENGY_HASH is invalid." + end if + end do + write(*,*) "PENGY_HASH is valid." + + ! Read hash array generated from key array by the C version of Spooky hash + open(newunit=lun, file="c_spooky_hash_array.bin", form="unformatted", & + access="stream", status="old", action="read") + do index=0, 2048 + read(lun) c_spooky_hash(:, index) + end do + close(lun) + + do index=0, 2048 + if ( .not. all( c_spooky_hash(:,index) == & + spooky_hash(key_array(1:index), spooky_seed) ) ) then + write(*,'("SPOOKY_HASH failed for KEY_ARRAY(:,1:", I0, ")")') index + stop "SPOOKY_HASH is invalid." 
+ end if + end do + write(*,*) "SPOOKY_HASH is valid." + +end program hash_validity_test diff --git a/src/tests/hash_functions/validation/nmhash.c b/src/tests/hash_functions/validation/nmhash.c new file mode 100644 index 000000000..987bc568c --- /dev/null +++ b/src/tests/hash_functions/validation/nmhash.c @@ -0,0 +1,8 @@ +#include "nmhash.h" +int32_t nmhash32_test ( const void * key, size_t len, uint32_t seed ) { + return NMHASH32 (key, (const size_t) len, seed); +} + +int32_t nmhash32x_test ( const void * key, size_t len, uint32_t seed ) { + return NMHASH32X (key, (const size_t) len, seed); +} diff --git a/src/tests/hash_functions/validation/nmhash.h b/src/tests/hash_functions/validation/nmhash.h new file mode 100644 index 000000000..21bb90022 --- /dev/null +++ b/src/tests/hash_functions/validation/nmhash.h @@ -0,0 +1,832 @@ +/* + * verification: + * NMHASH32: + * rurban/smhasher: 0x12A30553 + * demerphq/smhasher: 0x3D8F6C47 + * NMHASH32X: + * rurban/smhasher: 0xA8580227 + * demerphq/smhasher: 0x40B451B3 + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef _nmhash_h_ +#define _nmhash_h_ + +#define NMH_VERSION 2 + +#ifdef _MSC_VER +# pragma warning(push, 3) +#endif + +#if defined(__cplusplus) && __cplusplus < 201103L +# define __STDC_CONSTANT_MACROS 1 +#endif + +#include <stdint.h> /* uint16_t, uint32_t, UINT32_C */ +#include <string.h> /* memcpy */ + +#if defined(__GNUC__) +# if defined(__AVX2__) +# include <immintrin.h> +# elif defined(__SSE2__) +# include <emmintrin.h> +# endif +#elif defined(_MSC_VER) +# include <intrin.h> +#endif + +#ifdef _MSC_VER +# pragma warning(pop) +#endif + +#if (defined(__GNUC__) && (__GNUC__ >= 3)) \ + || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \ + || defined(__clang__) +# define NMH_likely(x) __builtin_expect(x, 1) +#else +# define NMH_likely(x) (x) +#endif + +#if defined(__has_builtin) +# if __has_builtin(__builtin_rotateleft32) +# define NMH_rotl32 __builtin_rotateleft32 /* clang */ +# endif +#endif +#if !defined(NMH_rotl32) +# if defined(_MSC_VER) + /* Note: although _rotl exists for minGW (GCC under windows), 
performance seems poor */ +# define NMH_rotl32(x,r) _rotl(x,r) +# else +# define NMH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) +# endif +#endif + +#if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */ +# define NMH_RESTRICT /* disable */ +#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* >= C99 */ +# define NMH_RESTRICT restrict +#elif defined(__cplusplus) && (defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)) +# define NMH_RESTRICT __restrict__ +#elif defined(__cplusplus) && defined(_MSC_VER) +# define NMH_RESTRICT __restrict +#else +# define NMH_RESTRICT /* disable */ +#endif + +/* endian macros */ +#ifndef NMHASH_LITTLE_ENDIAN +# if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || defined(__x86_64__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || defined(__SDCC) +# define NMHASH_LITTLE_ENDIAN 1 +# elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define NMHASH_LITTLE_ENDIAN 0 +# else +# warning could not determine endianness! Falling back to little endian. 
+# define NMHASH_LITTLE_ENDIAN 1 +# endif +#endif + +/* vector macros */ +#define NMH_SCALAR 0 +#define NMH_SSE2 1 +#define NMH_AVX2 2 +#define NMH_AVX512 3 + +#ifndef NMH_VECTOR /* can be defined on command line */ +# if defined(__AVX512BW__) +# define NMH_VECTOR NMH_AVX512 /* _mm512_mullo_epi16 requires AVX512BW */ +# elif defined(__AVX2__) +# define NMH_VECTOR NMH_AVX2 /* add '-mno-avx256-split-unaligned-load' and '-mno-avx256-split-unaligned-store' for gcc */ +# elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2)) +# define NMH_VECTOR NMH_SSE2 +# else +# define NMH_VECTOR NMH_SCALAR +# endif +#endif + +/* align macros */ +#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11+ */ +# include <stdalign.h> /* provides the alignas macro in C11 */ +# define NMH_ALIGN(n) alignas(n) +#elif defined(__GNUC__) +# define NMH_ALIGN(n) __attribute__ ((aligned(n))) +#elif defined(_MSC_VER) +# define NMH_ALIGN(n) __declspec(align(n)) +#else +# define NMH_ALIGN(n) /* disabled */ +#endif + +#if NMH_VECTOR > 0 +# define NMH_ACC_ALIGN 64 +#elif defined(__BIGGEST_ALIGNMENT__) +# define NMH_ACC_ALIGN __BIGGEST_ALIGNMENT__ +#elif defined(__SDCC) +# define NMH_ACC_ALIGN 1 +#else +# define NMH_ACC_ALIGN 16 +#endif + +/* constants */ + +/* primes from xxh */ +#define NMH_PRIME32_1 UINT32_C(0x9E3779B1) +#define NMH_PRIME32_2 UINT32_C(0x85EBCA77) +#define NMH_PRIME32_3 UINT32_C(0xC2B2AE3D) +#define NMH_PRIME32_4 UINT32_C(0x27D4EB2F) + +/*! Pseudorandom secret taken directly from FARSH. 
*/ +NMH_ALIGN(NMH_ACC_ALIGN) static const uint32_t NMH_ACC_INIT[32] = { + UINT32_C(0xB8FE6C39), UINT32_C(0x23A44BBE), UINT32_C(0x7C01812C), UINT32_C(0xF721AD1C), + UINT32_C(0xDED46DE9), UINT32_C(0x839097DB), UINT32_C(0x7240A4A4), UINT32_C(0xB7B3671F), + UINT32_C(0xCB79E64E), UINT32_C(0xCCC0E578), UINT32_C(0x825AD07D), UINT32_C(0xCCFF7221), + UINT32_C(0xB8084674), UINT32_C(0xF743248E), UINT32_C(0xE03590E6), UINT32_C(0x813A264C), + + UINT32_C(0x3C2852BB), UINT32_C(0x91C300CB), UINT32_C(0x88D0658B), UINT32_C(0x1B532EA3), + UINT32_C(0x71644897), UINT32_C(0xA20DF94E), UINT32_C(0x3819EF46), UINT32_C(0xA9DEACD8), + UINT32_C(0xA8FA763F), UINT32_C(0xE39C343F), UINT32_C(0xF9DCBBC7), UINT32_C(0xC70B4F1D), + UINT32_C(0x8A51E04B), UINT32_C(0xCDB45931), UINT32_C(0xC89F7EC9), UINT32_C(0xD9787364), +}; + +#if defined(_MSC_VER) && _MSC_VER >= 1914 +# pragma warning(push) +# pragma warning(disable: 5045) +#endif +#ifdef __SDCC +# define const +# pragma save +# pragma disable_warning 110 +# pragma disable_warning 126 +#endif + +/* read functions */ +static inline +uint32_t +NMH_readLE32(const void *const p) +{ + uint32_t v; + memcpy(&v, p, 4); +# if (NMHASH_LITTLE_ENDIAN) + return v; +# elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) + return __builtin_bswap32(v); +# elif defined(_MSC_VER) + return _byteswap_ulong(v); +# else + return ((v >> 24) & 0xff) | ((v >> 8) & 0xff00) | ((v << 8) & 0xff0000) | ((v << 24) & 0xff000000); +# endif +} + +static inline +uint16_t +NMH_readLE16(const void *const p) +{ + uint16_t v; + memcpy(&v, p, 2); +# if (NMHASH_LITTLE_ENDIAN) + return v; +# else + return (uint16_t)((v << 8) | (v >> 8)); +# endif +} + +static inline +uint32_t +NMHASH32_0to8(uint32_t const x, uint32_t const seed2) +{ + /* base mixer: [-6 -12 776bf593 -19 11 3fb39c65 -15 -9 e9139917 -11 16] = 0.027071104091278835 */ + const uint32_t m1 = UINT32_C(0x776BF593); + const uint32_t m2 = UINT32_C(0x3FB39C65); + const uint32_t m3 = UINT32_C(0xE9139917); + +# if 
NMH_VECTOR == NMH_SCALAR + { + union { uint32_t u32; uint16_t u16[2]; } vx; + vx.u32 = x; + vx.u32 ^= (vx.u32 >> 12) ^ (vx.u32 >> 6); + vx.u16[0] *= (uint16_t)m1; + vx.u16[1] *= (uint16_t)(m1 >> 16); + vx.u32 ^= (vx.u32 << 11) ^ ( vx.u32 >> 19); + vx.u16[0] *= (uint16_t)m2; + vx.u16[1] *= (uint16_t)(m2 >> 16); + vx.u32 ^= seed2; + vx.u32 ^= (vx.u32 >> 15) ^ ( vx.u32 >> 9); + vx.u16[0] *= (uint16_t)m3; + vx.u16[1] *= (uint16_t)(m3 >> 16); + vx.u32 ^= (vx.u32 << 16) ^ ( vx.u32 >> 11); + return vx.u32; + } +# else /* at least NMH_SSE2 */ + { + __m128i hv = _mm_setr_epi32((int)x, 0, 0, 0); + const __m128i sv = _mm_setr_epi32((int)seed2, 0, 0, 0); + const uint32_t *const result = (const uint32_t*)&hv; + + hv = _mm_xor_si128(_mm_xor_si128(hv, _mm_srli_epi32(hv, 12)), _mm_srli_epi32(hv, 6)); + hv = _mm_mullo_epi16(hv, _mm_setr_epi32((int)m1, 0, 0, 0)); + hv = _mm_xor_si128(_mm_xor_si128(hv, _mm_slli_epi32(hv, 11)), _mm_srli_epi32(hv, 19)); + hv = _mm_mullo_epi16(hv, _mm_setr_epi32((int)m2, 0, 0, 0)); + + hv = _mm_xor_si128(hv, sv); + + hv = _mm_xor_si128(_mm_xor_si128(hv, _mm_srli_epi32(hv, 15)), _mm_srli_epi32(hv, 9)); + hv = _mm_mullo_epi16(hv, _mm_setr_epi32((int)m3, 0, 0, 0)); + hv = _mm_xor_si128(_mm_xor_si128(hv, _mm_slli_epi32(hv, 16)), _mm_srli_epi32(hv, 11)); + + return *result; + } +# endif +} + +#define __NMH_M1 UINT32_C(0xF0D9649B) +#define __NMH_M2 UINT32_C(0x29A7935D) +#define __NMH_M3 UINT32_C(0x55D35831) + +NMH_ALIGN(NMH_ACC_ALIGN) static const uint32_t __NMH_M1_V[32] = { + __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, + __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, + __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, + __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, +}; +NMH_ALIGN(NMH_ACC_ALIGN) static const uint32_t __NMH_M2_V[32] = { + __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, + __NMH_M2, 
__NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, + __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, + __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, +}; +NMH_ALIGN(NMH_ACC_ALIGN) static const uint32_t __NMH_M3_V[32] = { + __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, + __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, + __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, + __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, +}; + +static inline +uint32_t +NMHASH32_9to255(const uint8_t* const NMH_RESTRICT p, size_t const len, uint32_t const seed, int const type) +{ + /* base mixer: [f0d9649b 5 -13 29a7935d -9 11 55d35831 -20 -10 ] = 0.93495901789135362 */ + uint32_t result = 0; +# if NMH_VECTOR == NMH_SCALAR + { + union { uint32_t u32; uint16_t u16[2]; } x[4], y[4]; + uint32_t const sl = seed + (uint32_t)len; + size_t j; + x[0].u32 = NMH_PRIME32_1; + x[1].u32 = NMH_PRIME32_2; + x[2].u32 = NMH_PRIME32_3; + x[3].u32 = NMH_PRIME32_4; + for (j = 0; j < 4; ++j) y[j].u32 = sl; + + if (type) { + /* 33 to 255 bytes */ + size_t const r = (len - 1) / 32; + size_t i; + for (i = 0; i < r; ++i) { + for (j = 0; j < 4; ++j) x[j].u32 ^= NMH_readLE32(p + i * 32 + j * 4); + for (j = 0; j < 4; ++j) y[j].u32 ^= NMH_readLE32(p + i * 32 + j * 4 + 16); + for (j = 0; j < 4; ++j) x[j].u32 += y[j].u32; + + for (j = 0; j < 4; ++j) { + x[j].u16[0] *= (uint16_t)(__NMH_M1 & 0xFFFF); + x[j].u16[1] *= (uint16_t)(__NMH_M1 >> 16); + } + for (j = 0; j < 4; ++j) x[j].u32 ^= (x[j].u32 << 5) ^ (x[j].u32 >> 13); + for (j = 0; j < 4; ++j) { + x[j].u16[0] *= (uint16_t)(__NMH_M2 & 0xFFFF); + x[j].u16[1] *= (uint16_t)(__NMH_M2 >> 16); + } + + for (j = 0; j < 4; ++j) x[j].u32 ^= y[j].u32; + + for (j = 0; j < 4; ++j) x[j].u32 ^= (x[j].u32 << 11) ^ (x[j].u32 >> 9); + for (j = 0; j < 4; ++j) { + x[j].u16[0] *= 
(uint16_t)(__NMH_M3 & 0xFFFF); + x[j].u16[1] *= (uint16_t)(__NMH_M3 >> 16); + } + for (j = 0; j < 4; ++j) x[j].u32 ^= (x[j].u32 >> 10) ^ (x[j].u32 >> 20); + } + for (j = 0; j < 4; ++j) x[j].u32 ^= NMH_readLE32(p + len - 32 + j * 4); + for (j = 0; j < 4; ++j) y[j].u32 ^= NMH_readLE32(p + len - 16 + j * 4); + } else { + /* 9 to 32 bytes */ + x[0].u32 ^= NMH_readLE32(p); + x[1].u32 ^= NMH_readLE32(p + ((len>>4)<<3)); + x[2].u32 ^= NMH_readLE32(p + len - 8); + x[3].u32 ^= NMH_readLE32(p + len - 8 - ((len>>4)<<3)); + y[0].u32 ^= NMH_readLE32(p + 4); + y[1].u32 ^= NMH_readLE32(p + ((len>>4)<<3) + 4); + y[2].u32 ^= NMH_readLE32(p + len - 8 + 4); + y[3].u32 ^= NMH_readLE32(p + len - 8 - ((len>>4)<<3) + 4); + } + + for (j = 0; j < 4; ++j) x[j].u32 += y[j].u32; + for (j = 0; j < 4; ++j) y[j].u32 ^= (y[j].u32 << 17) ^ (y[j].u32 >> 6); + + for (j = 0; j < 4; ++j) { + x[j].u16[0] *= (uint16_t)(__NMH_M1 & 0xFFFF); + x[j].u16[1] *= (uint16_t)(__NMH_M1 >> 16); + } + for (j = 0; j < 4; ++j) x[j].u32 ^= (x[j].u32 << 5) ^ (x[j].u32 >> 13); + for (j = 0; j < 4; ++j) { + x[j].u16[0] *= (uint16_t)(__NMH_M2 & 0xFFFF); + x[j].u16[1] *= (uint16_t)(__NMH_M2 >> 16); + } + + for (j = 0; j < 4; ++j) x[j].u32 ^= y[j].u32; + + for (j = 0; j < 4; ++j) x[j].u32 ^= (x[j].u32 << 11) ^ (x[j].u32 >> 9); + for (j = 0; j < 4; ++j) { + x[j].u16[0] *= (uint16_t)(__NMH_M3 & 0xFFFF); + x[j].u16[1] *= (uint16_t)(__NMH_M3 >> 16); + } + for (j = 0; j < 4; ++j) x[j].u32 ^= (x[j].u32 >> 10) ^ (x[j].u32 >> 20); + + x[0].u32 ^= NMH_PRIME32_1; + x[1].u32 ^= NMH_PRIME32_2; + x[2].u32 ^= NMH_PRIME32_3; + x[3].u32 ^= NMH_PRIME32_4; + + for (j = 1; j < 4; ++j) x[0].u32 += x[j].u32; + + x[0].u32 ^= sl + (sl >> 5); + x[0].u16[0] *= (uint16_t)(__NMH_M3 & 0xFFFF); + x[0].u16[1] *= (uint16_t)(__NMH_M3 >> 16); + x[0].u32 ^= (x[0].u32 >> 10) ^ (x[0].u32 >> 20); + + result = x[0].u32; + } +# else /* at least NMH_SSE2 */ + { + __m128i const h0 = _mm_setr_epi32((int)NMH_PRIME32_1, (int)NMH_PRIME32_2, (int)NMH_PRIME32_3, 
(int)NMH_PRIME32_4); + __m128i const sl = _mm_set1_epi32((int)seed + (int)len);/* NOTE(review): the two (int) casts are added as signed ints, which can overflow for large seeds; the scalar branch adds seed and (uint32_t)len in unsigned arithmetic */ + __m128i const m1 = _mm_set1_epi32((int)__NMH_M1); + __m128i const m2 = _mm_set1_epi32((int)__NMH_M2); + __m128i const m3 = _mm_set1_epi32((int)__NMH_M3); + __m128i x = h0; + __m128i y = sl; + const uint32_t *const px = (const uint32_t*)&x; +/* x starts from h0 and y from sl (same value in every 32-bit lane); px views x as 32-bit words */ + if (type) { + /* 33 to 255 bytes: r full 32-byte blocks are absorbed and mixed in the loop, then the last 32 bytes of the input are XORed in (they overlap the last full block unless len is a multiple of 32) */ + size_t const r = (len - 1) / 32; + size_t i; + for (i = 0; i < r; ++i) { + x = _mm_xor_si128(x, _mm_loadu_si128((const __m128i *)(p + i * 32))); + y = _mm_xor_si128(y, _mm_loadu_si128((const __m128i *)(p + i * 32 + 16))); + x = _mm_add_epi32(x, y); + x = _mm_mullo_epi16(x, m1); + x = _mm_xor_si128(_mm_xor_si128(x, _mm_slli_epi32(x, 5)), _mm_srli_epi32(x, 13)); + x = _mm_mullo_epi16(x, m2); + x = _mm_xor_si128(x, y); + x = _mm_xor_si128(_mm_xor_si128(x, _mm_slli_epi32(x, 11)), _mm_srli_epi32(x, 9)); + x = _mm_mullo_epi16(x, m3); + x = _mm_xor_si128(_mm_xor_si128(x, _mm_srli_epi32(x, 10)), _mm_srli_epi32(x, 20)); + } + x = _mm_xor_si128(x, _mm_loadu_si128((const __m128i *)(p + len - 32))); + y = _mm_xor_si128(y, _mm_loadu_si128((const __m128i *)(p + len - 16))); + } else { + /* 9 to 32 bytes: four 8-byte pairs (low word into x, high word into y) read at the start, at offset (len>>4)<<3, at the last 8 bytes, and at (len>>4)<<3 bytes before the last 8; that offset is 0, 8 or 16 */ + x = _mm_xor_si128(x, _mm_setr_epi32((int)NMH_readLE32(p), (int)NMH_readLE32(p + ((len>>4)<<3)), (int)NMH_readLE32(p + len - 8), (int)NMH_readLE32(p + len - 8 - ((len>>4)<<3)))); + y = _mm_xor_si128(y, _mm_setr_epi32((int)NMH_readLE32(p + 4), (int)NMH_readLE32(p + ((len>>4)<<3) + 4), (int)NMH_readLE32(p + len - 8 + 4), (int)NMH_readLE32(p + len - 8 - ((len>>4)<<3) + 4))); + } +/* final mixing round, shared by both length classes; the 16-bit lane multiplies correspond to the separate u16[0] and u16[1] products of the scalar branch */ + x = _mm_add_epi32(x, y); + + y = _mm_xor_si128(_mm_xor_si128(y, _mm_slli_epi32(y, 17)), _mm_srli_epi32(y, 6)); + + x = _mm_mullo_epi16(x, m1); + x = _mm_xor_si128(_mm_xor_si128(x, _mm_slli_epi32(x, 5)), _mm_srli_epi32(x, 13)); + x = _mm_mullo_epi16(x, m2); + x = _mm_xor_si128(x, y); + x = _mm_xor_si128(_mm_xor_si128(x, _mm_slli_epi32(x, 11)), _mm_srli_epi32(x, 9)); + x = _mm_mullo_epi16(x, m3); + x = _mm_xor_si128(_mm_xor_si128(x, 
_mm_srli_epi32(x, 10)), _mm_srli_epi32(x, 20)); + + x = _mm_xor_si128(x, h0); + x = _mm_add_epi32(x, _mm_srli_si128(x, 4)); + x = _mm_add_epi32(x, _mm_srli_si128(x, 8)); + + x = _mm_xor_si128(x, _mm_add_epi32(sl, _mm_srli_epi32(sl, 5))); + x = _mm_mullo_epi16(x, m3); + x = _mm_xor_si128(_mm_xor_si128(x, _mm_srli_epi32(x, 10)), _mm_srli_epi32(x, 20)); + + result = *px; + } +# endif + return *&result; +} +#define NMHASH32_9to32(p, len, seed) NMHASH32_9to255(p, len, seed, 0) +#define NMHASH32_33to255(p, len, seed) NMHASH32_9to255(p, len, seed, 1) + +#undef __NMH_M1 +#undef __NMH_M2 +#undef __NMH_M3 + +#if NMH_VECTOR == NMH_SCALAR +#define NMHASH32_long_round NMHASH32_long_round_scalar +static inline +void +NMHASH32_long_round_scalar(uint32_t *const NMH_RESTRICT accX, uint32_t *const NMH_RESTRICT accY, const uint8_t* const NMH_RESTRICT p) +{ + /* breadth first calculation will hint some compiler to auto vectorize the code + * on gcc, the performance becomes 10x than the depth first, and about 80% of the manually vectorized code + */ + const size_t nbGroups = sizeof(NMH_ACC_INIT) / sizeof(*NMH_ACC_INIT); + size_t i; + + for (i = 0; i < nbGroups; ++i) { + accX[i] ^= NMH_readLE32(p + i * 4); + } + for (i = 0; i < nbGroups; ++i) { + accY[i] ^= NMH_readLE32(p + i * 4 + sizeof(NMH_ACC_INIT)); + } + for (i = 0; i < nbGroups; ++i) { + accX[i] += accY[i]; + } + for (i = 0; i < nbGroups; ++i) { + accY[i] ^= accX[i] >> 1; + } + for (i = 0; i < nbGroups * 2; ++i) { + ((uint16_t*)accX)[i] *= ((uint16_t*)__NMH_M1_V)[i]; + } + for (i = 0; i < nbGroups; ++i) { + accX[i] ^= accX[i] << 5 ^ accX[i] >> 13; + } + for (i = 0; i < nbGroups * 2; ++i) { + ((uint16_t*)accX)[i] *= ((uint16_t*)__NMH_M2_V)[i]; + } + for (i = 0; i < nbGroups; ++i) { + accX[i] ^= accY[i]; + } + for (i = 0; i < nbGroups; ++i) { + accX[i] ^= accX[i] << 11 ^ accX[i] >> 9; + } + for (i = 0; i < nbGroups * 2; ++i) { + ((uint16_t*)accX)[i] *= ((uint16_t*)__NMH_M3_V)[i]; + } + for (i = 0; i < nbGroups; ++i) { + accX[i] ^= 
accX[i] >> 10 ^ accX[i] >> 20; + } +} +#endif + +#if NMH_VECTOR == NMH_SSE2 +# define _NMH_MM_(F) _mm_ ## F +# define _NMH_MMW_(F) _mm_ ## F ## 128 +# define _NMH_MM_T __m128i +#elif NMH_VECTOR == NMH_AVX2 +# define _NMH_MM_(F) _mm256_ ## F +# define _NMH_MMW_(F) _mm256_ ## F ## 256 +# define _NMH_MM_T __m256i +#elif NMH_VECTOR == NMH_AVX512 +# define _NMH_MM_(F) _mm512_ ## F +# define _NMH_MMW_(F) _mm512_ ## F ## 512 +# define _NMH_MM_T __m512i +#endif + +#if NMH_VECTOR == NMH_SSE2 || NMH_VECTOR == NMH_AVX2 || NMH_VECTOR == NMH_AVX512 +# define NMHASH32_long_round NMHASH32_long_round_sse +# define NMH_VECTOR_NB_GROUP (sizeof(NMH_ACC_INIT) / sizeof(*NMH_ACC_INIT) / (sizeof(_NMH_MM_T) / sizeof(*NMH_ACC_INIT))) +static inline +void +NMHASH32_long_round_sse(uint32_t *const NMH_RESTRICT accX, uint32_t *const NMH_RESTRICT accY, const uint8_t* const NMH_RESTRICT p) +{ + const _NMH_MM_T *const NMH_RESTRICT m1 = (const _NMH_MM_T * NMH_RESTRICT)__NMH_M1_V; + const _NMH_MM_T *const NMH_RESTRICT m2 = (const _NMH_MM_T * NMH_RESTRICT)__NMH_M2_V; + const _NMH_MM_T *const NMH_RESTRICT m3 = (const _NMH_MM_T * NMH_RESTRICT)__NMH_M3_V; + _NMH_MM_T *const xaccX = ( _NMH_MM_T * )accX; + _NMH_MM_T *const xaccY = ( _NMH_MM_T * )accY; + _NMH_MM_T *const xp = ( _NMH_MM_T * )p; + size_t i; + + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MMW_(xor_si)(xaccX[i], _NMH_MMW_(loadu_si)(xp + i)); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccY[i] = _NMH_MMW_(xor_si)(xaccY[i], _NMH_MMW_(loadu_si)(xp + i + NMH_VECTOR_NB_GROUP)); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MM_(add_epi32)(xaccX[i], xaccY[i]); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccY[i] = _NMH_MMW_(xor_si)(xaccY[i], _NMH_MM_(srli_epi32)(xaccX[i], 1)); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MM_(mullo_epi16)(xaccX[i], *m1); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MMW_(xor_si)(_NMH_MMW_(xor_si)(xaccX[i], 
_NMH_MM_(slli_epi32)(xaccX[i], 5)), _NMH_MM_(srli_epi32)(xaccX[i], 13)); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MM_(mullo_epi16)(xaccX[i], *m2); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MMW_(xor_si)(xaccX[i], xaccY[i]); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MMW_(xor_si)(_NMH_MMW_(xor_si)(xaccX[i], _NMH_MM_(slli_epi32)(xaccX[i], 11)), _NMH_MM_(srli_epi32)(xaccX[i], 9)); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MM_(mullo_epi16)(xaccX[i], *m3); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MMW_(xor_si)(_NMH_MMW_(xor_si)(xaccX[i], _NMH_MM_(srli_epi32)(xaccX[i], 10)), _NMH_MM_(srli_epi32)(xaccX[i], 20)); + } +} +# undef _NMH_MM_ +# undef _NMH_MMW_ +# undef _NMH_MM_T +# undef NMH_VECTOR_NB_GROUP +#endif +/* NMHASH32_long: core of the long-input path. The input is consumed in stripes of sizeof(accX) plus sizeof(accY) bytes by NMHASH32_long_round: nbRounds stripes from the start, then one more stripe that ends exactly at the end of the input. Returns the sum of the merged accX lanes XORed with the low 32 bits of len; both dispatchers in this file pass that value through an avalanche function. The last round starts one stripe before the end of the input, so len must be at least one stripe long (the dispatchers only call this for len >= 256). */ +static +uint32_t +NMHASH32_long(const uint8_t* const NMH_RESTRICT p, size_t const len, uint32_t const seed) +{ + NMH_ALIGN(NMH_ACC_ALIGN) uint32_t accX[sizeof(NMH_ACC_INIT)/sizeof(*NMH_ACC_INIT)]; + NMH_ALIGN(NMH_ACC_ALIGN) uint32_t accY[sizeof(accX)/sizeof(*accX)]; + size_t const nbRounds = (len - 1) / (sizeof(accX) + sizeof(accY)); + size_t i; + uint32_t sum = 0; + + /* init: accX is copied from the NMH_ACC_INIT table, every accY lane is set to seed */ + for (i = 0; i < sizeof(accX)/sizeof(*accX); ++i) accX[i] = NMH_ACC_INIT[i]; + for (i = 0; i < sizeof(accY)/sizeof(*accY); ++i) accY[i] = seed; +/* nbRounds is computed from len - 1, so at least one byte is always left for the final round; that final stripe overlaps the previous one unless len is a multiple of the stripe size */ + for (i = 0; i < nbRounds; ++i) { + NMHASH32_long_round(accX, accY, p + i * (sizeof(accX) + sizeof(accY))); + } + NMHASH32_long_round(accX, accY, p + len - (sizeof(accX) + sizeof(accY))); + + /* merge acc: XOR NMH_ACC_INIT back into accX, then add all accX lanes into sum (accY is not read again here) */ + for (i = 0; i < sizeof(accX)/sizeof(*accX); ++i) accX[i] ^= NMH_ACC_INIT[i]; + for (i = 0; i < sizeof(accX)/sizeof(*accX); ++i) sum += accX[i]; +/* Intended to fold the upper 32 bits of len into sum when size_t is wider than 32 bits. NOTE(review): inside #if every unsigned operand is evaluated as uintmax_t, so if UINT32_C(-1) expands to -1U it equals UINTMAX_MAX and the comparison is false; confirm whether this branch is ever compiled before relying on it. */ +# if SIZE_MAX > UINT32_C(-1) + sum += (uint32_t)(len >> 32); +# endif + return sum ^ (uint32_t)len; +} + +static inline +uint32_t +NMHASH32_avalanche32(uint32_t const x) +{ + /* [-21 -8 cce5196d 12 -7 464be229 -21 -8] = 3.2267098842182733 */ + const uint32_t m1 = 
UINT32_C(0xCCE5196D); + const uint32_t m2 = UINT32_C(0x464BE229); + union { uint32_t u32; uint16_t u16[2]; } vx; + vx.u32 = x; + vx.u32 ^= (vx.u32 >> 8) ^ (vx.u32 >> 21); + vx.u16[0] = (uint16_t)(vx.u16[0] * (uint16_t)m1); + vx.u16[1] = (uint16_t)(vx.u16[1] * (uint16_t)(m1 >> 16)); + vx.u32 ^= (vx.u32 << 12) ^ (vx.u32 >> 7); + vx.u16[0] = (uint16_t)(vx.u16[0] * (uint16_t)m2); + vx.u16[1] = (uint16_t)(vx.u16[1] * (uint16_t)(m2 >> 16)); + return vx.u32 ^ (vx.u32 >> 8) ^ (vx.u32 >> 21); +} +/* NMHASH32: 32-bit hash of the len bytes at input, parameterised by seed. Dispatch by length: 0..4 bytes are packed into one word, seed gets a per-length constant added, and both go through NMHASH32_0to8; 5..8 bytes use the first and the last four bytes (little-endian reads that overlap when len is below 8) through NMHASH32_0to8; 9..32 bytes use NMHASH32_9to32; 33..255 bytes use NMHASH32_33to255; 256 bytes and more use NMHASH32_long followed by NMHASH32_avalanche32. */ +static inline +uint32_t +NMHASH32(const void* const NMH_RESTRICT input, size_t const len, uint32_t seed) +{ + const uint8_t *const p = (const uint8_t *)input; + if (NMH_likely(len <= 32)) { + if(NMH_likely(len > 8)) { + return NMHASH32_9to32(p, len, seed); + } + if(NMH_likely(len > 4)) {/* 5 to 8 bytes */ + uint32_t x = NMH_readLE32(p); + uint32_t y = NMH_readLE32(p + len - 4) ^ (NMH_PRIME32_4 + 2 + seed); + x += y; + x ^= x << (len + 7); + return NMHASH32_0to8(x, NMH_rotl32(y, 5)); + } else {/* 0 to 4 bytes: build data.u32 from the key bytes and fold a length-dependent constant into seed */ + union { uint32_t u32; uint16_t u16[2]; uint8_t u8[4]; } data; + switch (len) { + case 0: seed += NMH_PRIME32_2; + data.u32 = 0; + break; + case 1: seed += NMH_PRIME32_2 + (UINT32_C(1) << 24) + (1 << 1); + data.u32 = p[0]; + break; + case 2: seed += NMH_PRIME32_2 + (UINT32_C(2) << 24) + (2 << 1); + data.u32 = NMH_readLE16(p); + break; + case 3: seed += NMH_PRIME32_2 + (UINT32_C(3) << 24) + (3 << 1); + data.u16[1] = p[2]; + data.u16[0] = NMH_readLE16(p); + break; + case 4: seed += NMH_PRIME32_3; + data.u32 = NMH_readLE32(p); + break; + default: return 0;/* not reachable: this branch is only entered with len <= 4 */ + } + return NMHASH32_0to8(data.u32 + seed, NMH_rotl32(seed, 5)); + } + } + if (NMH_likely(len < 256)) { + return NMHASH32_33to255(p, len, seed); + } + return NMHASH32_avalanche32(NMHASH32_long(p, len, seed)); +} + +static inline +uint32_t +NMHASH32X_0to4(uint32_t x, uint32_t const seed) +{ + /* [bdab1ea9 18 a7896a1b 12 83796a2d 16] = 0.092922873297662509 */ + x ^= seed; + x *= UINT32_C(0xBDAB1EA9); + x += NMH_rotl32(seed, 31); + x ^= x >> 18; + x *= 
UINT32_C(0xA7896A1B); + x ^= x >> 12; + x *= UINT32_C(0x83796A2D); + x ^= x >> 16; + return x; +} +/* NMHASH32X_5to8: hash of a 5 to 8 byte key at p. Reads the first and the last four bytes as little-endian words, combines them with NMH_PRIME32_3 and seed, and runs the three-multiply mixer below. */ +static inline +uint32_t +NMHASH32X_5to8(const uint8_t* const NMH_RESTRICT p, size_t const len, uint32_t const seed) +{ + /* - 5 to 8 bytes (the dispatcher only calls this for len 5..8; the two 4-byte reads overlap when len is below 8) + * - mixer: [11049a7d 23 bcccdc7b 12 065e9dad 12] = 0.16577596555667246 */ +/* x: first four bytes, y: last four bytes */ + uint32_t x = NMH_readLE32(p) ^ NMH_PRIME32_3; + uint32_t const y = NMH_readLE32(p + len - 4) ^ seed; + x += y; + x ^= x >> len;/* the shift amount is the key length itself */ + x *= UINT32_C(0x11049A7D); + x ^= x >> 23; + x *= UINT32_C(0xBCCCDC7B); + x ^= NMH_rotl32(y, 3); + x ^= x >> 12; + x *= UINT32_C(0x065E9DAD); + x ^= x >> 12; + return x; +} +/* NMHASH32X_9to255: hash of a key of at least 9 bytes using two independent lanes that are combined at the end. */ +static inline +uint32_t +NMHASH32X_9to255(const uint8_t* const NMH_RESTRICT p, size_t const len, uint32_t const seed) +{ + /* - at least 9 bytes (the dispatcher calls this for 9 to 255 bytes) + * - base mixer: [11049a7d 23 bcccdc7b 12 065e9dad 12] = 0.16577596555667246 + * - tail mixer: [16 a52fb2cd 15 551e4d49 16] = 0.17162579707098322 + */ +/* two lanes: (x, y) take bytes 0..7 of each 16-byte block, (a, b) take bytes 8..15 */ + uint32_t x = NMH_PRIME32_3; + uint32_t y = seed; + uint32_t a = NMH_PRIME32_4; + uint32_t b = seed; + size_t i, r = (len - 1) / 16; +/* r full 16-byte blocks; between 1 and 16 bytes are left for the tail handling after the loop */ + for (i = 0; i < r; ++i) { + x ^= NMH_readLE32(p + i * 16 + 0); + y ^= NMH_readLE32(p + i * 16 + 4); + x ^= y; + x *= UINT32_C(0x11049A7D); + x ^= x >> 23; + x *= UINT32_C(0xBCCCDC7B); + y = NMH_rotl32(y, 4); + x ^= y; + x ^= x >> 12; + x *= UINT32_C(0x065E9DAD); + x ^= x >> 12; +/* second lane: same mixer on bytes 8..15 of the block, with b rotated by 3 instead of 4 */ + a ^= NMH_readLE32(p + i * 16 + 8); + b ^= NMH_readLE32(p + i * 16 + 12); + a ^= b; + a *= UINT32_C(0x11049A7D); + a ^= a >> 23; + a *= UINT32_C(0xBCCCDC7B); + b = NMH_rotl32(b, 3); + a ^= b; + a ^= a >> 12; + a *= UINT32_C(0x065E9DAD); + a ^= a >> 12; + } +/* tail: bit 3 of len-1 is set when more than 8 bytes remain; bit 2 then tells whether more than 12 (bit 3 set) or more than 4 (bit 3 clear) remain, which decides how many extra 4-byte reads are needed */ + if (NMH_likely(((uint8_t)len-1) & 8)) { + if (NMH_likely(((uint8_t)len-1) & 4)) { + a ^= NMH_readLE32(p + r * 16 + 0); + b ^= NMH_readLE32(p + r * 16 + 4); + a ^= b; + a *= UINT32_C(0x11049A7D); + a ^= a >> 23; + a *= UINT32_C(0xBCCCDC7B); + a ^= NMH_rotl32(b, 4); + a ^= a >> 12; + a *= UINT32_C(0x065E9DAD); + } else { + a ^= NMH_readLE32(p + r * 16) + b; + a ^= a >> 16; + a *= 
UINT32_C(0xA52FB2CD); + a ^= a >> 15; + a *= UINT32_C(0x551E4D49); + } +/* more than 8 bytes remained: the last 8 bytes of the input go into x and y through the base mixer */ + x ^= NMH_readLE32(p + len - 8); + y ^= NMH_readLE32(p + len - 4); + x ^= y; + x *= UINT32_C(0x11049A7D); + x ^= x >> 23; + x *= UINT32_C(0xBCCCDC7B); + x ^= NMH_rotl32(y, 3); + x ^= x >> 12; + x *= UINT32_C(0x065E9DAD); + } else {/* at most 8 bytes remain: only the last 4 bytes go into x, through the tail mixer */ + if (NMH_likely(((uint8_t)len-1) & 4)) { + a ^= NMH_readLE32(p + r * 16) + b; + a ^= a >> 16; + a *= UINT32_C(0xA52FB2CD); + a ^= a >> 15; + a *= UINT32_C(0x551E4D49); + } + x ^= NMH_readLE32(p + len - 4) + y; + x ^= x >> 16; + x *= UINT32_C(0xA52FB2CD); + x ^= x >> 15; + x *= UINT32_C(0x551E4D49); + } +/* finalization: fold the length and the second lane a into x, then one more xorshift and multiply */ + x ^= (uint32_t)len; + x ^= NMH_rotl32(a, 27); /* rotate one lane to pass Diff test */ + x ^= x >> 14; + x *= UINT32_C(0x141CC535); + + return x; +} +/* NMHASH32X_avalanche32: final mixer applied by NMHASH32X to the result of NMHASH32_long; three xorshifts by 15 interleaved with two 32-bit multiplies. */ +static inline +uint32_t +NMHASH32X_avalanche32(uint32_t x) +{ + /* mixer with 2 mul from skeeto/hash-prospector: + * [15 d168aaad 15 af723597 15] = 0.15983776156606694 + */ + x ^= x >> 15; + x *= UINT32_C(0xD168AAAD); + x ^= x >> 15; + x *= UINT32_C(0xAF723597); + x ^= x >> 15; + return x; +} + +/* NMHASH32X: 32-bit hash of the len bytes at input that uses 32*32->32 multiplication for short keys. Dispatch by length: 0..4 bytes are packed into one word and mixed by NMHASH32X_0to4, 5..8 bytes use NMHASH32X_5to8, 9..255 bytes use NMHASH32X_9to255, and 256 bytes or more use NMHASH32_long followed by NMHASH32X_avalanche32. */ +static inline +uint32_t +NMHASH32X(const void* const NMH_RESTRICT input, size_t const len, uint32_t seed) +{ + const uint8_t *const p = (const uint8_t *)input; + if (NMH_likely(len <= 8)) { + if (NMH_likely(len > 4)) { + return NMHASH32X_5to8(p, len, seed); + } else { + /* 0-4 bytes: build data.u32 from the key bytes and add a length-dependent constant to seed */ + union { uint32_t u32; uint16_t u16[2]; uint8_t u8[4]; } data; + switch (len) { + case 0: seed += NMH_PRIME32_2; + data.u32 = 0; + break; + case 1: seed += NMH_PRIME32_2 + (UINT32_C(1) << 24) + (1 << 1); + data.u32 = p[0]; + break; + case 2: seed += NMH_PRIME32_2 + (UINT32_C(2) << 24) + (2 << 1); + data.u32 = NMH_readLE16(p); + break; + case 3: seed += NMH_PRIME32_2 + (UINT32_C(3) << 24) + (3 << 1); + data.u16[1] = p[2]; + data.u16[0] = NMH_readLE16(p); + break; + case 4: seed += NMH_PRIME32_1; + data.u32 = NMH_readLE32(p); + break; + default: return 0;/* not reachable: this branch is only entered with len <= 4 */ + } + return NMHASH32X_0to4(data.u32, 
seed); + } + } + if (NMH_likely(len < 256)) { + return NMHASH32X_9to255(p, len, seed); + } + return NMHASH32X_avalanche32(NMHASH32_long(p, len, seed)); +} + +#if defined(_MSC_VER) && _MSC_VER >= 1914 +# pragma warning(pop) +#endif +#ifdef __SDCC +# pragma restore +# undef const +#endif + +#endif /* _nmhash_h_ */ + +#ifdef __cplusplus +} +#endif diff --git a/src/tests/hash_functions/validation/nmhash_scalar.c b/src/tests/hash_functions/validation/nmhash_scalar.c new file mode 100644 index 000000000..051a65d5f --- /dev/null +++ b/src/tests/hash_functions/validation/nmhash_scalar.c @@ -0,0 +1,8 @@ +#include "nmhash_scalar.h" +int32_t nmhash32_test ( const void * key, size_t len, uint32_t seed ) { + return NMHASH32 (key, (const size_t) len, seed); +} + +int32_t nmhash32x_test ( const void * key, size_t len, uint32_t seed ) { + return NMHASH32X (key, (const size_t) len, seed); +} diff --git a/src/tests/hash_functions/validation/nmhash_scalar.h b/src/tests/hash_functions/validation/nmhash_scalar.h new file mode 100644 index 000000000..bee950670 --- /dev/null +++ b/src/tests/hash_functions/validation/nmhash_scalar.h @@ -0,0 +1,824 @@ +/* + * verification: + * NMHASH32: + * rurban/smhasher: 0x12A30553 + * demerphq/smhasher: 0x3D8F6C47 + * NMHASH32X: + * rurban/smhasher: 0xA8580227 + * demerphq/smhasher: 0x40B451B3 + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef _nmhash_h_ +#define _nmhash_h_ + +#define NMH_VERSION 2 + +#ifdef _MSC_VER +# pragma warning(push, 3) +#endif + +#if defined(__cplusplus) && __cplusplus < 201103L +# define __STDC_CONSTANT_MACROS 1 +#endif + +#include +#include + +#if defined(__GNUC__) +# if defined(__AVX2__) +# include +# elif defined(__SSE2__) +# include +# endif +#elif defined(_MSC_VER) +# include +#endif + +#ifdef _MSC_VER +# pragma warning(pop) +#endif + +#if (defined(__GNUC__) && (__GNUC__ >= 3)) \ + || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \ + || defined(__clang__) +# define NMH_likely(x) __builtin_expect(x, 1) 
+#else +# define NMH_likely(x) (x) +#endif + +#if defined(__has_builtin) +# if __has_builtin(__builtin_rotateleft32) +# define NMH_rotl32 __builtin_rotateleft32 /* clang */ +# endif +#endif +#if !defined(NMH_rotl32) +# if defined(_MSC_VER) + /* Note: although _rotl exists for minGW (GCC under windows), performance seems poor */ +# define NMH_rotl32(x,r) _rotl(x,r) +# else +# define NMH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) +# endif +#endif + +#if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */ +# define NMH_RESTRICT /* disable */ +#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* >= C99 */ +# define NMH_RESTRICT restrict +#elif defined(__cplusplus) && (defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)) +# define NMH_RESTRICT __restrict__ +#elif defined(__cplusplus) && defined(_MSC_VER) +# define NMH_RESTRICT __restrict +#else +# define NMH_RESTRICT /* disable */ +#endif + +/* endian macros */ +#ifndef NMHASH_LITTLE_ENDIAN +# if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || defined(__x86_64__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || defined(__SDCC) +# define NMHASH_LITTLE_ENDIAN 1 +# elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define NMHASH_LITTLE_ENDIAN 0 +# else +# warning could not determine endianness! Falling back to little endian. 
+# define NMHASH_LITTLE_ENDIAN 1 +# endif +#endif + +/* vector macros */ +#define NMH_SCALAR 0 +#define NMH_SSE2 1 +#define NMH_AVX2 2 +#define NMH_AVX512 3 + +#ifndef NMH_VECTOR /* can be defined on command line */ +# define NMH_VECTOR NMH_SCALAR +#endif + +/* align macros */ +#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11+ */ +# include +# define NMH_ALIGN(n) alignas(n) +#elif defined(__GNUC__) +# define NMH_ALIGN(n) __attribute__ ((aligned(n))) +#elif defined(_MSC_VER) +# define NMH_ALIGN(n) __declspec(align(n)) +#else +# define NMH_ALIGN(n) /* disabled */ +#endif + +#if NMH_VECTOR > 0 +# define NMH_ACC_ALIGN 64 +#elif defined(__BIGGEST_ALIGNMENT__) +# define NMH_ACC_ALIGN __BIGGEST_ALIGNMENT__ +#elif defined(__SDCC) +# define NMH_ACC_ALIGN 1 +#else +# define NMH_ACC_ALIGN 16 +#endif + +/* constants */ + +/* primes from xxh */ +#define NMH_PRIME32_1 UINT32_C(0x9E3779B1) +#define NMH_PRIME32_2 UINT32_C(0x85EBCA77) +#define NMH_PRIME32_3 UINT32_C(0xC2B2AE3D) +#define NMH_PRIME32_4 UINT32_C(0x27D4EB2F) + +/*! Pseudorandom secret taken directly from FARSH. 
*/ +NMH_ALIGN(NMH_ACC_ALIGN) static const uint32_t NMH_ACC_INIT[32] = { + UINT32_C(0xB8FE6C39), UINT32_C(0x23A44BBE), UINT32_C(0x7C01812C), UINT32_C(0xF721AD1C), + UINT32_C(0xDED46DE9), UINT32_C(0x839097DB), UINT32_C(0x7240A4A4), UINT32_C(0xB7B3671F), + UINT32_C(0xCB79E64E), UINT32_C(0xCCC0E578), UINT32_C(0x825AD07D), UINT32_C(0xCCFF7221), + UINT32_C(0xB8084674), UINT32_C(0xF743248E), UINT32_C(0xE03590E6), UINT32_C(0x813A264C), + + UINT32_C(0x3C2852BB), UINT32_C(0x91C300CB), UINT32_C(0x88D0658B), UINT32_C(0x1B532EA3), + UINT32_C(0x71644897), UINT32_C(0xA20DF94E), UINT32_C(0x3819EF46), UINT32_C(0xA9DEACD8), + UINT32_C(0xA8FA763F), UINT32_C(0xE39C343F), UINT32_C(0xF9DCBBC7), UINT32_C(0xC70B4F1D), + UINT32_C(0x8A51E04B), UINT32_C(0xCDB45931), UINT32_C(0xC89F7EC9), UINT32_C(0xD9787364), +}; + +#if defined(_MSC_VER) && _MSC_VER >= 1914 +# pragma warning(push) +# pragma warning(disable: 5045) +#endif +#ifdef __SDCC +# define const +# pragma save +# pragma disable_warning 110 +# pragma disable_warning 126 +#endif + +/* read functions */ +static inline +uint32_t +NMH_readLE32(const void *const p) +{ + uint32_t v; + memcpy(&v, p, 4); +# if (NMHASH_LITTLE_ENDIAN) + return v; +# elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) + return __builtin_bswap32(v); +# elif defined(_MSC_VER) + return _byteswap_ulong(v); +# else + return ((v >> 24) & 0xff) | ((v >> 8) & 0xff00) | ((v << 8) & 0xff0000) | ((v << 24) & 0xff000000); +# endif +} + +static inline +uint16_t +NMH_readLE16(const void *const p) +{ + uint16_t v; + memcpy(&v, p, 2); +# if (NMHASH_LITTLE_ENDIAN) + return v; +# else + return (uint16_t)((v << 8) | (v >> 8)); +# endif +} + +static inline +uint32_t +NMHASH32_0to8(uint32_t const x, uint32_t const seed2) +{ + /* base mixer: [-6 -12 776bf593 -19 11 3fb39c65 -15 -9 e9139917 -11 16] = 0.027071104091278835 */ + const uint32_t m1 = UINT32_C(0x776BF593); + const uint32_t m2 = UINT32_C(0x3FB39C65); + const uint32_t m3 = UINT32_C(0xE9139917); + +# if 
NMH_VECTOR == NMH_SCALAR + { + union { uint32_t u32; uint16_t u16[2]; } vx; + vx.u32 = x; + vx.u32 ^= (vx.u32 >> 12) ^ (vx.u32 >> 6); + vx.u16[0] *= (uint16_t)m1; + vx.u16[1] *= (uint16_t)(m1 >> 16); + vx.u32 ^= (vx.u32 << 11) ^ ( vx.u32 >> 19); + vx.u16[0] *= (uint16_t)m2; + vx.u16[1] *= (uint16_t)(m2 >> 16); + vx.u32 ^= seed2; + vx.u32 ^= (vx.u32 >> 15) ^ ( vx.u32 >> 9); + vx.u16[0] *= (uint16_t)m3; + vx.u16[1] *= (uint16_t)(m3 >> 16); + vx.u32 ^= (vx.u32 << 16) ^ ( vx.u32 >> 11); + return vx.u32; + } +# else /* at least NMH_SSE2 */ + { + __m128i hv = _mm_setr_epi32((int)x, 0, 0, 0); + const __m128i sv = _mm_setr_epi32((int)seed2, 0, 0, 0); + const uint32_t *const result = (const uint32_t*)&hv; + + hv = _mm_xor_si128(_mm_xor_si128(hv, _mm_srli_epi32(hv, 12)), _mm_srli_epi32(hv, 6)); + hv = _mm_mullo_epi16(hv, _mm_setr_epi32((int)m1, 0, 0, 0)); + hv = _mm_xor_si128(_mm_xor_si128(hv, _mm_slli_epi32(hv, 11)), _mm_srli_epi32(hv, 19)); + hv = _mm_mullo_epi16(hv, _mm_setr_epi32((int)m2, 0, 0, 0)); + + hv = _mm_xor_si128(hv, sv); + + hv = _mm_xor_si128(_mm_xor_si128(hv, _mm_srli_epi32(hv, 15)), _mm_srli_epi32(hv, 9)); + hv = _mm_mullo_epi16(hv, _mm_setr_epi32((int)m3, 0, 0, 0)); + hv = _mm_xor_si128(_mm_xor_si128(hv, _mm_slli_epi32(hv, 16)), _mm_srli_epi32(hv, 11)); + + return *result; + } +# endif +} + +#define __NMH_M1 UINT32_C(0xF0D9649B) +#define __NMH_M2 UINT32_C(0x29A7935D) +#define __NMH_M3 UINT32_C(0x55D35831) + +NMH_ALIGN(NMH_ACC_ALIGN) static const uint32_t __NMH_M1_V[32] = { + __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, + __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, + __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, + __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, __NMH_M1, +}; +NMH_ALIGN(NMH_ACC_ALIGN) static const uint32_t __NMH_M2_V[32] = { + __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, + __NMH_M2, 
__NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, + __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, + __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, __NMH_M2, +}; +NMH_ALIGN(NMH_ACC_ALIGN) static const uint32_t __NMH_M3_V[32] = { + __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, + __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, + __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, + __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, __NMH_M3, +}; + +static inline +uint32_t +NMHASH32_9to255(const uint8_t* const NMH_RESTRICT p, size_t const len, uint32_t const seed, int const type) +{ + /* base mixer: [f0d9649b 5 -13 29a7935d -9 11 55d35831 -20 -10 ] = 0.93495901789135362 */ + uint32_t result = 0; +# if NMH_VECTOR == NMH_SCALAR + { + union { uint32_t u32; uint16_t u16[2]; } x[4], y[4]; + uint32_t const sl = seed + (uint32_t)len; + size_t j; + x[0].u32 = NMH_PRIME32_1; + x[1].u32 = NMH_PRIME32_2; + x[2].u32 = NMH_PRIME32_3; + x[3].u32 = NMH_PRIME32_4; + for (j = 0; j < 4; ++j) y[j].u32 = sl; + + if (type) { + /* 33 to 255 bytes */ + size_t const r = (len - 1) / 32; + size_t i; + for (i = 0; i < r; ++i) { + for (j = 0; j < 4; ++j) x[j].u32 ^= NMH_readLE32(p + i * 32 + j * 4); + for (j = 0; j < 4; ++j) y[j].u32 ^= NMH_readLE32(p + i * 32 + j * 4 + 16); + for (j = 0; j < 4; ++j) x[j].u32 += y[j].u32; + + for (j = 0; j < 4; ++j) { + x[j].u16[0] *= (uint16_t)(__NMH_M1 & 0xFFFF); + x[j].u16[1] *= (uint16_t)(__NMH_M1 >> 16); + } + for (j = 0; j < 4; ++j) x[j].u32 ^= (x[j].u32 << 5) ^ (x[j].u32 >> 13); + for (j = 0; j < 4; ++j) { + x[j].u16[0] *= (uint16_t)(__NMH_M2 & 0xFFFF); + x[j].u16[1] *= (uint16_t)(__NMH_M2 >> 16); + } + + for (j = 0; j < 4; ++j) x[j].u32 ^= y[j].u32; + + for (j = 0; j < 4; ++j) x[j].u32 ^= (x[j].u32 << 11) ^ (x[j].u32 >> 9); + for (j = 0; j < 4; ++j) { + x[j].u16[0] *= 
(uint16_t)(__NMH_M3 & 0xFFFF); + x[j].u16[1] *= (uint16_t)(__NMH_M3 >> 16); + } + for (j = 0; j < 4; ++j) x[j].u32 ^= (x[j].u32 >> 10) ^ (x[j].u32 >> 20); + } + for (j = 0; j < 4; ++j) x[j].u32 ^= NMH_readLE32(p + len - 32 + j * 4); + for (j = 0; j < 4; ++j) y[j].u32 ^= NMH_readLE32(p + len - 16 + j * 4); + } else { + /* 9 to 32 bytes */ + x[0].u32 ^= NMH_readLE32(p); + x[1].u32 ^= NMH_readLE32(p + ((len>>4)<<3)); + x[2].u32 ^= NMH_readLE32(p + len - 8); + x[3].u32 ^= NMH_readLE32(p + len - 8 - ((len>>4)<<3)); + y[0].u32 ^= NMH_readLE32(p + 4); + y[1].u32 ^= NMH_readLE32(p + ((len>>4)<<3) + 4); + y[2].u32 ^= NMH_readLE32(p + len - 8 + 4); + y[3].u32 ^= NMH_readLE32(p + len - 8 - ((len>>4)<<3) + 4); + } + + for (j = 0; j < 4; ++j) x[j].u32 += y[j].u32; + for (j = 0; j < 4; ++j) y[j].u32 ^= (y[j].u32 << 17) ^ (y[j].u32 >> 6); + + for (j = 0; j < 4; ++j) { + x[j].u16[0] *= (uint16_t)(__NMH_M1 & 0xFFFF); + x[j].u16[1] *= (uint16_t)(__NMH_M1 >> 16); + } + for (j = 0; j < 4; ++j) x[j].u32 ^= (x[j].u32 << 5) ^ (x[j].u32 >> 13); + for (j = 0; j < 4; ++j) { + x[j].u16[0] *= (uint16_t)(__NMH_M2 & 0xFFFF); + x[j].u16[1] *= (uint16_t)(__NMH_M2 >> 16); + } + + for (j = 0; j < 4; ++j) x[j].u32 ^= y[j].u32; + + for (j = 0; j < 4; ++j) x[j].u32 ^= (x[j].u32 << 11) ^ (x[j].u32 >> 9); + for (j = 0; j < 4; ++j) { + x[j].u16[0] *= (uint16_t)(__NMH_M3 & 0xFFFF); + x[j].u16[1] *= (uint16_t)(__NMH_M3 >> 16); + } + for (j = 0; j < 4; ++j) x[j].u32 ^= (x[j].u32 >> 10) ^ (x[j].u32 >> 20); + + x[0].u32 ^= NMH_PRIME32_1; + x[1].u32 ^= NMH_PRIME32_2; + x[2].u32 ^= NMH_PRIME32_3; + x[3].u32 ^= NMH_PRIME32_4; + + for (j = 1; j < 4; ++j) x[0].u32 += x[j].u32; + + x[0].u32 ^= sl + (sl >> 5); + x[0].u16[0] *= (uint16_t)(__NMH_M3 & 0xFFFF); + x[0].u16[1] *= (uint16_t)(__NMH_M3 >> 16); + x[0].u32 ^= (x[0].u32 >> 10) ^ (x[0].u32 >> 20); + + result = x[0].u32; + } +# else /* at least NMH_SSE2 */ + { + __m128i const h0 = _mm_setr_epi32((int)NMH_PRIME32_1, (int)NMH_PRIME32_2, (int)NMH_PRIME32_3, 
(int)NMH_PRIME32_4); + __m128i const sl = _mm_set1_epi32((int)seed + (int)len);/* NOTE(review): the two (int) casts are added as signed ints, which can overflow for large seeds; the scalar branch adds seed and (uint32_t)len in unsigned arithmetic */ + __m128i const m1 = _mm_set1_epi32((int)__NMH_M1); + __m128i const m2 = _mm_set1_epi32((int)__NMH_M2); + __m128i const m3 = _mm_set1_epi32((int)__NMH_M3); + __m128i x = h0; + __m128i y = sl; + const uint32_t *const px = (const uint32_t*)&x; +/* x starts from h0 and y from sl (same value in every 32-bit lane); px views x as 32-bit words */ + if (type) { + /* 33 to 255 bytes: r full 32-byte blocks are absorbed and mixed in the loop, then the last 32 bytes of the input are XORed in (they overlap the last full block unless len is a multiple of 32) */ + size_t const r = (len - 1) / 32; + size_t i; + for (i = 0; i < r; ++i) { + x = _mm_xor_si128(x, _mm_loadu_si128((const __m128i *)(p + i * 32))); + y = _mm_xor_si128(y, _mm_loadu_si128((const __m128i *)(p + i * 32 + 16))); + x = _mm_add_epi32(x, y); + x = _mm_mullo_epi16(x, m1); + x = _mm_xor_si128(_mm_xor_si128(x, _mm_slli_epi32(x, 5)), _mm_srli_epi32(x, 13)); + x = _mm_mullo_epi16(x, m2); + x = _mm_xor_si128(x, y); + x = _mm_xor_si128(_mm_xor_si128(x, _mm_slli_epi32(x, 11)), _mm_srli_epi32(x, 9)); + x = _mm_mullo_epi16(x, m3); + x = _mm_xor_si128(_mm_xor_si128(x, _mm_srli_epi32(x, 10)), _mm_srli_epi32(x, 20)); + } + x = _mm_xor_si128(x, _mm_loadu_si128((const __m128i *)(p + len - 32))); + y = _mm_xor_si128(y, _mm_loadu_si128((const __m128i *)(p + len - 16))); + } else { + /* 9 to 32 bytes: four 8-byte pairs (low word into x, high word into y) read at the start, at offset (len>>4)<<3, at the last 8 bytes, and at (len>>4)<<3 bytes before the last 8; that offset is 0, 8 or 16 */ + x = _mm_xor_si128(x, _mm_setr_epi32((int)NMH_readLE32(p), (int)NMH_readLE32(p + ((len>>4)<<3)), (int)NMH_readLE32(p + len - 8), (int)NMH_readLE32(p + len - 8 - ((len>>4)<<3)))); + y = _mm_xor_si128(y, _mm_setr_epi32((int)NMH_readLE32(p + 4), (int)NMH_readLE32(p + ((len>>4)<<3) + 4), (int)NMH_readLE32(p + len - 8 + 4), (int)NMH_readLE32(p + len - 8 - ((len>>4)<<3) + 4))); + } +/* final mixing round, shared by both length classes; the 16-bit lane multiplies correspond to the separate u16[0] and u16[1] products of the scalar branch */ + x = _mm_add_epi32(x, y); + + y = _mm_xor_si128(_mm_xor_si128(y, _mm_slli_epi32(y, 17)), _mm_srli_epi32(y, 6)); + + x = _mm_mullo_epi16(x, m1); + x = _mm_xor_si128(_mm_xor_si128(x, _mm_slli_epi32(x, 5)), _mm_srli_epi32(x, 13)); + x = _mm_mullo_epi16(x, m2); + x = _mm_xor_si128(x, y); + x = _mm_xor_si128(_mm_xor_si128(x, _mm_slli_epi32(x, 11)), _mm_srli_epi32(x, 9)); + x = _mm_mullo_epi16(x, m3); + x = _mm_xor_si128(_mm_xor_si128(x, 
_mm_srli_epi32(x, 10)), _mm_srli_epi32(x, 20)); + + x = _mm_xor_si128(x, h0); + x = _mm_add_epi32(x, _mm_srli_si128(x, 4)); + x = _mm_add_epi32(x, _mm_srli_si128(x, 8)); + + x = _mm_xor_si128(x, _mm_add_epi32(sl, _mm_srli_epi32(sl, 5))); + x = _mm_mullo_epi16(x, m3); + x = _mm_xor_si128(_mm_xor_si128(x, _mm_srli_epi32(x, 10)), _mm_srli_epi32(x, 20)); + + result = *px; + } +# endif + return *&result; +} +#define NMHASH32_9to32(p, len, seed) NMHASH32_9to255(p, len, seed, 0) /* type 0: the single-pass "9 to 32 bytes" branch */ +#define NMHASH32_33to255(p, len, seed) NMHASH32_9to255(p, len, seed, 1) /* type 1: the branch that loops over 32-byte blocks */ +/* The __NMH_M1..__NMH_M3 multipliers used by NMHASH32_9to255 are undefined here; the code below uses the __NMH_M1_V..__NMH_M3_V tables instead. */ +#undef __NMH_M1 +#undef __NMH_M2 +#undef __NMH_M3 +/* One round of the long-input hash, plain C version (compiled when NMH_VECTOR == NMH_SCALAR). accX is XORed with the first sizeof(NMH_ACC_INIT) bytes at p and accY with the next sizeof(NMH_ACC_INIT) bytes (both read via NMH_readLE32); accX is then mixed in place and accY is updated once from accX. */ +#if NMH_VECTOR == NMH_SCALAR +#define NMHASH32_long_round NMHASH32_long_round_scalar +static inline +void +NMHASH32_long_round_scalar(uint32_t *const NMH_RESTRICT accX, uint32_t *const NMH_RESTRICT accY, const uint8_t* const NMH_RESTRICT p) +{ + /* Breadth-first order (each step is applied to every accumulator before the next step starts) hints some compilers to auto-vectorize this code. + * The original author reports that on gcc this is about 10x faster than the depth-first form and reaches about 80% of the manually vectorized code. + */ + const size_t nbGroups = sizeof(NMH_ACC_INIT) / sizeof(*NMH_ACC_INIT); /* number of 32-bit accumulators in each array */ + size_t i; + + for (i = 0; i < nbGroups; ++i) { + accX[i] ^= NMH_readLE32(p + i * 4); + } + for (i = 0; i < nbGroups; ++i) { + accY[i] ^= NMH_readLE32(p + i * 4 + sizeof(NMH_ACC_INIT)); + } + for (i = 0; i < nbGroups; ++i) { + accX[i] += accY[i]; + } + for (i = 0; i < nbGroups; ++i) { + accY[i] ^= accX[i] >> 1; + } + for (i = 0; i < nbGroups * 2; ++i) { /* multiply per 16-bit half: two halves for each 32-bit accumulator */ + ((uint16_t*)accX)[i] *= ((uint16_t*)__NMH_M1_V)[i]; + } + for (i = 0; i < nbGroups; ++i) { + accX[i] ^= accX[i] << 5 ^ accX[i] >> 13; + } + for (i = 0; i < nbGroups * 2; ++i) { + ((uint16_t*)accX)[i] *= ((uint16_t*)__NMH_M2_V)[i]; + } + for (i = 0; i < nbGroups; ++i) { /* the only step where accY feeds back into accX after the initial add */ + accX[i] ^= accY[i]; + } + for (i = 0; i < nbGroups; ++i) { + accX[i] ^= accX[i] << 11 ^ accX[i] >> 9; + } + for (i = 0; i < nbGroups * 2; ++i) { + ((uint16_t*)accX)[i] *= ((uint16_t*)__NMH_M3_V)[i]; + } + for (i = 0; i < nbGroups; ++i) { + accX[i] ^=
accX[i] >> 10 ^ accX[i] >> 20; + } +} +#endif +/* Intrinsic selectors for the configured vector width: _NMH_MM_(F) pastes the _mm_/_mm256_/_mm512_ prefix onto F, _NMH_MMW_(F) additionally appends the register width (128/256/512), and _NMH_MM_T is the matching vector type. */ +#if NMH_VECTOR == NMH_SSE2 +# define _NMH_MM_(F) _mm_ ## F +# define _NMH_MMW_(F) _mm_ ## F ## 128 +# define _NMH_MM_T __m128i +#elif NMH_VECTOR == NMH_AVX2 +# define _NMH_MM_(F) _mm256_ ## F +# define _NMH_MMW_(F) _mm256_ ## F ## 256 +# define _NMH_MM_T __m256i +#elif NMH_VECTOR == NMH_AVX512 +# define _NMH_MM_(F) _mm512_ ## F +# define _NMH_MMW_(F) _mm512_ ## F ## 512 +# define _NMH_MM_T __m512i +#endif +/* One round of the long-input hash written with SSE2/AVX2/AVX512 intrinsics; it performs the same sequence of steps as NMHASH32_long_round_scalar, one whole vector of accumulators per loop iteration. NMH_VECTOR_NB_GROUP is the number of _NMH_MM_T vectors that cover one accumulator array. */ +#if NMH_VECTOR == NMH_SSE2 || NMH_VECTOR == NMH_AVX2 || NMH_VECTOR == NMH_AVX512 +# define NMHASH32_long_round NMHASH32_long_round_sse +# define NMH_VECTOR_NB_GROUP (sizeof(NMH_ACC_INIT) / sizeof(*NMH_ACC_INIT) / (sizeof(_NMH_MM_T) / sizeof(*NMH_ACC_INIT))) +static inline +void +NMHASH32_long_round_sse(uint32_t *const NMH_RESTRICT accX, uint32_t *const NMH_RESTRICT accY, const uint8_t* const NMH_RESTRICT p) +{ + const _NMH_MM_T *const NMH_RESTRICT m1 = (const _NMH_MM_T * NMH_RESTRICT)__NMH_M1_V; /* NOTE(review): the multiply loops use only the first vector of each table (*m1, *m2, *m3) for every group, while the scalar version indexes the whole table; the two agree only if each table repeats one vector-wide pattern - confirm */ + const _NMH_MM_T *const NMH_RESTRICT m2 = (const _NMH_MM_T * NMH_RESTRICT)__NMH_M2_V; + const _NMH_MM_T *const NMH_RESTRICT m3 = (const _NMH_MM_T * NMH_RESTRICT)__NMH_M3_V; + _NMH_MM_T *const xaccX = ( _NMH_MM_T * )accX; /* accumulators are read and written through the vector type directly, so presumably accX and accY must be aligned for _NMH_MM_T - confirm against the caller */ + _NMH_MM_T *const xaccY = ( _NMH_MM_T * )accY; + _NMH_MM_T *const xp = ( _NMH_MM_T * )p; /* the input is only read through the loadu_si intrinsic below */ + size_t i; + + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MMW_(xor_si)(xaccX[i], _NMH_MMW_(loadu_si)(xp + i)); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { /* second half of the stripe goes into accY */ + xaccY[i] = _NMH_MMW_(xor_si)(xaccY[i], _NMH_MMW_(loadu_si)(xp + i + NMH_VECTOR_NB_GROUP)); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MM_(add_epi32)(xaccX[i], xaccY[i]); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccY[i] = _NMH_MMW_(xor_si)(xaccY[i], _NMH_MM_(srli_epi32)(xaccX[i], 1)); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { /* mullo_epi16 multiplies 16-bit lanes, mirroring the uint16_t multiply of the scalar round */ + xaccX[i] = _NMH_MM_(mullo_epi16)(xaccX[i], *m1); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MMW_(xor_si)(_NMH_MMW_(xor_si)(xaccX[i],
_NMH_MM_(slli_epi32)(xaccX[i], 5)), _NMH_MM_(srli_epi32)(xaccX[i], 13)); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MM_(mullo_epi16)(xaccX[i], *m2); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MMW_(xor_si)(xaccX[i], xaccY[i]); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MMW_(xor_si)(_NMH_MMW_(xor_si)(xaccX[i], _NMH_MM_(slli_epi32)(xaccX[i], 11)), _NMH_MM_(srli_epi32)(xaccX[i], 9)); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MM_(mullo_epi16)(xaccX[i], *m3); + } + for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { + xaccX[i] = _NMH_MMW_(xor_si)(_NMH_MMW_(xor_si)(xaccX[i], _NMH_MM_(srli_epi32)(xaccX[i], 10)), _NMH_MM_(srli_epi32)(xaccX[i], 20)); + } +} +# undef _NMH_MM_ +# undef _NMH_MMW_ +# undef _NMH_MM_T +# undef NMH_VECTOR_NB_GROUP +#endif +/* Hash of a long input. One stripe is sizeof(accX) plus sizeof(accY) bytes. Whole stripes are consumed from the start of p, then one more round is run on the stripe that ends exactly at p[len-1]. The result is the sum of the accumulators (each XORed with NMH_ACC_INIT) XORed with len. len must be at least one stripe, otherwise the final round would read before p. */ +static +uint32_t +NMHASH32_long(const uint8_t* const NMH_RESTRICT p, size_t const len, uint32_t const seed) +{ + NMH_ALIGN(NMH_ACC_ALIGN) uint32_t accX[sizeof(NMH_ACC_INIT)/sizeof(*NMH_ACC_INIT)]; + NMH_ALIGN(NMH_ACC_ALIGN) uint32_t accY[sizeof(accX)/sizeof(*accX)]; + size_t const nbRounds = (len - 1) / (sizeof(accX) + sizeof(accY)); /* (len - 1) guarantees at least one byte is left over for the final round */ + size_t i; + uint32_t sum = 0; + + /* init: accX from the NMH_ACC_INIT table, every accY element from the seed */ + for (i = 0; i < sizeof(accX)/sizeof(*accX); ++i) accX[i] = NMH_ACC_INIT[i]; + for (i = 0; i < sizeof(accY)/sizeof(*accY); ++i) accY[i] = seed; + + for (i = 0; i < nbRounds; ++i) { + NMHASH32_long_round(accX, accY, p + i * (sizeof(accX) + sizeof(accY))); + } + /* final stripe is anchored to the end of the input and may overlap bytes already consumed */ NMHASH32_long_round(accX, accY, p + len - (sizeof(accX) + sizeof(accY))); + + /* merge acc: undo the initial constants by XOR, then add all accX elements together (accY is not read again) */ + for (i = 0; i < sizeof(accX)/sizeof(*accX); ++i) accX[i] ^= NMH_ACC_INIT[i]; + for (i = 0; i < sizeof(accX)/sizeof(*accX); ++i) sum += accX[i]; +/* Intended to fold the upper 32 bits of len into the sum when size_t is wider than 32 bits. NOTE(review): inside an #if expression UINT32_C(-1) is evaluated in the widest unsigned type, so this comparison may never be true and the guarded line may never be compiled - confirm before relying on it. */ +# if SIZE_MAX > UINT32_C(-1) + sum += (uint32_t)(len >> 32); +# endif + return sum ^ (uint32_t)len; +} +/* Final mixer for NMHASH32 on long inputs: xor-shift steps interleaved with multiplications that are applied separately to the two 16-bit halves of the word (through the u16 view of the union). */ +static inline +uint32_t +NMHASH32_avalanche32(uint32_t const x) +{ + /* [-21 -8 cce5196d 12 -7 464be229 -21 -8] = 3.2267098842182733 */ + const uint32_t m1 =
UINT32_C(0xCCE5196D); + const uint32_t m2 = UINT32_C(0x464BE229); + union { uint32_t u32; uint16_t u16[2]; } vx; + vx.u32 = x; + vx.u32 ^= (vx.u32 >> 8) ^ (vx.u32 >> 21); + vx.u16[0] = (uint16_t)(vx.u16[0] * (uint16_t)m1); + vx.u16[1] = (uint16_t)(vx.u16[1] * (uint16_t)(m1 >> 16)); + vx.u32 ^= (vx.u32 << 12) ^ (vx.u32 >> 7); + vx.u16[0] = (uint16_t)(vx.u16[0] * (uint16_t)m2); + vx.u16[1] = (uint16_t)(vx.u16[1] * (uint16_t)(m2 >> 16)); + return vx.u32 ^ (vx.u32 >> 8) ^ (vx.u32 >> 21); +} +/* NMHASH32 entry point. Dispatches on len: 0..4 bytes are packed into one word and 5..8 bytes use two 4-byte reads, both finished by NMHASH32_0to8; 9..32 bytes go to NMHASH32_9to32; 33..255 bytes go to NMHASH32_33to255; 256 bytes and more go to NMHASH32_long followed by NMHASH32_avalanche32. */ +static inline +uint32_t +NMHASH32(const void* const NMH_RESTRICT input, size_t const len, uint32_t seed) +{ + const uint8_t *const p = (const uint8_t *)input; + if (NMH_likely(len <= 32)) { + if(NMH_likely(len > 8)) { + return NMHASH32_9to32(p, len, seed); + } + if(NMH_likely(len > 4)) { /* 5..8 bytes: first 4 and last 4 bytes, which overlap when len is below 8 */ + uint32_t x = NMH_readLE32(p); + uint32_t y = NMH_readLE32(p + len - 4) ^ (NMH_PRIME32_4 + 2 + seed); + x += y; + x ^= x << (len + 7); + return NMHASH32_0to8(x, NMH_rotl32(y, 5)); + } else { /* 0..4 bytes: the length is folded into seed so that inputs of different length get different seeds */ + union { uint32_t u32; uint16_t u16[2]; uint8_t u8[4]; } data; + switch (len) { + case 0: seed += NMH_PRIME32_2; + data.u32 = 0; + break; + case 1: seed += NMH_PRIME32_2 + (UINT32_C(1) << 24) + (1 << 1); + data.u32 = p[0]; + break; + case 2: seed += NMH_PRIME32_2 + (UINT32_C(2) << 24) + (2 << 1); + data.u32 = NMH_readLE16(p); + break; + case 3: seed += NMH_PRIME32_2 + (UINT32_C(3) << 24) + (3 << 1); + data.u16[1] = p[2]; /* NOTE(review): building the word from its u16 halves makes data.u32 depend on host byte order - confirm behaviour on big-endian targets */ + data.u16[0] = NMH_readLE16(p); + break; + case 4: seed += NMH_PRIME32_3; + data.u32 = NMH_readLE32(p); + break; + default: return 0; /* not reachable: len is at most 4 on this path */ + } + return NMHASH32_0to8(data.u32 + seed, NMH_rotl32(seed, 5)); + } + } + if (NMH_likely(len < 256)) { + return NMHASH32_33to255(p, len, seed); + } + return NMHASH32_avalanche32(NMHASH32_long(p, len, seed)); +} +/* Mixer for the 0..4 byte case of NMHASH32X: x is the already packed input word; three full 32-bit multiplications separated by xor-shift steps, with the seed mixed in twice (XOR, then rotated add). */ +static inline +uint32_t +NMHASH32X_0to4(uint32_t x, uint32_t const seed) +{ + /* [bdab1ea9 18 a7896a1b 12 83796a2d 16] = 0.092922873297662509 */ + x ^= seed; + x *= UINT32_C(0xBDAB1EA9); + x += NMH_rotl32(seed, 31); + x ^= x >> 18; + x *=
UINT32_C(0xA7896A1B); + x ^= x >> 12; + x *= UINT32_C(0x83796A2D); + x ^= x >> 16; + return x; +} +/* NMHASH32X for 5 to 8 input bytes: combines the first and the last 4 bytes of the input with the seed and runs one three-multiplication mixer. */ +static inline +uint32_t +NMHASH32X_5to8(const uint8_t* const NMH_RESTRICT p, size_t const len, uint32_t const seed) +{ + /* - 5 to 8 bytes (the head and tail 4-byte reads together cover every input byte only up to len 8) + * - mixer: [11049a7d 23 bcccdc7b 12 065e9dad 12] = 0.16577596555667246 */ + + uint32_t x = NMH_readLE32(p) ^ NMH_PRIME32_3; + uint32_t const y = NMH_readLE32(p + len - 4) ^ seed; + x += y; + x ^= x >> len; /* the shift distance is the input length itself */ + x *= UINT32_C(0x11049A7D); + x ^= x >> 23; + x *= UINT32_C(0xBCCCDC7B); + x ^= NMH_rotl32(y, 3); + x ^= x >> 12; + x *= UINT32_C(0x065E9DAD); + x ^= x >> 12; + return x; +} +/* NMHASH32X for medium inputs (NMHASH32X calls it for 9..255 bytes). Two independent lanes are kept, (x, y) and (a, b); every full 16-byte block feeds 8 bytes to each lane, and the remaining 1..16 bytes are handled by the tail code after the loop. */ +static inline +uint32_t +NMHASH32X_9to255(const uint8_t* const NMH_RESTRICT p, size_t const len, uint32_t const seed) +{ + /* - at least 9 bytes + * - base mixer: [11049a7d 23 bcccdc7b 12 065e9dad 12] = 0.16577596555667246 + * - tail mixer: [16 a52fb2cd 15 551e4d49 16] = 0.17162579707098322 + */ + + uint32_t x = NMH_PRIME32_3; + uint32_t y = seed; + uint32_t a = NMH_PRIME32_4; + uint32_t b = seed; + size_t i, r = (len - 1) / 16; +/* r full 16-byte blocks; (len - 1) keeps at least one byte for the tail. Bytes 0..7 of a block go to (x, y), bytes 8..15 to (a, b). */ + for (i = 0; i < r; ++i) { + x ^= NMH_readLE32(p + i * 16 + 0); + y ^= NMH_readLE32(p + i * 16 + 4); + x ^= y; + x *= UINT32_C(0x11049A7D); + x ^= x >> 23; + x *= UINT32_C(0xBCCCDC7B); + y = NMH_rotl32(y, 4); + x ^= y; + x ^= x >> 12; + x *= UINT32_C(0x065E9DAD); + x ^= x >> 12; + + a ^= NMH_readLE32(p + i * 16 + 8); + b ^= NMH_readLE32(p + i * 16 + 12); + a ^= b; + a *= UINT32_C(0x11049A7D); + a ^= a >> 23; + a *= UINT32_C(0xBCCCDC7B); + b = NMH_rotl32(b, 3); + a ^= b; + a ^= a >> 12; + a *= UINT32_C(0x065E9DAD); + a ^= a >> 12; + } +/* Tail: bits 8 and 4 of (len - 1) tell how much of the last partial block is present. Bit 8 set means 9..16 bytes remain, otherwise 1..8; bit 4 then selects whether one more 4-byte word (or word pair) at p + r * 16 is read into the (a, b) lane. */ + if (NMH_likely(((uint8_t)len-1) & 8)) { + if (NMH_likely(((uint8_t)len-1) & 4)) { + a ^= NMH_readLE32(p + r * 16 + 0); + b ^= NMH_readLE32(p + r * 16 + 4); + a ^= b; + a *= UINT32_C(0x11049A7D); + a ^= a >> 23; + a *= UINT32_C(0xBCCCDC7B); + a ^= NMH_rotl32(b, 4); + a ^= a >> 12; + a *= UINT32_C(0x065E9DAD); + } else { + a ^= NMH_readLE32(p + r * 16) + b; + a ^= a >> 16; + a *=
UINT32_C(0xA52FB2CD); + a ^= a >> 15; + a *= UINT32_C(0x551E4D49); + } +/* the last 8 bytes of the input always go into the (x, y) lane on this path */ + x ^= NMH_readLE32(p + len - 8); + y ^= NMH_readLE32(p + len - 4); + x ^= y; + x *= UINT32_C(0x11049A7D); + x ^= x >> 23; + x *= UINT32_C(0xBCCCDC7B); + x ^= NMH_rotl32(y, 3); + x ^= x >> 12; + x *= UINT32_C(0x065E9DAD); + } else { + if (NMH_likely(((uint8_t)len-1) & 4)) { + a ^= NMH_readLE32(p + r * 16) + b; + a ^= a >> 16; + a *= UINT32_C(0xA52FB2CD); + a ^= a >> 15; + a *= UINT32_C(0x551E4D49); + } + x ^= NMH_readLE32(p + len - 4) + y; /* short tail: only the last 4 bytes go into the x lane */ + x ^= x >> 16; + x *= UINT32_C(0xA52FB2CD); + x ^= x >> 15; + x *= UINT32_C(0x551E4D49); + } +/* combine: fold the length and the (a, b) lane into x, then one last xor-shift and multiply */ + x ^= (uint32_t)len; + x ^= NMH_rotl32(a, 27); /* rotate one lane to pass Diff test */ + x ^= x >> 14; + x *= UINT32_C(0x141CC535); + + return x; +} +/* Final mixer for NMHASH32X on long inputs: three xor-shifts by 15 separated by two full 32-bit multiplications. */ +static inline +uint32_t +NMHASH32X_avalanche32(uint32_t x) +{ + /* mixer with 2 mul from skeeto/hash-prospector: + * [15 d168aaad 15 af723597 15] = 0.15983776156606694 + */ + x ^= x >> 15; + x *= UINT32_C(0xD168AAAD); + x ^= x >> 15; + x *= UINT32_C(0xAF723597); + x ^= x >> 15; + return x; +} + +/* NMHASH32X entry point: uses 32*32->32 multiplication for short inputs (0..255 bytes); inputs of 256 bytes and more share NMHASH32_long and are finished with NMHASH32X_avalanche32 */ +static inline +uint32_t +NMHASH32X(const void* const NMH_RESTRICT input, size_t const len, uint32_t seed) +{ + const uint8_t *const p = (const uint8_t *)input; + if (NMH_likely(len <= 8)) { + if (NMH_likely(len > 4)) { + return NMHASH32X_5to8(p, len, seed); + } else { + /* 0-4 bytes: pack the bytes into data.u32 and fold the length into seed */ + union { uint32_t u32; uint16_t u16[2]; uint8_t u8[4]; } data; + switch (len) { + case 0: seed += NMH_PRIME32_2; + data.u32 = 0; + break; + case 1: seed += NMH_PRIME32_2 + (UINT32_C(1) << 24) + (1 << 1); + data.u32 = p[0]; + break; + case 2: seed += NMH_PRIME32_2 + (UINT32_C(2) << 24) + (2 << 1); + data.u32 = NMH_readLE16(p); + break; + case 3: seed += NMH_PRIME32_2 + (UINT32_C(3) << 24) + (3 << 1); + data.u16[1] = p[2]; /* NOTE(review): building the word from its u16 halves makes data.u32 depend on host byte order - confirm behaviour on big-endian targets */ + data.u16[0] = NMH_readLE16(p); + break; + case 4: seed += NMH_PRIME32_1; + data.u32 = NMH_readLE32(p); + break; + default: return 0; /* not reachable: len is at most 4 on this path */ + } + return NMHASH32X_0to4(data.u32,
seed); + } + } + if (NMH_likely(len < 256)) { + return NMHASH32X_9to255(p, len, seed); + } + return NMHASH32X_avalanche32(NMHASH32_long(p, len, seed)); +} + +#if defined(_MSC_VER) && _MSC_VER >= 1914 +# pragma warning(pop) +#endif +#ifdef __SDCC +# pragma restore +# undef const +#endif + +#endif /* _nmhash_h_ */ + +#ifdef __cplusplus +} +#endif diff --git a/src/tests/hash_functions/validation/pengyhash.c b/src/tests/hash_functions/validation/pengyhash.c new file mode 100644 index 000000000..d7b1ec02d --- /dev/null +++ b/src/tests/hash_functions/validation/pengyhash.c @@ -0,0 +1,30 @@ +/* pengyhash v0.2 */ + +#include "pengyhash.h" + +uint64_t pengyhash(const void *p, size_t size, uint32_t seed) +{ + uint64_t b[4] = { 0 }; + uint64_t s[4] = { 0, 0, 0, size }; + int i; + + for(; size >= 32; size -= 32, p = (const char*)p + 32) { + memcpy(b, p, 32); + + s[1] = (s[0] += s[1] + b[3]) + (s[1] << 14 | s[1] >> 50); + s[3] = (s[2] += s[3] + b[2]) + (s[3] << 23 | s[3] >> 41); + s[3] = (s[0] += s[3] + b[1]) ^ (s[3] << 16 | s[3] >> 48); + s[1] = (s[2] += s[1] + b[0]) ^ (s[1] << 40 | s[1] >> 24); + } + + memcpy(b, p, size); + + for(i = 0; i < 6; i++) { + s[1] = (s[0] += s[1] + b[3]) + (s[1] << 14 | s[1] >> 50) + seed; + s[3] = (s[2] += s[3] + b[2]) + (s[3] << 23 | s[3] >> 41); + s[3] = (s[0] += s[3] + b[1]) ^ (s[3] << 16 | s[3] >> 48); + s[1] = (s[2] += s[1] + b[0]) ^ (s[1] << 40 | s[1] >> 24); + } + + return s[0] + s[1] + s[2] + s[3]; +} diff --git a/src/tests/hash_functions/validation/pengyhash.h b/src/tests/hash_functions/validation/pengyhash.h new file mode 100644 index 000000000..b9ff7010c --- /dev/null +++ b/src/tests/hash_functions/validation/pengyhash.h @@ -0,0 +1,9 @@ +#ifndef _PENGYHASH_H +#define _PENGYHASH_H + +#include <stdint.h> +#include <string.h> + +uint64_t pengyhash(const void *p, size_t size, uint32_t seed); + +#endif diff --git a/src/tests/hash_functions/validation/waterhash.c b/src/tests/hash_functions/validation/waterhash.c new file mode 100644 index 000000000..7d6c92d99 ---
/dev/null +++ b/src/tests/hash_functions/validation/waterhash.c @@ -0,0 +1,6 @@ +#include "waterhash.h" + +int32_t waterhash_test ( const void * key, uint32_t len, uint64_t seed ) { + return waterhash (key, len, seed); +} + diff --git a/src/tests/hash_functions/validation/waterhash.h b/src/tests/hash_functions/validation/waterhash.h new file mode 100644 index 000000000..d05dc1269 --- /dev/null +++ b/src/tests/hash_functions/validation/waterhash.h @@ -0,0 +1,54 @@ +/* + Waterhash takes (optimally) 32-bit inputs and produces a 32-bit hash as its result. + It is an edited version of wyhash that uses at most 64-bit math instead of 128-bit. + It is meant to use very similar code to Wheathash, which produces a 64-bit hash. + Original Author: Wang Yi + Waterhash Variant Author: Tommy Ettinger +*/ +#ifndef waterhash_version_3 +#define waterhash_version_3 +#include <stdint.h> +#include <string.h> +#include <math.h> +const uint64_t _waterp0 = 0xa0761d65ull, _waterp1 = 0xe7037ed1ull, _waterp2 = 0x8ebc6af1ull; +const uint64_t _waterp3 = 0x589965cdull, _waterp4 = 0x1d8e4e27ull, _waterp5 = 0xeb44accbull; + +static inline uint64_t _watermum(const uint64_t A, const uint64_t B) { + uint64_t r = A * B; + return r - (r >> 32); +} + +static inline uint64_t _waterr08(const uint8_t *p){ uint8_t v; memcpy(&v, p, 1); return v; } +static inline uint64_t _waterr16(const uint8_t *p){ uint16_t v; memcpy(&v, p, 2); return v; } +static inline uint64_t _waterr32(const uint8_t *p){ uint32_t v; memcpy(&v, p, 4); return v; } +static inline uint32_t waterhash(const void* key, uint32_t len, uint64_t seed){ + const uint8_t *p = (const uint8_t*)key; + uint32_t i; + for (i = 0; i + 16 <= len; i += 16, p += 16) { + seed = _watermum( + _watermum(_waterr32(p) ^ _waterp1, _waterr32(p + 4) ^ _waterp2) + seed, + _watermum(_waterr32(p + 8) ^ _waterp3, _waterr32(p + 12) ^ _waterp4)); + } + seed += _waterp5; + switch (len & 15) { + case 1: seed = _watermum(_waterp2 ^ seed, _waterr08(p) ^ _waterp1); break; + case 2: seed = _watermum(_waterp3 ^
seed, _waterr16(p) ^ _waterp4); break; + case 3: seed = _watermum(_waterr16(p) ^ seed, _waterr08(p + 2) ^ _waterp2); break; + case 4: seed = _watermum(_waterr16(p) ^ seed, _waterr16(p + 2) ^ _waterp3); break; /* 4 bytes are read as two 16-bit halves here, not as one 32-bit word */ + case 5: seed = _watermum(_waterr32(p) ^ seed, _waterr08(p + 4) ^ _waterp1); break; + case 6: seed = _watermum(_waterr32(p) ^ seed, _waterr16(p + 4) ^ _waterp1); break; + case 7: seed = _watermum(_waterr32(p) ^ seed, (_waterr16(p + 4) << 8 | _waterr08(p + 6)) ^ _waterp1); break; /* 3 trailing bytes: a 16-bit read shifted left by 8, ORed with the last byte */ + case 8: seed = _watermum(_waterr32(p) ^ seed, _waterr32(p + 4) ^ _waterp0); break; + case 9: seed = _watermum(_waterr32(p) ^ seed, _waterr32(p + 4) ^ _waterp2) ^ _watermum(seed ^ _waterp4, _waterr08(p + 8) ^ _waterp3); break; /* from 9 bytes on, two _watermum products are XORed: one for the first 8 bytes, one for the rest */ + case 10: seed = _watermum(_waterr32(p) ^ seed, _waterr32(p + 4) ^ _waterp2) ^ _watermum(seed, _waterr16(p + 8) ^ _waterp3); break; + case 11: seed = _watermum(_waterr32(p) ^ seed, _waterr32(p + 4) ^ _waterp2) ^ _watermum(seed, ((_waterr16(p + 8) << 8) | _waterr08(p + 10)) ^ _waterp3); break; + case 12: seed = _watermum(_waterr32(p) ^ seed, _waterr32(p + 4) ^ _waterp2) ^ _watermum(seed ^ _waterr32(p + 8), _waterp4); break; + case 13: seed = _watermum(_waterr32(p) ^ seed, _waterr32(p + 4) ^ _waterp2) ^ _watermum(seed ^ _waterr32(p + 8), (_waterr08(p + 12)) ^ _waterp4); break; + case 14: seed = _watermum(_waterr32(p) ^ seed, _waterr32(p + 4) ^ _waterp2) ^ _watermum(seed ^ _waterr32(p + 8), (_waterr16(p + 12)) ^ _waterp4); break; + case 15: seed = _watermum(_waterr32(p) ^ seed, _waterr32(p + 4) ^ _waterp2) ^ _watermum(seed ^ _waterr32(p + 8), (_waterr16(p + 12) << 8 | _waterr08(p + 14)) ^ _waterp4); break; + } /* a remainder of 0 matches no case, so seed is left as it was after the block loop */ + /* final mix: spread seed with a 16-bit shift-XOR, multiply by a length-dependent factor */ seed = (seed ^ seed << 16) * (len ^ _waterp0); + return (uint32_t)(seed - (seed >> 32)); /* reduce the 64-bit state to the 32-bit result */ +} +#endif +