Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expands smash algorithms to support murmur3, sha256, sha512 & md5 #25

Merged
merged 2 commits into from
Nov 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 98 additions & 0 deletions docs/algorithms.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# Algorithms Supported

`smash` supports a variety of hashing algorithms.

<table>
<thead>
<tr>
<th>Algorithm</th>
<th>Default</th>
<th>Variations / Aliases</th>
</tr>
</thead>
<tbody>
<tr>
<td>
xxhash<br/>
<sub><sup><a href="https://xxhash.com/">learn more</a></sup></sub>
</td>
<td><code>xxhash</code></td>
<td>
<ul>
<li><code>xxhash</code></li>
</ul>
</td>
</tr>
<tr>
<td>
murmur3<br/>
<sub><sup><a href="https://en.wikipedia.org/wiki/MurmurHash">learn more</a></sup></sub>
</td>
<td><code>murmur3</code></td>
<td>
<ul>
<li><code>murmur3</code> (alias: <code>murmur3-128</code>)</li>
<li><code>murmur3-128</code></li>
<li><code>murmur3-64</code></li>
<li><code>murmur3-32</code></li>
</ul>
</td>
</tr>
<tr>
<td>SHA-256</td>
<td><code>sha256</code></td>
<td>
<ul>
<li><code>sha256</code></li>
<li><code>sha-256</code></li>
</ul>
</td>
</tr>
<tr>
<td>SHA-512</td>
<td><code>sha512</code></td>
<td>
<ul>
<li><code>sha512</code></li>
<li><code>sha-512</code></li>
</ul>
</td>
</tr>
<tr>
<td>MD5</td>
<td><code>md5</code></td>
<td>
<ul>
<li><code>md5</code></li>
</ul>
</td>
</tr>
<tr>
<td>FNV128<br/>
<sub><sup><a href="https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function">learn more</a></sup></sub></td>
<td><code>fnv128</code></td>
<td>
<ul>
<li><code>fnv128</code></li>
<li><code>fnv-128</code></li>
</ul>
</td>
</tr>
<tr>
<td>FNV128a<br/>
<sub><sup><a href="https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function">learn more</a></sup></sub></td>
<td><code>fnv128a</code></td>
<td>
<ul>
<li><code>fnv</code> (alias: <code>fnv128a</code>)</li>
<li><code>fnv128a</code></li>
<li><code>fnv-128a</code></li>
</ul>
</td>
</tr>
</tbody>
</table>

Generally, when slicing is enabled (default), we'd recommend `xxhash` or `murmur3`.

When you want a full hash (`--disable-slicing` option), we'd generally recommend `sha512` or `sha256`.
Binary file added docs/artefacts/smash-v0.0.3-long-demo.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ require (
github.com/cespare/xxhash v1.1.0
github.com/dustin/go-humanize v1.0.1
github.com/pterm/pterm v0.12.70
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72
github.com/spf13/cobra v1.8.0
github.com/thediveo/enumflag/v2 v2.0.5
golang.org/x/term v0.14.0
Expand Down
33 changes: 31 additions & 2 deletions internal/algorithms/algorithm.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
package algorithms

import (
md5h "crypto/md5"
sha256h "crypto/sha256"
sha512h "crypto/sha512"
"hash"
fnvh "hash/fnv"

"github.com/spaolacci/murmur3"

cxHash "github.com/cespare/xxhash"
)

Expand All @@ -13,13 +18,25 @@ const (
Xxhash Algorithm = iota
Fnv128
Fnv128a
Murmur3_128
Murmur3_64
Murmur3_32
Md5
Sha256
Sha512
)

// HashAlgorithms Used by CLI for validating --algorithm flag
var HashAlgorithms = map[int][]string{
0: {"xxhash"},
1: {"fnv128"},
2: {"fnv128a"},
2: {"fnv128a", "fnv"},
3: {"murmur3-128", "murmur3"},
4: {"murmur3-64"},
5: {"murmur3-32"},
6: {"md5"},
7: {"sha-256", "sha256"},
8: {"sha-512", "sha512"},
}

// New Instantiates a new representation of the Hash Algorithm.
Expand All @@ -31,8 +48,20 @@ func (a Algorithm) New() hash.Hash {
return fnvh.New128()
case Fnv128a:
return fnvh.New128a()
case Murmur3_32:
return murmur3.New32()
case Murmur3_64:
return murmur3.New64()
case Murmur3_128:
return murmur3.New128()
case Md5:
return md5h.New()
case Sha256:
return sha256h.New()
case Sha512:
return sha512h.New()
}
return fnvh.New128a()
return cxHash.New()
}

// Index Returns the index for the Hash Algorithm
Expand Down
4 changes: 2 additions & 2 deletions internal/cli/cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ func init() {
rootCmd.PersistentFlags().Var(
enumflag.New(&af.Algorithm, "algorithm", algorithms.HashAlgorithms, enumflag.EnumCaseInsensitive),
"algorithm",
"Algorithm to use, can be 'xxhash', 'fnv128', 'fnv128a'")
"Algorithm to use to hash files. Supported: xxhash, murmur3, md5, sha512, sha256 (full list, see readme)")
flags := rootCmd.Flags()
flags.StringSliceVarP(&af.Base, "base", "", nil, "Base directories to use for comparison. Eg. --base=/c/dos,/c/run/dos/")
flags.StringSliceVarP(&af.Base, "base", "", nil, "Base directories to use for comparison. Eg. --base=/c/dos,/c/dos/run/,/run/dos/run")
flags.StringSliceVarP(&af.ExcludeFile, "exclude-file", "", nil, "Files to exclude separated by comma. Eg. --exclude-file=.gitignore,*.csv")
flags.StringSliceVarP(&af.ExcludeDir, "exclude-dir", "", nil, "Directories to exclude separated by comma. Eg. --exclude-dir=.git,.idea")
flags.IntVarP(&af.MaxThreads, "max-threads", "p", runtime.NumCPU(), "Maximum threads to utilise.")
Expand Down
102 changes: 102 additions & 0 deletions pkg/slicer/slicer_algorithm_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
//nolint
package slicer

import (
"bytes"
"encoding/hex"
"io"
"os"
"strings"
"testing"

"github.com/thushan/smash/internal/algorithms"
)

// algoData is the table of cases for the file-based hashing test: each entry
// pairs an algorithm and a slicing mode with an artefact file and the
// hex-encoded hash expected for that combination.
//
// fieldalignment: struct with 40 pointer bytes could be 24 (govet)
// but this is nicer to see / read :)
var algoData = []struct {
algorithm algorithms.Algorithm // hashing algorithm handed to New
disableSlicing bool // copied into SlicerOptions.DisableSlicing for this case
filename string // path of the artefact file read from disk
expectHash string // expected hash, hex-encoded (compared case-insensitively)
}{
{algorithms.Xxhash, false, "./artefacts/test-manipulated.1mb", "4f595576799edcd9"},
{algorithms.Xxhash, true, "./artefacts/test-manipulated.1mb", "4a1960f16a88960c"},
{algorithms.Xxhash, false, "./artefacts/test.1mb", "bb83f43630ee546f"},
{algorithms.Xxhash, true, "./artefacts/test.1mb", "6b6255ee515dcc04"},
{algorithms.Murmur3_128, false, "./artefacts/test-manipulated.1mb", "daa0b57d39ab077f56bcdf855753d8dd"},
{algorithms.Murmur3_128, true, "./artefacts/test-manipulated.1mb", "7b49601fb19613cfa36cc032910228b7"},
{algorithms.Murmur3_128, false, "./artefacts/test.1mb", "92d0c527266ec9151a6a9239c105df84"},
{algorithms.Murmur3_128, true, "./artefacts/test.1mb", "35ec8ac6041a7e9b70c61cc30d40b592"},
{algorithms.Murmur3_64, false, "./artefacts/test-manipulated.1mb", "daa0b57d39ab077f"},
{algorithms.Murmur3_64, true, "./artefacts/test-manipulated.1mb", "7b49601fb19613cf"},
{algorithms.Murmur3_64, false, "./artefacts/test.1mb", "92d0c527266ec915"},
{algorithms.Murmur3_64, true, "./artefacts/test.1mb", "35ec8ac6041a7e9b"},
{algorithms.Murmur3_32, false, "./artefacts/test-manipulated.1mb", "eb6482f3"},
{algorithms.Murmur3_32, true, "./artefacts/test-manipulated.1mb", "e0fa6869"},
{algorithms.Murmur3_32, false, "./artefacts/test.1mb", "5ca146ee"},
{algorithms.Murmur3_32, true, "./artefacts/test.1mb", "3a3133fa"},
{algorithms.Fnv128, false, "./artefacts/test-manipulated.1mb", "e91da5b6fb6c3df866d19794bcc031a2"},
{algorithms.Fnv128, true, "./artefacts/test-manipulated.1mb", "8808e2a6d269deb5bce97f110f60e8dc"},
{algorithms.Fnv128, false, "./artefacts/test.1mb", "af25513dbbfb8ebf847829a2cd6e76f2"},
{algorithms.Fnv128, true, "./artefacts/test.1mb", "e55b683eca015645afc7316f7df9993b"},
{algorithms.Fnv128a, false, "./artefacts/test-manipulated.1mb", "04721f877b7be5ad3e487b87ad486f30"},
{algorithms.Fnv128a, true, "./artefacts/test-manipulated.1mb", "998f1046fb1e726b7dedd1eecd453c1a"},
{algorithms.Fnv128a, false, "./artefacts/test.1mb", "f80ebc069329ec8a59e2c444c300f218"},
{algorithms.Fnv128a, true, "./artefacts/test.1mb", "ebc231b45eb5b9c7be1c936829047f1f"},
{algorithms.Md5, false, "./artefacts/test-manipulated.1mb", "040ca2ff5e59e6b0870b0f68a92a3968"},
{algorithms.Md5, true, "./artefacts/test-manipulated.1mb", "df221ae4955e4b77f50ade6ab70c5210"},
{algorithms.Md5, false, "./artefacts/test.1mb", "546b9508c9650e5d2e0c1c15f63c342c"},
{algorithms.Md5, true, "./artefacts/test.1mb", "4c18efb7e70ac81f341ce3f5ef3684a4"},
{algorithms.Sha512, false, "./artefacts/test-manipulated.1mb", "b8b783b66d20b280709522abd2478f0f7e599a31d62d9f876d8d91e7ad3874e75964f5bbb2e35ca1380e4d28d9135c40b12d3cee7c7b1f89c29b5d2ef38d0cc7"},
{algorithms.Sha512, true, "./artefacts/test-manipulated.1mb", "dd69b1afbcb92135421574297fa47f612a23b386721b8562cd7852a0eebe0f4d8436d02b6773b7c072c18c67027d53eeedc9d18cc6171dfc82a907bfa570ae03"},
{algorithms.Sha512, false, "./artefacts/test.1mb", "88402b9df2f2dd06597f0a1db9c6257645acb6ddb949d4daa00a7f28dfd681b5a46cef809774e9c0e5f0f581d8a240eac62bde89d99220055342dae8d6e680cf"},
{algorithms.Sha512, true, "./artefacts/test.1mb", "8cedef8fa8d1ab8bdee1a9441165fe2af8ee37c9672e06f15ca30f5a3f840096585e474c2b800760bd66db96239f3c67761303ec1d87553f27afc7d8c9e7ea9f"},
{algorithms.Sha256, false, "./artefacts/test-manipulated.1mb", "9539725bbdda1bfb410c51d9ebc0ba72391e7ba2145e74422028253a30672506"},
{algorithms.Sha256, true, "./artefacts/test-manipulated.1mb", "aae139d218d16eb32cd63dc6f842f77c89a773fc26a8e7ef3b9023600fad3f17"},
{algorithms.Sha256, false, "./artefacts/test.1mb", "11cfdec95e731151953ab8dbe24de8b3c1a029731740ca649bc82f95338e0540"},
{algorithms.Sha256, true, "./artefacts/test.1mb", "e9403adc74d6a890a0db579ab217e2c4b0490b43e5a87552d3a239f1bdde91b8"},
}

// TestSlice_New_HashingAlgorithms_WithFileSystemFiles walks every entry in
// algoData and checks the hash produced for the entry's file against the
// expected value, switching slicing on or off per entry.
func TestSlice_New_HashingAlgorithms_WithFileSystemFiles(t *testing.T) {
	// Meta and file detection stay enabled (zero values); only the slicing
	// switch varies between cases and is set before every use.
	var opts SlicerOptions

	for i := range algoData {
		tc := &algoData[i]
		opts.DisableSlicing = tc.disableSlicing
		runHashCheckTestsForFileSystemFile(tc.filename, tc.algorithm, &opts, tc.expectHash, t)
	}
}

// runHashCheckTestsForFileSystemFile hashes the file at filename with the given
// algorithm and slicer options, and reports a test error when the hex-encoded
// result differs from expected (compared case-insensitively).
//
// The file is read fully into memory and handed to the slicer through an
// io.SectionReader spanning the whole content. Failures are reported with
// t.Errorf rather than t.Fatalf so that a caller iterating over many cases
// keeps running the remaining ones.
func runHashCheckTestsForFileSystemFile(filename string, algorithm algorithms.Algorithm, options *SlicerOptions, expected string, t *testing.T) {
	// Attribute failures to the calling test rather than to this helper.
	t.Helper()

	binary, err := os.ReadFile(filename)
	if err != nil {
		t.Errorf("Unexpected io error %v", err)
		return
	}

	sr := io.NewSectionReader(bytes.NewReader(binary), 0, int64(len(binary)))

	stats := SlicerStats{}
	slicer := New(algorithm)

	if sliceErr := slicer.Slice(sr, options, &stats); sliceErr != nil {
		t.Errorf("Unexpected Slicer error %v", sliceErr)
		// The hash of a failed slice is presumably not meaningful; comparing it
		// would only pile a misleading mismatch on top of the real failure.
		return
	}

	actual := hex.EncodeToString(stats.Hash)

	if len(expected) != len(actual) {
		t.Errorf("hash length expected %d, got %d", len(expected), len(actual))
	}

	if !strings.EqualFold(actual, expected) {
		t.Errorf("expected hash %s, got %s", expected, actual)
	}
}
69 changes: 1 addition & 68 deletions pkg/slicer/slicer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -228,46 +228,7 @@ func TestSliceFS_New_FileSystemTestFile_TestManipulated1mb_WithSlicing(t *testin
}
runHashCheckTestsForFileSystemFile_WithSliceFS(fsys, filename, algorithm, &options, expected, t)
}
func TestSlice_New_FileSystemTestFile_Test1mb_WithSlicing(t *testing.T) {
algorithm := algorithms.Xxhash
expected := "bb83f43630ee546f"
options := SlicerOptions{
DisableSlicing: false,
DisableMeta: false,
DisableFileDetection: false,
}
runHashCheckTestsForFileSystemFile("./artefacts/test.1mb", algorithm, &options, expected, t)
}
func TestSlice_New_FileSystemTestFile_TestManipulated1mb_WithSlicing(t *testing.T) {
algorithm := algorithms.Xxhash
expected := "4f595576799edcd9"
options := SlicerOptions{
DisableSlicing: false,
DisableMeta: false,
DisableFileDetection: false,
}
runHashCheckTestsForFileSystemFile("./artefacts/test-manipulated.1mb", algorithm, &options, expected, t)
}
func TestSlice_New_FileSystemTestFile_Test1mb_WithoutSlicing(t *testing.T) {
algorithm := algorithms.Xxhash
expected := "6b6255ee515dcc04"
options := SlicerOptions{
DisableSlicing: true,
DisableMeta: false,
DisableFileDetection: false,
}
runHashCheckTestsForFileSystemFile("./artefacts/test.1mb", algorithm, &options, expected, t)
}
func TestSlice_New_FileSystemTestFile_TestManipulated1mb_WithoutSlicing(t *testing.T) {
algorithm := algorithms.Xxhash
expected := "4a1960f16a88960c"
options := SlicerOptions{
DisableSlicing: true,
DisableMeta: false,
DisableFileDetection: false,
}
runHashCheckTestsForFileSystemFile("./artefacts/test-manipulated.1mb", algorithm, &options, expected, t)
}

func runHashCheckTestsForFileSystemFile_WithSliceFS(fs fs.FS, filename string, algorithm algorithms.Algorithm, options *SlicerOptions, expected string, t *testing.T) {

slicer := New(algorithm)
Expand All @@ -288,34 +249,6 @@ func runHashCheckTestsForFileSystemFile_WithSliceFS(fs fs.FS, filename string, a
}

}
func runHashCheckTestsForFileSystemFile(filename string, algorithm algorithms.Algorithm, options *SlicerOptions, expected string, t *testing.T) {
if binary, err := os.ReadFile(filename); err != nil {
t.Errorf("Unexpected io error %v", err)
} else {

fsSize := len(binary)
reader := bytes.NewReader(binary)
sr := io.NewSectionReader(reader, 0, int64(fsSize))

stats := SlicerStats{}

slicer := New(algorithm)

if err := slicer.Slice(sr, options, &stats); err != nil {
t.Errorf("Unexpected Slicer error %v", err)
}

actual := hex.EncodeToString(stats.Hash)

if len(expected) != len(actual) {
t.Errorf("hash length expected %d, got %d", len(expected), len(actual))
}

if !strings.EqualFold(actual, expected) {
t.Errorf("expected hash %s, got %s", expected, actual)
}
}
}
// TestSlice_New_Hash_xxHash_With1KbBlob runs the shared runHashAlgorithmTest
// helper with the xxhash algorithm. The helper is defined outside this view;
// judging by the test name it presumably hashes a 1 KB blob — confirm there.
func TestSlice_New_Hash_xxHash_With1KbBlob(t *testing.T) {
runHashAlgorithmTest(algorithms.Xxhash, t)
}
Expand Down
Loading
Loading