Skip to content

Commit

Permalink
Added weighted HRW sorting (#5)
Browse files Browse the repository at this point in the history
* Added weighted HRW sorting

This commit renames the old `SortByWeight` function to `Sort` and adds a
new `SortByWeight` function that takes explicit weights as an argument.
`SortByWeight` calculates normalized hashes of the nodes and normalizes
the input weights, then multiplies these values to obtain each node's
actual weight, which is used for sorting.

- renamed `SortByWeight` function to `Sort`
- added `SortByWeight`, `SortSliceByWeightValue` and
  `SortSliceByWeightIndex` functions
- moved code with reflection processing into `prepareRule` function
- added tests and benchmarks for new weighted functions
- added benchmark results into README

* Fixed comments
  • Loading branch information
alexvanin authored May 27, 2019
1 parent d32f396 commit 58a8ce4
Show file tree
Hide file tree
Showing 3 changed files with 438 additions and 72 deletions.
29 changes: 20 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,26 @@
## Benchmark:

```
BenchmarkSortByWeight_fnv_10-8 3000000 435 ns/op 224 B/op 3 allocs/op
BenchmarkSortByWeight_fnv_100-8 200000 7238 ns/op 1856 B/op 3 allocs/op
BenchmarkSortByWeight_fnv_1000-8 10000 163158 ns/op 16448 B/op 3 allocs/op
BenchmarkSortByIndex_fnv_10-8 2000000 642 ns/op 384 B/op 7 allocs/op
BenchmarkSortByIndex_fnv_100-8 200000 8045 ns/op 2928 B/op 7 allocs/op
BenchmarkSortByIndex_fnv_1000-8 10000 227527 ns/op 25728 B/op 7 allocs/op
BenchmarkSortByValue_fnv_10-8 1000000 1244 ns/op 544 B/op 17 allocs/op
BenchmarkSortByValue_fnv_100-8 100000 12397 ns/op 4528 B/op 107 allocs/op
BenchmarkSortByValue_fnv_1000-8 10000 154278 ns/op 41728 B/op 1007 allocs/op
BenchmarkSort_fnv_10-8 5000000 354 ns/op 224 B/op 3 allocs/op
BenchmarkSort_fnv_100-8 300000 5103 ns/op 1856 B/op 3 allocs/op
BenchmarkSort_fnv_1000-8 10000 115874 ns/op 16448 B/op 3 allocs/op
BenchmarkSortByIndex_fnv_10-8 3000000 562 ns/op 384 B/op 7 allocs/op
BenchmarkSortByIndex_fnv_100-8 200000 5819 ns/op 2928 B/op 7 allocs/op
BenchmarkSortByIndex_fnv_1000-8 10000 125859 ns/op 25728 B/op 7 allocs/op
BenchmarkSortByValue_fnv_10-8 2000000 1056 ns/op 544 B/op 17 allocs/op
BenchmarkSortByValue_fnv_100-8 200000 9593 ns/op 4528 B/op 107 allocs/op
BenchmarkSortByValue_fnv_1000-8 10000 109272 ns/op 41728 B/op 1007 allocs/op
BenchmarkSortByWeight_fnv_10-8 3000000 500 ns/op 320 B/op 4 allocs/op
BenchmarkSortByWeight_fnv_100-8 200000 8257 ns/op 2768 B/op 4 allocs/op
BenchmarkSortByWeight_fnv_1000-8 10000 197938 ns/op 24656 B/op 4 allocs/op
BenchmarkSortByWeightIndex_fnv_10-8 2000000 760 ns/op 480 B/op 8 allocs/op
BenchmarkSortByWeightIndex_fnv_100-8 200000 9191 ns/op 3840 B/op 8 allocs/op
BenchmarkSortByWeightIndex_fnv_1000-8 10000 208204 ns/op 33936 B/op 8 allocs/op
BenchmarkSortByWeightValue_fnv_10-8 1000000 1095 ns/op 640 B/op 18 allocs/op
BenchmarkSortByWeightValue_fnv_100-8 200000 12291 ns/op 5440 B/op 108 allocs/op
BenchmarkSortByWeightValue_fnv_1000-8 10000 145125 ns/op 49936 B/op 1008 allocs/op
```

## Example
Expand Down
184 changes: 134 additions & 50 deletions hrw.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ type (
sorted []uint64
weight []uint64
}

weighted struct {
h hashed
normal []float64 // normalized input weights
}
)

func weight(x uint64, y uint64) uint64 {
Expand All @@ -36,16 +41,29 @@ func weight(x uint64, y uint64) uint64 {
}

func (h hashed) Len() int { return h.length }
func (h hashed) Less(i, j int) bool { return h.weight[h.sorted[i]] < h.weight[h.sorted[j]] }
func (h hashed) Swap(i, j int) { h.sorted[i], h.sorted[j] = h.sorted[j], h.sorted[i] }
func (h hashed) Less(i, j int) bool { return h.weight[i] < h.weight[j] }
func (h hashed) Swap(i, j int) {
h.sorted[i], h.sorted[j] = h.sorted[j], h.sorted[i]
h.weight[i], h.weight[j] = h.weight[j], h.weight[i]
}

// Len returns the number of elements recorded in the embedded hashed state.
func (w weighted) Len() int {
	return w.h.length
}

// Less reports whether element i must be ordered before element j.
//
// The hash distance is inverted (`maxUint64 - weight`) so that the smallest
// distance becomes the largest value; that value is then scaled by the
// normalized input weight. The element with the larger product sorts first.
func (w weighted) Less(i, j int) bool {
	const maxUint64 = ^uint64(0)

	left := float64(maxUint64-w.h.weight[i]) * w.normal[i]
	right := float64(maxUint64-w.h.weight[j]) * w.normal[j]

	return left > right
}

// Swap exchanges elements i and j in the normalized weights and in the
// embedded hashed state, keeping all parallel slices aligned.
func (w weighted) Swap(i, j int) {
	w.normal[i], w.normal[j] = w.normal[j], w.normal[i]
	w.h.Swap(i, j)
}

// Hash returns the 64-bit murmur3 hash (murmur3.Sum64) of key.
func Hash(key []byte) uint64 {
return murmur3.Sum64(key)
}

// SortByWeight receive nodes and hash, and sort it by weight
func SortByWeight(nodes []uint64, hash uint64) []uint64 {
// Sort receive nodes and hash, and sort it by weight
func Sort(nodes []uint64, hash uint64) []uint64 {
var (
l = len(nodes)
h = hashed{
Expand All @@ -64,22 +82,129 @@ func SortByWeight(nodes []uint64, hash uint64) []uint64 {
return h.sorted
}

// SortByWeight receive nodes and hash, and sort it by weight
func SortByWeight(nodes []uint64, weights []uint64, hash uint64) []uint64 {
var (
maxWeight uint64

l = len(nodes)
w = weighted{
h: hashed{
length: l,
sorted: make([]uint64, 0, l),
weight: make([]uint64, 0, l),
},
normal: make([]float64, 0, l),
}
)

// finding max weight to perform normalization
for i := range weights {
if maxWeight < weights[i] {
maxWeight = weights[i]
}
}

// if all nodes have 0-weights or weights are incorrect then sort uniformly
if maxWeight == 0 || l != len(nodes) {
return Sort(nodes, hash)
}

fMaxWeight := float64(maxWeight)
for i, node := range nodes {
w.h.sorted = append(w.h.sorted, uint64(i))
w.h.weight = append(w.h.weight, weight(node, hash))
w.normal = append(w.normal, float64(weights[i])/fMaxWeight)
}
sort.Sort(w)
return w.h.sorted
}

// SortSliceByValue takes a slice and a hash and reorders the slice in place
// according to the order Sort computes for the rule built by prepareRule.
// It does nothing when prepareRule returns no rule for the given argument.
func SortSliceByValue(slice interface{}, hash uint64) {
	order := prepareRule(slice)
	if order == nil {
		return
	}

	swapFn := reflect.Swapper(slice)
	order = Sort(order, hash)
	sortByRuleInverse(swapFn, uint64(len(order)), order)
}

// SortSliceByWeightValue takes a slice, per-element weights and a hash, and
// reorders the slice in place according to the order SortByWeight computes
// for the rule built by prepareRule. It does nothing when prepareRule returns
// no rule for the given argument.
func SortSliceByWeightValue(slice interface{}, weight []uint64, hash uint64) {
	order := prepareRule(slice)
	if order == nil {
		return
	}

	swapFn := reflect.Swapper(slice)
	order = SortByWeight(order, weight, hash)
	sortByRuleInverse(swapFn, uint64(len(order)), order)
}

// SortSliceByIndex takes a slice and a hash and reorders the slice in place,
// using each element's position (0..len-1) as the value passed to Sort.
func SortSliceByIndex(slice interface{}, hash uint64) {
	n := uint64(reflect.ValueOf(slice).Len())
	swapFn := reflect.Swapper(slice)

	// the rule starts as the identity sequence 0, 1, ..., n-1
	indexes := make([]uint64, n)
	for i := range indexes {
		indexes[i] = uint64(i)
	}

	indexes = Sort(indexes, hash)
	sortByRuleInverse(swapFn, n, indexes)
}

// SortSliceByWeightIndex takes a slice, per-element weights and a hash, and
// reorders the slice in place, using each element's position (0..len-1) as
// the value passed to SortByWeight.
func SortSliceByWeightIndex(slice interface{}, weight []uint64, hash uint64) {
	n := uint64(reflect.ValueOf(slice).Len())
	swapFn := reflect.Swapper(slice)

	// the rule starts as the identity sequence 0, 1, ..., n-1
	indexes := make([]uint64, n)
	for i := range indexes {
		indexes[i] = uint64(i)
	}

	indexes = SortByWeight(indexes, weight, hash)
	sortByRuleInverse(swapFn, n, indexes)
}

// sortByRuleDirect rearranges the elements behind swap by walking rule.
//
// For every index i not yet marked done, it follows the chain
// rule[i], rule[rule[i]], ... and swaps element i with each visited element j,
// marking j as done, until the next chain element is already marked.
//
// NOTE(review): presumably rule is a permutation of 0..length-1 and this is
// the counterpart of sortByRuleInverse — confirm against callers.
func sortByRuleDirect(swap swapper, length uint64, rule []uint64) {
// done[k] records the positions that have already been processed
done := make([]bool, length)
for i := uint64(0); i < length; i++ {
if done[i] {
continue
}
// walk the chain starting at rule[i], always swapping against position i
for j := rule[i]; !done[rule[j]]; j = rule[j] {
swap(int(i), int(j))
done[j] = true
}
}
}

// sortByRuleInverse rearranges the elements behind swap by walking rule.
//
// For every index i not yet marked done, it follows the chain
// i, rule[i], rule[rule[i]], ... and swaps each visited position j with
// position rule[j], marking j as done, until the next chain element is
// already marked.
//
// NOTE(review): presumably rule is a permutation of 0..length-1 as returned
// by Sort / SortByWeight — confirm against callers.
func sortByRuleInverse(swap swapper, length uint64, rule []uint64) {
// done[k] records the positions that have already been processed
done := make([]bool, length)
for i := uint64(0); i < length; i++ {
if done[i] {
continue
}

// walk the chain starting at i, swapping each position with its successor
for j := i; !done[rule[j]]; j = rule[j] {
swap(int(j), int(rule[j]))
done[j] = true
}
}
}

func prepareRule(slice interface{}) []uint64 {
t := reflect.TypeOf(slice)
if t.Kind() != reflect.Slice {
return
return nil
}

var (
val = reflect.ValueOf(slice)
swap = reflect.Swapper(slice)
length = val.Len()
rule = make([]uint64, 0, length)
)

if length == 0 {
return
return nil
}

switch slice := slice.(type) {
Expand Down Expand Up @@ -148,54 +273,13 @@ func SortSliceByValue(slice interface{}, hash uint64) {

default:
if _, ok := val.Index(0).Interface().(Hasher); !ok {
return
return nil
}

for i := 0; i < length; i++ {
h := val.Index(i).Interface().(Hasher)
rule = append(rule, h.Hash())
}
}

rule = SortByWeight(rule, hash)
sortByRuleInverse(swap, uint64(length), rule)
}

// SortSliceByIndex received []T and hash to sort by index-weight
func SortSliceByIndex(slice interface{}, hash uint64) {
length := uint64(reflect.ValueOf(slice).Len())
swap := reflect.Swapper(slice)
rule := make([]uint64, 0, length)
for i := uint64(0); i < length; i++ {
rule = append(rule, i)
}
rule = SortByWeight(rule, hash)
sortByRuleInverse(swap, length, rule)
}

func sortByRuleDirect(swap swapper, length uint64, rule []uint64) {
done := make([]bool, length)
for i := uint64(0); i < length; i++ {
if done[i] {
continue
}
for j := rule[i]; !done[rule[j]]; j = rule[j] {
swap(int(i), int(j))
done[j] = true
}
}
}

func sortByRuleInverse(swap swapper, length uint64, rule []uint64) {
done := make([]bool, length)
for i := uint64(0); i < length; i++ {
if done[i] {
continue
}

for j := i; !done[rule[j]]; j = rule[j] {
swap(int(j), int(rule[j]))
done[j] = true
}
}
return rule
}
Loading

0 comments on commit 58a8ce4

Please sign in to comment.