This repository has been archived by the owner on Aug 3, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
distance.go
128 lines (121 loc) · 4.14 KB
/
distance.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
package gago
import (
"fmt"
"math"
)
// A Metric returns the distance between two genomes.
type Metric func(a, b Individual) float64
// A DistanceMemoizer computes and stores Metric calculations.
type DistanceMemoizer struct {
Metric Metric
Distances map[string]map[string]float64
nCalculations int // Total number of calls to Metric for testing purposes
}
// newDistanceMemoizer initializes a DistanceMemoizer.
func newDistanceMemoizer(metric Metric) DistanceMemoizer {
return DistanceMemoizer{
Metric: metric,
Distances: make(map[string]map[string]float64),
}
}
// GetDistance returns the distance between two Individuals based on the
// DistanceMemoizer's Metric field. If the two individuals share the same ID
// then GetDistance returns 0. DistanceMemoizer stores the calculated distances
// so that if GetDistance is called twice with the two same Individuals then
// the second call will return the stored distance instead of recomputing it.
func (dm *DistanceMemoizer) GetDistance(a, b Individual) float64 {
// Check if the two individuals are the same before proceding
if a.ID == b.ID {
return 0
}
// Create maps if the genomes have never been encountered
if _, ok := dm.Distances[a.ID]; !ok {
dm.Distances[a.ID] = make(map[string]float64)
} else {
// Check if the distance between the two genomes has been calculated
if dist, ok := dm.Distances[a.ID][b.ID]; ok {
return dist
}
}
if _, ok := dm.Distances[b.ID]; !ok {
dm.Distances[b.ID] = make(map[string]float64)
}
// Calculate the distance between the genomes and memoize it
var dist = dm.Metric(a, b)
dm.Distances[a.ID][b.ID] = dist
dm.Distances[b.ID][a.ID] = dist
dm.nCalculations++
return dist
}
// calcAvgDistances returns a map that associates the ID of each provided
// Individual with the average distance the Individual has with the rest of the
// Individuals.
func calcAvgDistances(indis Individuals, dm DistanceMemoizer) map[string]float64 {
var avgDistances = make(map[string]float64)
for _, a := range indis {
for _, b := range indis {
avgDistances[a.ID] += dm.GetDistance(a, b)
}
avgDistances[a.ID] /= float64(len(indis) - 1)
}
return avgDistances
}
func rebalanceClusters(clusters []Individuals, dm DistanceMemoizer, minPerCluster int) error {
// Calculate the number of missing Individuals per cluster for each cluster
// to reach at least minPerCluster Individuals.
var missing = make([]int, len(clusters))
for i, cluster := range clusters {
// Check that the cluster has at least one Individual
if len(cluster) == 0 {
return fmt.Errorf("Cluster %d has 0 individuals", i)
}
// Calculate the number of missing Individual in the cluster to reach minPerCluster
missing[i] = minPerCluster - len(cluster)
}
// Check if there are enough Individuals to rebalance the clusters.
if sumInts(missing) >= 0 {
return fmt.Errorf("Missing %d individuals to be able to rebalance the clusters",
sumInts(missing))
}
// Loop through the clusters that are missing Individuals
for i, cluster := range clusters {
// Check if the cluster is missing Individuals
if missing[i] <= 0 {
continue
}
// Assign new Individuals to the cluster while it is missing some
for missing[i] > 0 {
// Determine the medoid
cluster.SortByDistanceToMedoid(dm)
var medoid = cluster[0]
// Go through the Individuals of the other clusters and find the one
// closest to the computed medoid
var (
cci int // Closest cluster index
cii int // Closest Individual index
minDist = math.Inf(1)
)
for j := range clusters {
// Check that the cluster has Individuals to spare
if i == j || missing[j] >= 0 {
continue
}
// Find the closest Individual to the medoid inside the cluster
for k, indi := range clusters[j] {
var dist = dm.GetDistance(indi, medoid)
if dist < minDist {
cci = j
cii = k
minDist = dist
}
}
}
// Add the closest Individual to the cluster
clusters[i] = append(clusters[i], clusters[cci][cii])
// Remove the closest Individual from the cluster it belonged to
clusters[cci] = append(clusters[cci][:cii], clusters[cci][cii+1:]...)
missing[i]--
}
}
return nil
}