From da57034fd21d1bcd597c60277e550b444e8820f4 Mon Sep 17 00:00:00 2001 From: Minke Zhang Date: Sat, 30 Jul 2022 11:30:30 -0700 Subject: [PATCH] Update documentation (#8) * Edit README * Update kd module documentation * Update documentation * Tweak performance test params --- README.md | 21 +++++++------ container/container.go | 2 +- internal/perf/util/util.go | 4 +-- kd/kd.go | 60 ++++++++++++++++++++++++++++++++++++-- 4 files changed, 72 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index afd5279..f47ea48 100644 --- a/README.md +++ b/README.md @@ -27,10 +27,10 @@ import ( ckd "github.com/downflux/go-kd/kd" ) -var _ point.P = &P{} - // P implements the point.P interface, which needs to provide a coordinate // vector function P(). +var _ point.P = &P{} + type P struct { p vector.V tag string @@ -41,15 +41,15 @@ func (p *P) Equal(q *P) bool { return vector.Within(p.P(), q.P()) && p.tag == q. func main() { data := []*P{ - &P{p: vector.V([]float64{1, 2}), tag: "A"}, - &P{p: vector.V([]float64{2, 100}), tag: "B"}, + &P{p: vector.V{1, 2}, tag: "A"}, + &P{p: vector.V{2, 100}, tag: "B"}, } - // Data is copy-constructed, and may be read from outside the k-D tree. + // Data is copy-constructed and may be read from outside the k-D tree. // // N.B.: We are casting the k-D tree into a container type, as this // allows for the user to easily switch between implementations. The - // user may directly consume the kd package API directly. + // user may directly consume the kd package API instead. 
var t container.C[*P] = (*kd.KD[*P])( ckd.New[*P](ckd.O[*P]{ Data: data, @@ -59,7 +59,10 @@ func main() { ) fmt.Println("KNN search") - for _, p := range t.KNN(vector.V([]float64{0, 0}), 1, func(p *P) bool { return true }) { + for _, p := range t.KNN( + /* v = */ vector.V{0, 0}, + /* k = */ 2, + func(p *P) bool { return true }) { fmt.Println(p) } @@ -73,8 +76,8 @@ func main() { fmt.Println("range search") for _, p := range t.RangeSearch( *hyperrectangle.New( - vector.V([]float64{0, 0}), - vector.V([]float64{100, 100}), + /* min = */ vector.V{0, 0}, + /* max = */ vector.V{100, 100}, ), func(p *P) bool { return true }, ) { diff --git a/container/container.go b/container/container.go index 23960f3..fdebed6 100644 --- a/container/container.go +++ b/container/container.go @@ -36,7 +36,7 @@ type C[T point.P] interface { Insert(p T) // Remove deletes an existing data point from the container. This - // function will delete the first matching point with the given + // function will delete an arbitrary matching point with the given // coordinates. Remove(p vector.V, f filter.F[T]) (T, bool) } diff --git a/internal/perf/util/util.go b/internal/perf/util/util.go index a31c452..07d3a75 100644 --- a/internal/perf/util/util.go +++ b/internal/perf/util/util.go @@ -15,8 +15,8 @@ import ( var ( BenchmarkKRange = []vector.D{2, 16, 128} - BenchmarkNRange = []int{1e3, 1e4, 1e5, 1e6} - BenchmarkSizeRange = []int{1, 32, 1024} + BenchmarkNRange = []int{1e3, 1e4, 1e6} + BenchmarkSizeRange = []int{1, 32, 512} BenchmarkFRange = []float64{0.05, 0.1, 0.25} KRange = []vector.D{2} diff --git a/kd/kd.go b/kd/kd.go index f791c07..41f36b8 100644 --- a/kd/kd.go +++ b/kd/kd.go @@ -1,3 +1,17 @@ +// Package kd implements a k-D tree with arbitrary data packing and duplicate +// data coordinate support. +// +// k-D trees are generally a caching layer representation of the local state -- +// we do not expect to be making frequent mutations to this tree once +// constructed. 
+// +// Read operations on this k-D tree may be done in parallel. Mutations on the +// k-D tree must be done serially. +// +// N.B.: Mutating the data point positions must be accompanied by mutating the +// k-D tree. For large numbers of points, and for a large number of queries, the +// time taken to build the tree will be offset by the speedup of subsequent +// reads. package kd import ( @@ -15,8 +29,15 @@ type O[T point.P] struct { Data []T K vector.D - // N is the leaf size of the k-D tree. Leaf nodes are checked via - // bruteforce methods. + // N is the nominal leaf size of the k-D tree. Leaf nodes are checked + // via bruteforce methods. + // + // Note that individual nodes (including non-leaf nodes) may contain + // elements that exceed this size constraint after inserts and removes. + // + // Leaf size will significantly impact performance -- users should + // tailor this value to their specific use-case. We recommend setting + // this value to 16 and up as the size of the data set increases. N int } @@ -53,6 +74,9 @@ func New[T point.P](o O[T]) *KD[T] { return t } +// Balance reconstructs the k-D tree. +// +// This k-D tree implementation does not support concurrent mutations. func (t *KD[T]) Balance() { t.root = tree.New[T](tree.O[T]{ Data: Data(t), @@ -62,16 +86,46 @@ func (t *KD[T]) Balance() { }) } -func (t *KD[T]) Insert(p T) { t.root.Insert(p) } +// Insert adds a new point into the k-D tree. +// +// Insert is not a balanced operation -- after many mutations, the tree should +// be explicitly reconstructed. +// +// This k-D tree implementation does not support concurrent mutations. +func (t *KD[T]) Insert(p T) { t.root.Insert(p) } + +// Remove pops a point from the k-D tree which lies at the input vector v and +// matches the filter. Note that if multiple points match both the location +// vector and the filter, an arbitrary one will be removed. This function will +// pop at most one element from the k-D tree. 
+// +// Remove is not a balanced operation -- after many mutations, the tree should +// be explicitly reconstructed. +// +// This k-D tree implementation does not support concurrent mutations. +// +// If there is no matching point, the returned bool will be false. func (t *KD[T]) Remove(v vector.V, f filter.F[T]) (T, bool) { return t.root.Remove(v, f) } +// KNN returns the k nearest neighbors to the input vector p that match the +// filter function. +// +// This k-D tree implementation supports concurrent read operations. func KNN[T point.P](t *KD[T], p vector.V, k int, f filter.F[T]) []T { return knn.KNN(t.root, p, k, f) } + +// RangeSearch returns all points which are found in the given bounds and +// match the filter function. +// +// This k-D tree implementation supports concurrent read operations. func RangeSearch[T point.P](t *KD[T], q hyperrectangle.R, f filter.F[T]) []T { return rangesearch.RangeSearch(t.root, q, f) } +// Data returns all points in the k-D tree. +// +// This k-D tree implementation supports concurrent read operations. func Data[T point.P](t *KD[T]) []T { if t.root.Nil() { return nil