
Commit 3fe9169

Init issue 11 implementation
1 parent d305406 commit 3fe9169


1 file changed: +62 −0 lines changed


Diff for: src/cluster/hierarchical.rs

@@ -0,0 +1,62 @@
/// # Hierarchical clustering
///
/// Implement hierarchical clustering methods:
/// * Agglomerative clustering (current)
/// * Bisecting K-Means (future)
/// * Fastcluster (future)
///
/*
class AgglomerativeClustering():
    """
    Parameters
    ----------
    n_clusters : int or None, default=2
        The number of clusters to find. It must be ``None`` if
        ``distance_threshold`` is not ``None``.
    affinity : str or callable, default='euclidean'
        If linkage is "ward", only "euclidean" is accepted.
    linkage : {'ward',}, default='ward'
        Which linkage criterion to use. The linkage criterion determines which
        distance to use between sets of observations. The algorithm will merge
        the pairs of clusters that minimize this criterion.
        - 'ward' minimizes the variance of the clusters being merged.
    compute_distances : bool, default=False
        Computes distances between clusters even if `distance_threshold` is not
        used. This can be used to make dendrogram visualization, but introduces
        a computational and memory overhead.
    """

    def fit(self, X):
        # compute tree
        parents, children = ward_tree(X, ....)
        # compute clusters
        labels = _hierarchical.hc_get_heads(parents)
        # assign cluster numbers
        self.labels_ = np.searchsorted(np.unique(labels), labels)

*/
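
// A minimal Rust sketch of how the sklearn-style interface above could map onto
// this crate. Every name in this block (`AgglomerativeClustering`,
// `FittedAgglomerative`, `ward_tree`, `labels_from_parents`) is an assumption
// for illustration, not a settled API; the eventual implementation would likely
// use ndarray/linfa types instead of plain `Vec<f64>` rows.
pub enum Affinity {
    Euclidean,
}

pub enum Linkage {
    Ward,
}

pub struct AgglomerativeClustering {
    /// Number of flat clusters to extract from the merge tree.
    pub n_clusters: usize,
    /// Only Euclidean distances are meaningful for Ward linkage.
    pub affinity: Affinity,
    /// Only Ward linkage is planned for the first iteration.
    pub linkage: Linkage,
    /// Keep merge distances around for dendrogram plotting (extra memory/compute).
    pub compute_distances: bool,
}

pub struct FittedAgglomerative {
    /// Flat cluster label per sample, in `0..n_clusters`.
    pub labels: Vec<usize>,
}

impl AgglomerativeClustering {
    // Mirrors the pseudocode `fit` above: build the Ward merge tree, then turn
    // the remaining tree roots into consecutive cluster labels.
    pub fn fit(&self, x: &[Vec<f64>]) -> FittedAgglomerative {
        let parents = ward_tree(x, self.n_clusters); // hypothetical helper, sketched below
        let labels = labels_from_parents(&parents, x.len()); // hypothetical helper, sketched below
        FittedAgglomerative { labels }
    }
}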

// implement ward tree
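
// The "implement ward tree" step: a hedged, std-only sketch of naive O(n^3)
// agglomerative Ward merging over plain `Vec<f64>` rows (the real code would
// presumably use ndarray and a smarter scheme such as nearest-neighbour chains).
// Assumption: the returned `parents` vector encodes the merge forest -- samples
// are nodes 0..n-1, every merge appends a new node, and a node with
// `parents[i] == i` is a root, i.e. a remaining cluster head. The `children`
// list and merge heights from the pseudocode are left out of this sketch.
fn ward_tree(x: &[Vec<f64>], n_clusters: usize) -> Vec<usize> {
    struct Cluster {
        node: usize,        // node id in the merge forest
        size: f64,          // number of samples in the cluster
        centroid: Vec<f64>, // mean of the cluster's samples
    }

    // Ward's criterion: merging A and B increases the within-cluster variance by
    //   |A| * |B| / (|A| + |B|) * ||centroid_A - centroid_B||^2
    fn ward_cost(a: &Cluster, b: &Cluster) -> f64 {
        let sq_dist: f64 = a
            .centroid
            .iter()
            .zip(&b.centroid)
            .map(|(p, q)| (p - q) * (p - q))
            .sum();
        a.size * b.size / (a.size + b.size) * sq_dist
    }

    let n = x.len();
    let mut parents: Vec<usize> = (0..n).collect(); // every sample starts as its own root
    let mut active: Vec<Cluster> = x
        .iter()
        .enumerate()
        .map(|(i, row)| Cluster { node: i, size: 1.0, centroid: row.clone() })
        .collect();

    while active.len() > n_clusters.max(1) {
        // Pick the pair of active clusters with the smallest merge cost.
        let (mut best_i, mut best_j, mut best_cost) = (0, 1, f64::INFINITY);
        for i in 0..active.len() {
            for j in (i + 1)..active.len() {
                let cost = ward_cost(&active[i], &active[j]);
                if cost < best_cost {
                    (best_i, best_j, best_cost) = (i, j, cost);
                }
            }
        }

        // Merge: a fresh node becomes the parent of both merged clusters.
        let b = active.swap_remove(best_j); // best_j > best_i, so best_i stays valid
        let a = active.swap_remove(best_i);
        let new_node = parents.len();
        parents[a.node] = new_node;
        parents[b.node] = new_node;
        parents.push(new_node); // the new node is a root for now

        let size = a.size + b.size;
        let centroid = a
            .centroid
            .iter()
            .zip(&b.centroid)
            .map(|(p, q)| (a.size * p + b.size * q) / size)
            .collect();
        active.push(Cluster { node: new_node, size, centroid });
    }
    parents
}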

// implement hierarchical cut (only needed if we want to allow compute_full_tree) (future)
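
// Hedged counterpart of the pseudocode's `hc_get_heads` + `np.searchsorted`
// relabelling: follow each sample up the `parents` forest produced by the
// `ward_tree` sketch above, then map the distinct roots to consecutive labels
// 0..k. Name and signature are illustrative assumptions.
fn labels_from_parents(parents: &[usize], n_samples: usize) -> Vec<usize> {
    // Walk parent pointers until reaching a root (a node that is its own parent).
    let root_of = |mut node: usize| -> usize {
        while parents[node] != node {
            node = parents[node];
        }
        node
    };

    let roots: Vec<usize> = (0..n_samples).map(root_of).collect();

    // Relabel the distinct roots as 0, 1, 2, ... in ascending node-id order,
    // mirroring `np.searchsorted(np.unique(labels), labels)`.
    let mut unique = roots.clone();
    unique.sort_unstable();
    unique.dedup();
    roots
        .iter()
        .map(|r| unique.binary_search(r).expect("root present in unique list"))
        .collect()
}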

// HOT: try to implement fastcluster <https://arxiv.org/pdf/1109.2378.pdf> (future)

// additional: implement BisectingKMeans (future)

#[cfg(test)]
mod tests {
    // Reference behaviour from scikit-learn:
    // >>> from sklearn.cluster import AgglomerativeClustering
    // >>> import numpy as np
    // >>> X = np.array([[1, 2], [1, 4], [1, 0],
    // ...               [4, 2], [4, 4], [4, 0]])
    // >>> clustering = AgglomerativeClustering().fit(X)
    // >>> clustering
    // AgglomerativeClustering()
    // >>> clustering.labels_
    // array([1, 1, 1, 0, 0, 0])
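
    // A hedged smoke test for the sketches above (`ward_tree` and
    // `labels_from_parents` are the illustrative helpers defined in this file,
    // not a settled API). It uses clearly separated points and checks the
    // partition rather than exact label values, since tie-breaking in the
    // naive sketch need not match scikit-learn's.
    #[test]
    fn ward_sketch_separates_two_obvious_groups() {
        let x = vec![
            vec![0.0, 0.0],
            vec![0.0, 1.0],
            vec![10.0, 0.0],
            vec![10.0, 1.0],
        ];
        let parents = super::ward_tree(&x, 2);
        let labels = super::labels_from_parents(&parents, x.len());

        assert_eq!(labels[0], labels[1]); // left pair ends up together
        assert_eq!(labels[2], labels[3]); // right pair ends up together
        assert_ne!(labels[0], labels[2]); // and the two pairs are separated
    }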
}
