diff --git a/.gitignore b/.gitignore index 0e59b03..afa1394 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ build/* dist/* spectralcluster.egg-info/* .coverage +.DS_Store diff --git a/README.md b/README.md index ab21392..70a2a82 100644 --- a/README.md +++ b/README.md @@ -91,13 +91,14 @@ You can specify your refinment operations like this: ``` from spectralcluster import RefinementOptions +from spectralcluster import ThresholdType from spectralcluster import ICASSP2018_REFINEMENT_SEQUENCE refinement_options = RefinementOptions( gaussian_blur_sigma=1, p_percentile=0.95, thresholding_soft_multiplier=0.01, - thresholding_with_row_max=True, + thresholding_type=ThresholdType.RowMax, refinement_sequence=ICASSP2018_REFINEMENT_SEQUENCE) ``` @@ -116,8 +117,8 @@ In the new version of this library, we support different types of Laplacian matr * None Laplacian (affinity matrix): `W` * Unnormalized Laplacian: `L = D - W` -* Graph cut Laplacian: `L' = D^{-1/2} L D^{-1/2}` -* Random walk Laplacian: `L' = D^{-1} L` +* Graph cut Laplacian: `L' = D^{-1/2} * L * D^{-1/2}` +* Random walk Laplacian: `L' = D^{-1} * L` You can specify the Laplacian matrix type with the `laplacian_type` argument of the `SpectralClusterer` class. diff --git a/docs/configs.html b/docs/configs.html index a1ac4ec..9baf2bd 100644 --- a/docs/configs.html +++ b/docs/configs.html @@ -34,6 +34,8 @@

Module spectralcluster.configs

RefinementName = refinement.RefinementName RefinementOptions = refinement.RefinementOptions +ThresholdType = refinement.ThresholdType +SymmetrizeType = refinement.SymmetrizeType SpectralClusterer = spectral_clusterer.SpectralClusterer @@ -52,7 +54,7 @@

Module spectralcluster.configs

gaussian_blur_sigma=1, p_percentile=0.95, thresholding_soft_multiplier=0.01, - thresholding_with_row_max=True, + thresholding_type=ThresholdType.RowMax, refinement_sequence=ICASSP2018_REFINEMENT_SEQUENCE) icassp2018_clusterer = SpectralClusterer( diff --git a/docs/index.html b/docs/index.html index 1f465cf..50dbaec 100644 --- a/docs/index.html +++ b/docs/index.html @@ -47,6 +47,7 @@

Package spectralcluster

RefinementName = refinement.RefinementName RefinementOptions = refinement.RefinementOptions +ThresholdType = refinement.ThresholdType SymmetrizeType = refinement.SymmetrizeType SpectralClusterer = spectral_clusterer.SpectralClusterer diff --git a/docs/refinement.html b/docs/refinement.html index c5f7514..e859d24 100644 --- a/docs/refinement.html +++ b/docs/refinement.html @@ -45,6 +45,15 @@

Module spectralcluster.refinement

RowWiseNormalize = 6 +class ThresholdType(enum.Enum): + """Different types of thresholding.""" + # We clear values that are smaller than row_max*p_percentile + RowMax = 1 + + # We clear (p_percentile*100)% smallest values of the entire row + Percentile = 2 + + class SymmetrizeType(enum.Enum): """Different types of symmetrization operation.""" # We use max(A, A^T) @@ -61,7 +70,7 @@

Module spectralcluster.refinement

gaussian_blur_sigma=1, p_percentile=0.95, thresholding_soft_multiplier=0.01, - thresholding_with_row_max=True, + thresholding_type=ThresholdType.RowMax, thresholding_with_binarization=False, thresholding_preserve_diagonal=False, symmetrize_type=SymmetrizeType.Max, @@ -73,8 +82,7 @@

Module spectralcluster.refinement

p_percentile: the p-percentile for the row wise thresholding thresholding_soft_multiplier: the multiplier for soft threhsold, if this value is 0, then it's a hard thresholding - thresholding_with_row_max: if true, we use row_max * p_percentile as row - wise threshold, instead of doing a percentile-based thresholding + thresholding_type: the type of thresholding operation thresholding_with_binarization: if true, we set values larger than the threshold to 1 thresholding_preserve_diagonal: if true, in the row wise thresholding @@ -88,7 +96,7 @@

Module spectralcluster.refinement

self.gaussian_blur_sigma = gaussian_blur_sigma self.p_percentile = p_percentile self.thresholding_soft_multiplier = thresholding_soft_multiplier - self.thresholding_with_row_max = thresholding_with_row_max + self.thresholding_type = thresholding_type self.thresholding_with_binarization = thresholding_with_binarization self.thresholding_preserve_diagonal = thresholding_preserve_diagonal self.symmetrize_type = symmetrize_type @@ -121,7 +129,7 @@

Module spectralcluster.refinement

elif name == RefinementName.RowWiseThreshold: return RowWiseThreshold(self.p_percentile, self.thresholding_soft_multiplier, - self.thresholding_with_row_max, + self.thresholding_type, self.thresholding_with_binarization, self.thresholding_preserve_diagonal) elif name == RefinementName.Symmetrize: @@ -203,12 +211,14 @@

Module spectralcluster.refinement

def __init__(self, p_percentile=0.95, thresholding_soft_multiplier=0.01, - thresholding_with_row_max=False, + thresholding_type=ThresholdType.RowMax, thresholding_with_binarization=False, thresholding_preserve_diagonal=False): self.p_percentile = p_percentile self.multiplier = thresholding_soft_multiplier - self.thresholding_with_row_max = thresholding_with_row_max + if not isinstance(thresholding_type, ThresholdType): + raise TypeError("thresholding_type must be a ThresholdType") + self.thresholding_type = thresholding_type self.thresholding_with_binarization = thresholding_with_binarization self.thresholding_preserve_diagonal = thresholding_preserve_diagonal @@ -217,17 +227,19 @@

Module spectralcluster.refinement

refined_affinity = np.copy(affinity) if self.thresholding_preserve_diagonal: np.fill_diagonal(refined_affinity, 0.0) - if self.thresholding_with_row_max: + if self.thresholding_type == ThresholdType.RowMax: # Row_max based thresholding row_max = refined_affinity.max(axis=1) row_max = np.expand_dims(row_max, axis=1) is_smaller = refined_affinity < (row_max * self.p_percentile) - else: + elif self.thresholding_type == ThresholdType.Percentile: # Percentile based thresholding row_percentile = np.percentile( refined_affinity, self.p_percentile * 100, axis=1) row_percentile = np.expand_dims(row_percentile, axis=1) is_smaller = refined_affinity < row_percentile + else: + raise ValueError("Unsupported thresholding_type") if self.thresholding_with_binarization: # For values larger than the threshold, we binarize them to 1 refined_affinity = (np.ones_like( @@ -245,13 +257,13 @@

Module spectralcluster.refinement

"""The Symmetrization operation.""" def __init__(self, symmetrize_type=SymmetrizeType.Max): + if not isinstance(symmetrize_type, SymmetrizeType): + raise TypeError("symmetrize_type must be a SymmetrizeType") self.symmetrize_type = symmetrize_type def refine(self, affinity): self.check_input(affinity) - if not isinstance(self.symmetrize_type, SymmetrizeType): - raise TypeError("symmetrize_type must be a SymmetrizeType") - elif self.symmetrize_type == SymmetrizeType.Max: + if self.symmetrize_type == SymmetrizeType.Max: return np.maximum(affinity, np.transpose(affinity)) elif self.symmetrize_type == SymmetrizeType.Average: return 0.5 * (affinity + np.transpose(affinity)) @@ -572,7 +584,7 @@

Class variables

class RefinementOptions -(gaussian_blur_sigma=1, p_percentile=0.95, thresholding_soft_multiplier=0.01, thresholding_with_row_max=True, thresholding_with_binarization=False, thresholding_preserve_diagonal=False, symmetrize_type=SymmetrizeType.Max, refinement_sequence=None) +(gaussian_blur_sigma=1, p_percentile=0.95, thresholding_soft_multiplier=0.01, thresholding_type=ThresholdType.RowMax, thresholding_with_binarization=False, thresholding_preserve_diagonal=False, symmetrize_type=SymmetrizeType.Max, refinement_sequence=None)

Refinement options for the affinity matrix.

@@ -586,9 +598,8 @@

Args

thresholding_soft_multiplier
the multiplier for soft threshold; if this value is 0, then it's a hard thresholding
-
thresholding_with_row_max
-
if true, we use row_max * p_percentile as row -wise threshold, instead of doing a percentile-based thresholding
+
thresholding_type
+
the type of thresholding operation
thresholding_with_binarization
if true, we set values larger than the threshold to 1
@@ -614,7 +625,7 @@

Args

gaussian_blur_sigma=1, p_percentile=0.95, thresholding_soft_multiplier=0.01, - thresholding_with_row_max=True, + thresholding_type=ThresholdType.RowMax, thresholding_with_binarization=False, thresholding_preserve_diagonal=False, symmetrize_type=SymmetrizeType.Max, @@ -626,8 +637,7 @@

Args

p_percentile: the p-percentile for the row wise thresholding thresholding_soft_multiplier: the multiplier for soft threhsold, if this value is 0, then it's a hard thresholding - thresholding_with_row_max: if true, we use row_max * p_percentile as row - wise threshold, instead of doing a percentile-based thresholding + thresholding_type: the type of thresholding operation thresholding_with_binarization: if true, we set values larger than the threshold to 1 thresholding_preserve_diagonal: if true, in the row wise thresholding @@ -641,7 +651,7 @@

Args

self.gaussian_blur_sigma = gaussian_blur_sigma self.p_percentile = p_percentile self.thresholding_soft_multiplier = thresholding_soft_multiplier - self.thresholding_with_row_max = thresholding_with_row_max + self.thresholding_type = thresholding_type self.thresholding_with_binarization = thresholding_with_binarization self.thresholding_preserve_diagonal = thresholding_preserve_diagonal self.symmetrize_type = symmetrize_type @@ -674,7 +684,7 @@

Args

elif name == RefinementName.RowWiseThreshold: return RowWiseThreshold(self.p_percentile, self.thresholding_soft_multiplier, - self.thresholding_with_row_max, + self.thresholding_type, self.thresholding_with_binarization, self.thresholding_preserve_diagonal) elif name == RefinementName.Symmetrize: @@ -733,7 +743,7 @@

Raises

elif name == RefinementName.RowWiseThreshold: return RowWiseThreshold(self.p_percentile, self.thresholding_soft_multiplier, - self.thresholding_with_row_max, + self.thresholding_type, self.thresholding_with_binarization, self.thresholding_preserve_diagonal) elif name == RefinementName.Symmetrize: @@ -783,7 +793,7 @@

Inherited members

class RowWiseThreshold -(p_percentile=0.95, thresholding_soft_multiplier=0.01, thresholding_with_row_max=False, thresholding_with_binarization=False, thresholding_preserve_diagonal=False) +(p_percentile=0.95, thresholding_soft_multiplier=0.01, thresholding_type=ThresholdType.RowMax, thresholding_with_binarization=False, thresholding_preserve_diagonal=False)

Apply row wise thresholding.

@@ -797,12 +807,14 @@

Inherited members

def __init__(self, p_percentile=0.95, thresholding_soft_multiplier=0.01, - thresholding_with_row_max=False, + thresholding_type=ThresholdType.RowMax, thresholding_with_binarization=False, thresholding_preserve_diagonal=False): self.p_percentile = p_percentile self.multiplier = thresholding_soft_multiplier - self.thresholding_with_row_max = thresholding_with_row_max + if not isinstance(thresholding_type, ThresholdType): + raise TypeError("thresholding_type must be a ThresholdType") + self.thresholding_type = thresholding_type self.thresholding_with_binarization = thresholding_with_binarization self.thresholding_preserve_diagonal = thresholding_preserve_diagonal @@ -811,17 +823,19 @@

Inherited members

refined_affinity = np.copy(affinity) if self.thresholding_preserve_diagonal: np.fill_diagonal(refined_affinity, 0.0) - if self.thresholding_with_row_max: + if self.thresholding_type == ThresholdType.RowMax: # Row_max based thresholding row_max = refined_affinity.max(axis=1) row_max = np.expand_dims(row_max, axis=1) is_smaller = refined_affinity < (row_max * self.p_percentile) - else: + elif self.thresholding_type == ThresholdType.Percentile: # Percentile based thresholding row_percentile = np.percentile( refined_affinity, self.p_percentile * 100, axis=1) row_percentile = np.expand_dims(row_percentile, axis=1) is_smaller = refined_affinity < row_percentile + else: + raise ValueError("Unsupported thresholding_type") if self.thresholding_with_binarization: # For values larger than the threshold, we binarize them to 1 refined_affinity = (np.ones_like( @@ -862,13 +876,13 @@

Inherited members

"""The Symmetrization operation.""" def __init__(self, symmetrize_type=SymmetrizeType.Max): + if not isinstance(symmetrize_type, SymmetrizeType): + raise TypeError("symmetrize_type must be a SymmetrizeType") self.symmetrize_type = symmetrize_type def refine(self, affinity): self.check_input(affinity) - if not isinstance(self.symmetrize_type, SymmetrizeType): - raise TypeError("symmetrize_type must be a SymmetrizeType") - elif self.symmetrize_type == SymmetrizeType.Max: + if self.symmetrize_type == SymmetrizeType.Max: return np.maximum(affinity, np.transpose(affinity)) elif self.symmetrize_type == SymmetrizeType.Average: return 0.5 * (affinity + np.transpose(affinity)) @@ -923,6 +937,40 @@

Class variables

+
+class ThresholdType +(value, names=None, *, module=None, qualname=None, type=None, start=1) +
+
+

Different types of thresholding.

+
+ +Expand source code + +
class ThresholdType(enum.Enum):
+  """Different types of thresholding."""
+  # We clear values that are smaller than row_max*p_percentile
+  RowMax = 1
+
+  # We clear (p_percentile*100)% smallest values of the entire row
+  Percentile = 2
+
+

Ancestors

+ +

Class variables

+
+
var Percentile
+
+
+
+
var RowMax
+
+
+
+
+
@@ -988,6 +1036,13 @@

Max +
  • +

    ThresholdType

    + +
  • diff --git a/setup.py b/setup.py index 64c90ee..0d4cc7c 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ import setuptools -VERSION = "0.2.0" +VERSION = "0.2.1" with open("README.md", "r") as file_object: LONG_DESCRIPTION = file_object.read()