From 0e399d0ba7deebb28ddc042a5527dc05b8edaa07 Mon Sep 17 00:00:00 2001 From: Thomas Robitaille Date: Thu, 29 Jul 2021 11:19:41 +0100 Subject: [PATCH 1/5] Allow CategoricalComponent to be n-dimensional --- glue/core/component.py | 5 +---- glue/core/tests/test_component.py | 10 ++++++++++ glue/utils/array.py | 4 ++-- glue/utils/tests/test_array.py | 8 ++++++++ 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/glue/core/component.py b/glue/core/component.py index bb958a020..667b3794c 100644 --- a/glue/core/component.py +++ b/glue/core/component.py @@ -389,9 +389,6 @@ def __init__(self, categorical_data, categories=None, jitter=None, units=None): self._data = categorical_ndarray(categorical_data, copy=False, categories=categories) - if self._data.ndim != 1: - raise ValueError("Categorical Data must be 1-dimensional") - self.jitter(method=jitter) @property @@ -435,7 +432,7 @@ def jitter(self, method=None): Parameters ---------- method : {None, 'uniform'} - If `None`, not jittering is done (or any jittering is undone). + If `None`, no jittering is done (or any jittering is undone). If ``'uniform'``, the codes are randomized by a uniformly distributed random variable. """ diff --git a/glue/core/tests/test_component.py b/glue/core/tests/test_component.py index 6f41fb3dd..656aaac7d 100644 --- a/glue/core/tests/test_component.py +++ b/glue/core/tests/test_component.py @@ -183,6 +183,16 @@ def test_valueerror_on_bad_jitter(self): cat_comp.jitter(method='this will never be a jitter method') +def test_nd_categorical_component(): + + data = [['a', 'b'], ['c', 'b']] + cat_comp = CategoricalComponent(data) + np.testing.assert_equal(cat_comp.data, data) + np.testing.assert_equal(cat_comp.labels, data) + np.testing.assert_equal(cat_comp.codes, [[0, 1], [2, 1]]) + np.testing.assert_equal(cat_comp.categories, ['a', 'b', 'c']) + + class TestCoordinateComponent(object): def setup_method(self, method): diff --git a/glue/utils/array.py b/glue/utils/array.py index 82afcaa43..1cb4b3776 100644 --- a/glue/utils/array.py +++ b/glue/utils/array.py @@ -57,8 +57,8 @@ def unique(array): # numpy.unique doesn't handle mixed-types on python3, # so we use pandas array = np.asarray(array) - I, U = pd.factorize(array, sort=True) - return U.astype(array.dtype), I + I, U = pd.factorize(array.ravel(), sort=True) + return U.astype(array.dtype), I.reshape(array.shape) def shape_to_string(shape): diff --git a/glue/utils/tests/test_array.py b/glue/utils/tests/test_array.py index b6765d756..adfef7a29 100644 --- a/glue/utils/tests/test_array.py +++ b/glue/utils/tests/test_array.py @@ -36,6 +36,14 @@ def test_unique_dtype(): assert U.dtype.kind in 'SU' +def test_unique_nd(): + + array = np.array([['a', 'b'], ['c', 'a']]) + U, I = unique(array) + assert_equal(U, ['a', 'b', 'c']) + assert_equal(I, [[0, 1], [2, 0]]) + + def test_shape_to_string(): assert shape_to_string((1, 4, 3)) == "(1, 4, 3)" From 07b3250a21694d06e51148ee8ffd239926908822 Mon Sep 17 00:00:00 2001 From: Thomas Robitaille Date: Thu, 29 Jul 2021 11:21:09 +0100 Subject: [PATCH 2/5] Added changelog entry --- CHANGES.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 8f4073e97..14b1ab4cb 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -12,6 +12,8 @@ v1.2.0 (unreleased) * Expose ``DataCollection.delay_link_manager_update`` which can be used to delay any updating to the link tree when adding datasets. [#2225] +* Allow CategoricalComponents to be n-dimensional. [#2214] + v1.1.0 (2021-07-21) ------------------- From 3e67b48eae88c325e8e2ef6c7be938374e4758e6 Mon Sep 17 00:00:00 2001 From: Jonathan Foster Date: Wed, 4 Aug 2021 14:43:33 -0400 Subject: [PATCH 3/5] Fix autotyping and subsets for Nd categoricals --- glue/core/component.py | 2 +- glue/core/subset.py | 2 +- glue/core/tests/test_component.py | 8 ++++++++ glue/core/tests/test_subset.py | 12 ++++++++++++ 4 files changed, 22 insertions(+), 2 deletions(-) diff --git a/glue/core/component.py b/glue/core/component.py index 667b3794c..4cf1411d5 100644 --- a/glue/core/component.py +++ b/glue/core/component.py @@ -148,7 +148,7 @@ def autotyped(cls, data, units=None): if data.dtype.kind == 'M': return DateTimeComponent(data) - n = coerce_numeric(data) + n = coerce_numeric(data.ravel()).reshape(data.shape) thresh = 0.5 try: diff --git a/glue/core/subset.py b/glue/core/subset.py index 28d8398a5..0021c22df 100644 --- a/glue/core/subset.py +++ b/glue/core/subset.py @@ -684,7 +684,7 @@ def to_mask(self, data, view=None): x = data[self.att, view] result = self.roi.contains(x, None) assert x.shape == result.shape - return result.ravel() + return result def copy(self): result = CategoricalROISubsetState() diff --git a/glue/core/tests/test_component.py b/glue/core/tests/test_component.py index 656aaac7d..f42d68b06 100644 --- a/glue/core/tests/test_component.py +++ b/glue/core/tests/test_component.py @@ -193,6 +193,14 @@ def test_nd_categorical_component(): np.testing.assert_equal(cat_comp.categories, ['a', 'b', 'c']) +def test_nd_categorical_component_autotype(): + x = np.array([['M', 'F'], ['F', 'M']]) + assert Component.autotyped(x).categorical + + d = Data(x=[['male', 'female', 'male'],['female','female','male']]) + assert d.get_component('x').categorical + + class TestCoordinateComponent(object): def setup_method(self, method): diff --git a/glue/core/tests/test_subset.py b/glue/core/tests/test_subset.py index b33cdad8b..c330cf096 100644 --- a/glue/core/tests/test_subset.py +++ b/glue/core/tests/test_subset.py @@ -693,6 +693,18 @@ def test_inherited_properties(): assert sub.hub is d.hub +class TestNDCategoricalSubsets(): + + def setup_method(self, method): + self.data = Data(sex=[['Male','Male','Female'],['Female','Male','Female']]) + + def test_categorical_roi_subset(self): + roi = CategoricalROI(['sex','Male']) + subset = self.data.new_subset() + subset.subset_state = CategoricalROISubsetState(att=self.data.id['sex'], roi=roi) + np.testing.assert_equal(self.data.subsets[0].to_mask(), [[1, 1, 0],[0, 1, 0]]) + + class TestCloneSubsetStates(): def setup_method(self, method): From ac110e5ebe27677154fe0496df73a3d1cabc6cf2 Mon Sep 17 00:00:00 2001 From: Thomas Robitaille Date: Thu, 12 Aug 2021 16:51:32 +0100 Subject: [PATCH 4/5] Require at least one dimension for categorical components --- glue/core/component.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/glue/core/component.py b/glue/core/component.py index 4cf1411d5..57333db8a 100644 --- a/glue/core/component.py +++ b/glue/core/component.py @@ -389,6 +389,9 @@ def __init__(self, categorical_data, categories=None, jitter=None, units=None): self._data = categorical_ndarray(categorical_data, copy=False, categories=categories) + if self._data.ndim < 1: + raise ValueError("Categorical Data must be at least 1-dimensional") + self.jitter(method=jitter) @property From dbc27db7ad65289ba3bc2995ee63d6ba9efb808b Mon Sep 17 00:00:00 2001 From: Thomas Robitaille Date: Thu, 12 Aug 2021 14:47:30 +0100 Subject: [PATCH 5/5] Code style fixes --- glue/core/tests/test_component.py | 15 +++++++-------- glue/core/tests/test_subset.py | 6 +++--- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/glue/core/tests/test_component.py b/glue/core/tests/test_component.py index f42d68b06..550d31481 100644 --- a/glue/core/tests/test_component.py +++ b/glue/core/tests/test_component.py @@ -184,20 +184,19 @@ def test_valueerror_on_bad_jitter(self): def test_nd_categorical_component(): - - data = [['a', 'b'], ['c', 'b']] - cat_comp = CategoricalComponent(data) - np.testing.assert_equal(cat_comp.data, data) - np.testing.assert_equal(cat_comp.labels, data) - np.testing.assert_equal(cat_comp.codes, [[0, 1], [2, 1]]) - np.testing.assert_equal(cat_comp.categories, ['a', 'b', 'c']) + data = [['a', 'b'], ['c', 'b']] + cat_comp = CategoricalComponent(data) + np.testing.assert_equal(cat_comp.data, data) + np.testing.assert_equal(cat_comp.labels, data) + np.testing.assert_equal(cat_comp.codes, [[0, 1], [2, 1]]) + np.testing.assert_equal(cat_comp.categories, ['a', 'b', 'c']) def test_nd_categorical_component_autotype(): x = np.array([['M', 'F'], ['F', 'M']]) assert Component.autotyped(x).categorical - d = Data(x=[['male', 'female', 'male'],['female','female','male']]) + d = Data(x=[['male', 'female', 'male'], ['female', 'female', 'male']]) assert d.get_component('x').categorical diff --git a/glue/core/tests/test_subset.py b/glue/core/tests/test_subset.py index c330cf096..e953efc0b 100644 --- a/glue/core/tests/test_subset.py +++ b/glue/core/tests/test_subset.py @@ -696,13 +696,13 @@ def test_inherited_properties(): class TestNDCategoricalSubsets(): def setup_method(self, method): - self.data = Data(sex=[['Male','Male','Female'],['Female','Male','Female']]) + self.data = Data(sex=[['Male', 'Male', 'Female'], ['Female', 'Male', 'Female']]) def test_categorical_roi_subset(self): - roi = CategoricalROI(['sex','Male']) + roi = CategoricalROI(['sex', 'Male']) subset = self.data.new_subset() subset.subset_state = CategoricalROISubsetState(att=self.data.id['sex'], roi=roi) - np.testing.assert_equal(self.data.subsets[0].to_mask(), [[1, 1, 0],[0, 1, 0]]) + np.testing.assert_equal(self.data.subsets[0].to_mask(), [[1, 1, 0], [0, 1, 0]]) class TestCloneSubsetStates():