Skip to content

Commit

Permalink
Merge pull request #2214 from astrofrog/categorical-nd
Browse files Browse the repository at this point in the history
Allow CategoricalComponent to be n-dimensional
  • Loading branch information
astrofrog authored Aug 12, 2021
2 parents 049d3c3 + dbc27db commit 5978a92
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 7 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ v1.2.0 (unreleased)
* Expose ``DataCollection.delay_link_manager_update`` which can be used
to delay any updating to the link tree when adding datasets. [#2225]

* Allow CategoricalComponents to be n-dimensional. [#2214]

v1.1.0 (2021-07-21)
-------------------

Expand Down
8 changes: 4 additions & 4 deletions glue/core/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def autotyped(cls, data, units=None):
if data.dtype.kind == 'M':
return DateTimeComponent(data)

n = coerce_numeric(data)
n = coerce_numeric(data.ravel()).reshape(data.shape)

thresh = 0.5
try:
Expand Down Expand Up @@ -389,8 +389,8 @@ def __init__(self, categorical_data, categories=None, jitter=None, units=None):

self._data = categorical_ndarray(categorical_data, copy=False, categories=categories)

if self._data.ndim != 1:
raise ValueError("Categorical Data must be 1-dimensional")
if self._data.ndim < 1:
raise ValueError("Categorical Data must be at least 1-dimensional")

self.jitter(method=jitter)

Expand Down Expand Up @@ -435,7 +435,7 @@ def jitter(self, method=None):
Parameters
----------
method : {None, 'uniform'}
If `None`, not jittering is done (or any jittering is undone).
If `None`, no jittering is done (or any jittering is undone).
If ``'uniform'``, the codes are randomized by a uniformly
distributed random variable.
"""
Expand Down
2 changes: 1 addition & 1 deletion glue/core/subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,7 @@ def to_mask(self, data, view=None):
x = data[self.att, view]
result = self.roi.contains(x, None)
assert x.shape == result.shape
return result.ravel()
return result

def copy(self):
result = CategoricalROISubsetState()
Expand Down
17 changes: 17 additions & 0 deletions glue/core/tests/test_component.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,23 @@ def test_valueerror_on_bad_jitter(self):
cat_comp.jitter(method='this will never be a jitter method')


def test_nd_categorical_component():
    """Check that a CategoricalComponent built from 2-d labels keeps its shape.

    The data, labels and integer codes are all compared against 2-d
    expectations, and the categories against the flat list of distinct labels.
    """
    labels_2d = [['a', 'b'], ['c', 'b']]
    expected_codes = [[0, 1], [2, 1]]
    expected_categories = ['a', 'b', 'c']

    component = CategoricalComponent(labels_2d)

    np.testing.assert_equal(component.data, labels_2d)
    np.testing.assert_equal(component.labels, labels_2d)
    np.testing.assert_equal(component.codes, expected_codes)
    np.testing.assert_equal(component.categories, expected_categories)


def test_nd_categorical_component_autotype():
    """Check that 2-d string input is auto-typed as a categorical component."""
    # Directly, through Component.autotyped on a 2-d string array.
    string_array = np.array([['M', 'F'], ['F', 'M']])
    component = Component.autotyped(string_array)
    assert component.categorical

    # Indirectly, when a Data object is built from nested lists of strings.
    dataset = Data(x=[['male', 'female', 'male'], ['female', 'female', 'male']])
    x_component = dataset.get_component('x')
    assert x_component.categorical


class TestCoordinateComponent(object):

def setup_method(self, method):
Expand Down
12 changes: 12 additions & 0 deletions glue/core/tests/test_subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,18 @@ def test_inherited_properties():
assert sub.hub is d.hub


class TestNDCategoricalSubsets:
    """Subset tests on a Data object whose categorical attribute is 2-d."""

    def setup_method(self, method):
        # 2 x 3 grid of string labels stored under the 'sex' attribute.
        sex_labels = [['Male', 'Male', 'Female'], ['Female', 'Male', 'Female']]
        self.data = Data(sex=sex_labels)

    def test_categorical_roi_subset(self):
        """The mask from a categorical ROI subset keeps the 2-d data shape."""
        expected_mask = [[1, 1, 0], [0, 1, 0]]

        roi = CategoricalROI(['sex', 'Male'])
        state = CategoricalROISubsetState(att=self.data.id['sex'], roi=roi)

        new_subset = self.data.new_subset()
        new_subset.subset_state = state

        # Look the subset up through the data object, as the mask is
        # expected to be available from the registered subset.
        mask = self.data.subsets[0].to_mask()
        np.testing.assert_equal(mask, expected_mask)


class TestCloneSubsetStates():

def setup_method(self, method):
Expand Down
4 changes: 2 additions & 2 deletions glue/utils/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ def unique(array):
# numpy.unique doesn't handle mixed-types on python3,
# so we use pandas
array = np.asarray(array)
I, U = pd.factorize(array, sort=True)
return U.astype(array.dtype), I
I, U = pd.factorize(array.ravel(), sort=True)
return U.astype(array.dtype), I.reshape(array.shape)


def shape_to_string(shape):
Expand Down
8 changes: 8 additions & 0 deletions glue/utils/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,14 @@ def test_unique_dtype():
assert U.dtype.kind in 'SU'


def test_unique_nd():
    """Check that unique() on a 2-d array returns flat values and 2-d indices."""
    labels = np.array([['a', 'b'], ['c', 'a']])
    expected_values = ['a', 'b', 'c']
    expected_indices = [[0, 1], [2, 0]]

    values, indices = unique(labels)

    assert_equal(values, expected_values)
    assert_equal(indices, expected_indices)


def test_shape_to_string():
assert shape_to_string((1, 4, 3)) == "(1, 4, 3)"

Expand Down

0 comments on commit 5978a92

Please sign in to comment.