Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix for string columns in FITS files #919

Merged
merged 8 commits into from
Apr 4, 2016
4 changes: 4 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ v0.8 (unreleased)
v0.7.2 (unreleased)
-------------------

* Fix a bug that caused string columns in FITS files to not be read
correctly, and update coerce_numeric to give a ValueError for string
columns that can't be converted.

* Make sure main window title is set. [#914]

v0.7.1 (2016-03-29)
Expand Down
1 change: 1 addition & 0 deletions glue/core/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ def autotyped(cls, data, units=None):
return CategoricalComponent(data, units=units)

n = coerce_numeric(data)

thresh = 0.5
try:
use_categorical = np.issubdtype(data.dtype, np.character) and \
Expand Down
2 changes: 1 addition & 1 deletion glue/core/data_factories/fits.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def new_data():
if column.ndim != 1:
warnings.warn("Dropping column '{0}' since it is not 1-dimensional".format(column_name))
continue
component = Component(column, units=column.unit)
component = Component.autotyped(column, units=column.unit)
data.add_component(component=component,
label=column_name)
return [groups[idx] for idx in groups]
Expand Down
4 changes: 2 additions & 2 deletions glue/core/data_factories/npy.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def npy_reader(filename, format='auto', auto_merge=False, **kwargs):

d = Data()
for name in npy_data.dtype.names:
comp = Component(npy_data[name])
comp = Component.autotyped(npy_data[name])
d.add_component(comp, label=name)

return d
Expand Down Expand Up @@ -79,7 +79,7 @@ def npz_reader(filename, format='auto', auto_merge=False, **kwargs):
" arrays, e.g., with specified names.")

for name in arr.dtype.names:
comp = Component(arr[name])
comp = Component.autotyped(arr[name])
d.add_component(comp, label=name)

groups.append(d)
Expand Down
16 changes: 10 additions & 6 deletions glue/core/data_factories/tests/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@ def test_npy_load(tmpdir):
f.seek(0)

data2 = df.load_data(f.name)
for name in data.dtype.names:
assert_array_equal(data[name], data2[name])
assert_array_equal(data['name'], data2.get_component('name').labels)
assert_array_equal(data['ra'], data2['ra'])
assert_array_equal(data['dec'], data2['dec'])

def test_npz_load(tmpdir):
data1 = np.array([("a",152.2352,-21.513), ("b",21.412,35.1341)],
Expand All @@ -26,15 +27,18 @@ def test_npz_load(tmpdir):
dtype=[('name','|S1'),('l','f8'),('b','f8')])

with open(tmpdir.join('test.npz').strpath, 'wb') as f:

np.savez(f, data1=data1, data2=data2)
f.seek(0)

data_loaded = df.load_data(f.name)

arr = data_loaded[0]
for name in data1.dtype.names:
assert_array_equal(data1[name], arr[name])
assert_array_equal(data1['name'], arr.get_component('name').labels)
assert_array_equal(data1['ra'], arr['ra'])
assert_array_equal(data1['dec'], arr['dec'])

arr = data_loaded[1]
for name in data2.dtype.names:
assert_array_equal(data2[name], arr[name])
assert_array_equal(data2['name'], arr.get_component('name').labels)
assert_array_equal(data2['l'], arr['l'])
assert_array_equal(data2['b'], arr['b'])
5 changes: 4 additions & 1 deletion glue/utils/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,10 @@ def coerce_numeric(arr):
return arr.astype(np.int)

# a string dtype, or anything else
return pd.Series(arr).convert_objects(convert_numeric=True).values
try:
return pd.to_numeric(arr, errors='coerce')
except AttributeError: # older versions of pandas
return pd.Series(arr).convert_objects(convert_numeric=True).values


def check_sorted(array):
Expand Down