Skip to content

Commit

Permalink
Merge pull request #762 from astrofrog/astropy-ascii-tables
Browse files Browse the repository at this point in the history
Astropy ASCII tables
  • Loading branch information
astrofrog committed Oct 22, 2015
2 parents 559aa08 + 9500a1a commit 323898a
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 121 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ v0.6 (unreleased)
* When a box has been drawn to extract a spectrum from a cube, the box can
then be moved by pressing the control key and dragging it. [#707]

* Refactored ASCII I/O to include more Astropy table formats. [#762]

* When saving a session, if no extension is specified, the .glu extension is
added. [#729]

Expand Down
1 change: 1 addition & 0 deletions glue/core/data_factories/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from .helpers import *
from .fits import *
from .hdf5 import *
from .astropy_table import *
from .pandas import *
from .excel import *
from .image import *
Expand Down
115 changes: 115 additions & 0 deletions glue/core/data_factories/astropy_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
from __future__ import absolute_import, division, print_function

import numpy as np

from ..data import Component, Data
from ...config import data_factory

from .helpers import has_extension

# Public names exported by this module: the generic Astropy table factory
# plus the per-format factories assigned at the bottom of the file.
__all__ = ['astropy_tabular_data', 'sextractor_factory', 'cds_factory',
'daophot_factory', 'ipac_factory', 'aastex_factory',
'latex_factory']


# In this file, we define data factories based on the Astropy table reader.


def is_readable_by_astropy(filename, **kwargs):
    """
    Identifier for the generic Astropy table factory.

    Attempts to read ``filename`` with ``astropy_table_read`` (forwarding
    ``kwargs``) and returns True if the read succeeds, False if it raises
    an ordinary exception.
    """
    # This identifier is not efficient, because it involves actually trying
    # to read in the table. However, we only use this as the identifier for
    # the astropy_tabular_data factory which has a priority of 0 and is
    # therefore only used as a last attempt if all else fails.
    try:
        astropy_table_read(filename, **kwargs)
    except Exception:
        # Any regular failure means "not readable". KeyboardInterrupt and
        # SystemExit are deliberately not caught so that the user can still
        # abort a slow read attempt.
        return False
    else:
        return True


def astropy_table_read(*args, **kwargs):
    """
    Read a table with ``astropy.table.Table.read``, trying ASCII first.

    All positional and keyword arguments are forwarded to ``Table.read``.
    If no ``format`` keyword is given, the generic ``'ascii'`` format is
    tried first; if that fails, the table is read again without specifying
    a format. Exceptions from that final read propagate to the caller.
    """

    from astropy.table import Table

    # In Python 3, as of Astropy 0.4, if the format is not specified, the
    # automatic format identification will fail (astropy/astropy#3013).
    # This is only a problem for ASCII formats however, because it is due
    # to the fact that the file object in io.ascii does not rewind to the
    # start between guesses (due to a bug), so here we can explicitly try
    # the ASCII format if the format keyword was not already present. But
    # also more generally, we should first try the ASCII readers.
    if 'format' not in kwargs:
        try:
            return Table.read(*args, format='ascii', **kwargs)
        except Exception:
            # The ASCII attempt is best-effort only; fall through to the
            # format-less read below. KeyboardInterrupt and SystemExit are
            # not swallowed.
            pass

    # If the above didn't work, attempt to read with no specified format
    return Table.read(*args, **kwargs)


@data_factory(label="Catalog (astropy.table parser)",
              identifier=is_readable_by_astropy,
              priority=0)
def astropy_tabular_data(*args, **kwargs):
    """
    Read a table through Astropy and turn it into a data set.

    Only tables with 1D columns are supported. Every argument is handed
    on to astropy.table.Table.read(...).
    """

    dataset = Data()

    tbl = astropy_table_read(*args, **kwargs)

    # Convert each table column into a component of the data set
    for column_name in tbl.columns:
        column = tbl[column_name]

        # Use ``unit`` when the column has it, otherwise fall back to
        # ``units``
        if hasattr(column, 'unit'):
            units = column.unit
        else:
            units = column.units

        if tbl.masked:
            # Replace masked entries for now: NaN where possible, -1 when
            # NaN cannot be assigned (e.g. integer dtype)
            try:
                column = column.filled(fill_value=np.nan)
            except ValueError:
                column = column.filled(fill_value=-1)

        component = Component.autotyped(column, units=units)
        dataset.add_component(component, column_name)

    return dataset


@data_factory(label="VO or FITS table",
              identifier=has_extension('xml vot fits xml.gz vot.gz fits.gz'),
              priority=1)
def astropy_tabular_data_specialized(*args, **kwargs):
    """
    Factory registered for VO and FITS file extensions.

    Simply forwards all arguments to astropy_tabular_data and returns its
    result.
    """
    dataset = astropy_tabular_data(*args, **kwargs)
    return dataset


# Add explicit factories for the formats which astropy.table
# can parse, but does not auto-identify


def formatted_table_factory(format, label):
    """
    Build and register a data factory for one explicit Astropy ASCII format.

    The returned factory reads a file with the ``ascii.<format>`` reader
    via astropy_tabular_data and is registered under ``label``. Its
    ``__name__`` is set to ``<format>_factory``.
    """

    def _never_identify(*a, **k):
        # Always False, so this factory never claims a file on its own
        return False

    def factory(file, **kwargs):
        kwargs['format'] = 'ascii.%s' % format
        return astropy_tabular_data(file, **kwargs)

    register = data_factory(label=label, identifier=_never_identify)
    factory = register(factory)

    # Give the function the same name as the module-level variable it is
    # assigned to, which allows pickling to work
    factory.__name__ = '%s_factory' % format

    return factory

# One module-level factory per explicit ASCII format. Each variable name
# matches the ``__name__`` that formatted_table_factory assigns
# ('<format>_factory').
sextractor_factory = formatted_table_factory('sextractor', 'SExtractor Catalog')
cds_factory = formatted_table_factory('cds', 'CDS Catalog')
daophot_factory = formatted_table_factory('daophot', 'DAOphot Catalog')
ipac_factory = formatted_table_factory('ipac', 'IPAC Catalog')
aastex_factory = formatted_table_factory('aastex', 'AASTeX Table')
latex_factory = formatted_table_factory('latex', 'LaTeX Table')
127 changes: 6 additions & 121 deletions glue/core/data_factories/tables.py
Original file line number Diff line number Diff line change
@@ -1,110 +1,19 @@
from __future__ import absolute_import, division, print_function

import numpy as np

from ..data import Component, Data
from ...external import six
from ...config import data_factory

from .helpers import has_extension

__all__ = ['tabular_data', 'sextractor_factory', 'astropy_tabular_data',
'formatted_table_factory']


def _ascii_identifier_v02(origin, args, kwargs):
# this works for astropy v0.2
if isinstance(args[0], six.string_types):
return args[0].endswith(('csv', 'tsv', 'txt', 'tbl', 'dat',
'csv.gz', 'tsv.gz', 'txt.gz', 'tbl.gz',
'dat.gz'))
else:
return False


def _ascii_identifier_v03(origin, *args, **kwargs):
    """
    Identifier using the astropy v0.3 calling convention.

    Collects the variadic arguments and defers to _ascii_identifier_v02.
    """
    # this works for astropy v0.3
    positional, keywords = args, kwargs
    return _ascii_identifier_v02(origin, positional, keywords)


# NOTE(review): 'tbl.bz' in the extension list below looks like a typo for
# 'tbl.gz' (every other compressed entry uses .gz) — confirm.
@data_factory(label="Catalog (Astropy Parser)",
identifier=has_extension('xml vot csv txt tsv tbl dat fits '
'xml.gz vot.gz csv.gz txt.gz tbl.bz '
'dat.gz fits.gz'))
def astropy_tabular_data(*args, **kwargs):
"""
Build a data set from a table. We restrict ourselves to tables
with 1D columns.
All arguments are passed to
astropy.table.Table.read(...).
"""
from distutils.version import LooseVersion
from astropy import __version__
# Refuse to run against astropy releases older than 0.2
if LooseVersion(__version__) < LooseVersion("0.2"):
raise RuntimeError("Glue requires astropy >= v0.2. Please update")

result = Data()

# Read the table
from astropy.table import Table

# Add identifiers for ASCII data
from astropy.io import registry
# The identifier signature differs between astropy < 0.3 and later
# versions, hence the two registration branches
if LooseVersion(__version__) < LooseVersion("0.3"):
registry.register_identifier('ascii', Table, _ascii_identifier_v02,
force=True)
else:
# Basically, we always want the plain ascii reader for now.
# But astropy will complain about ambiguous formats (or use another reader)
# unless we remove other registry identifiers and set up our own reader

nope = lambda *a, **k: False
registry.register_identifier('ascii.glue', Table, _ascii_identifier_v03,
force=True)
registry.register_identifier('ascii.csv', Table, nope, force=True)
registry.register_identifier('ascii.fast_csv', Table, nope, force=True)
registry.register_identifier('ascii', Table, nope, force=True)
registry.register_reader('ascii.glue', Table,
lambda path: Table.read(path, format='ascii'),
force=True)

# First attempt: read without forcing a format
try:
table = Table.read(*args, **kwargs)
except:
# In Python 3, as of Astropy 0.4, if the format is not specified, the
# automatic format identification will fail (astropy/astropy#3013).
# This is only a problem for ASCII formats however, because it is due
# to the fact that the file object in io.ascii does not rewind to the
# start between guesses (due to a bug), so here we can explicitly try
# the ASCII format if the format keyword was not already present.
if 'format' not in kwargs:
table = Table.read(*args, format='ascii.glue', **kwargs)
else:
raise

# Loop through columns and make component list
for column_name in table.columns:
c = table[column_name]
# Use ``unit`` when the column has it, otherwise fall back to ``units``
u = c.unit if hasattr(c, 'unit') else c.units

if table.masked:
# fill array for now
try:
c = c.filled(fill_value=np.nan)
except ValueError: # assigning nan to integer dtype
c = c.filled(fill_value=-1)

nc = Component.autotyped(c, units=u)
result.add_component(nc, column_name)
# Backward-compatibility
from .astropy_table import astropy_tabular_data

return result
__all__ = ['tabular_data']


@data_factory(label="Catalog",
identifier=has_extension('xml vot csv txt tsv tbl dat fits '
'xml.gz vot.gz csv.gz txt.gz tbl.bz '
'dat.gz fits.gz'),
identifier=has_extension('csv txt tsv tbl dat '
'csv.gz txt.gz tbl.bz '
'dat.gz'),
priority=1)
def tabular_data(path, **kwargs):
from .pandas import pandas_read_table
Expand All @@ -116,27 +25,3 @@ def tabular_data(path, **kwargs):
else:
raise IOError("Could not parse file: %s" % path)


# Add explicit factories for the formats which astropy.table
# can parse, but does not auto-identify


def formatted_table_factory(format, label):
    """
    Build and register a data factory for one explicit Astropy ASCII format.

    The returned factory sets ``format='ascii.<format>'`` and delegates to
    tabular_data. It is registered under ``label`` and its ``__name__`` is
    set to ``<format>_factory``.
    """

    def _never_identify(*a, **k):
        # Always False, so this factory never claims a file on its own
        return False

    def factory(file, **kwargs):
        kwargs['format'] = 'ascii.%s' % format
        return tabular_data(file, **kwargs)

    register = data_factory(label=label, identifier=_never_identify)
    factory = register(factory)

    # Give the function the same name as the module-level variable it is
    # assigned to, which allows pickling to work
    factory.__name__ = '%s_factory' % format

    return factory

# One module-level factory per explicit ASCII format. Each variable name
# matches the ``__name__`` that formatted_table_factory assigns
# ('<format>_factory').
sextractor_factory = formatted_table_factory('sextractor', 'SExtractor Catalog')
cds_factory = formatted_table_factory('cds', 'CDS Catalog')
daophot_factory = formatted_table_factory('daophot', 'DAOphot Catalog')
ipac_factory = formatted_table_factory('ipac', 'IPAC Catalog')
aastex_factory = formatted_table_factory('aastex', 'AASTeX Table')
latex_factory = formatted_table_factory('latex', 'LaTeX Table')

0 comments on commit 323898a

Please sign in to comment.