Skip to content
8 changes: 8 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,10 @@ mast
jplspec
^^^^^^^

- Refactored to use linelists.core. Added new ``get_molecule`` method [#3456]
- Moved to linelists/. astroquery.jplspec is now deprecated in favor of astroquery.linelists.jplspec [#3455]


linelists.jplspec
^^^^^^^^^^^^^^^^^

Expand All @@ -71,6 +73,12 @@ mpc

- Fix bug in queries for interstellar objects with `MPC.get_observations` and enable queries for "dead" comets [#3474]

linelists
^^^^^^^^^

- General tools shared by the CDMS and JPL modules moved to ``linelists.core`` [#3456]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure we need this line as users are not expected to interact with core.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fair enough, but the changelog is also a little bit for devs, no? I'm fine removing this

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, then we can keep it. I may still need to reformat it before release to be more specific; and thus useful for the devs :)

- Added jplspec, moved from its previous location ``astroquery.jplspec`` to ``astroquery.linelists.jplspec`` [#3455]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line is already listed above

Suggested change
- Added jplspec, moved from its previous location (astroquery.jplspec to astroquery.linelists.jplspec) [#3455]

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you noting that this is redundant information with that under the generic linelists category? I think the change should be noted in both places.


xmatch
^^^^^^

Expand Down
2 changes: 1 addition & 1 deletion astroquery/jplspec/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""
JPL Spectral Catalog (Deprecated Location)
-------------------------------------------
------------------------------------------

.. deprecated:: 0.4.12
The `astroquery.jplspec` module has been moved to `astroquery.linelists.jplspec`.
Expand Down
7 changes: 7 additions & 0 deletions astroquery/linelists/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""
Linelists module
----------------
This module contains sub-modules for querying molecular and atomic line
lists, along with common utilities for parsing catalog files.
"""
171 changes: 115 additions & 56 deletions astroquery/linelists/cdms/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@
from astropy import table
from astropy.io import ascii
from astroquery.query import BaseQuery
from astroquery.utils import async_to_sync
# import configurable items declared in __init__.py
from astroquery.linelists.cdms import conf
from astroquery.exceptions import InvalidQueryError, EmptyResponseError
from astroquery.linelists.core import parse_letternumber, parse_molid
from astroquery.utils import process_asyncs
from astroquery import log

import re
import string

__all__ = ['CDMS', 'CDMSClass']

Expand All @@ -25,7 +25,6 @@ def data_path(filename):
return os.path.join(data_dir, filename)


@async_to_sync
class CDMSClass(BaseQuery):
# use the Configuration Items imported from __init__.py
URL = conf.search
Expand All @@ -34,6 +33,86 @@ class CDMSClass(BaseQuery):
TIMEOUT = conf.timeout
MALFORMATTED_MOLECULE_LIST = ['017506 NH3-wHFS', '028528 H2NC', '058501 H2C2S', '064527 HC3HCN']

def __init__(self, *, fallback_to_getmolecule=False):
    """
    CDMS line list query class

    Parameters
    ----------
    fallback_to_getmolecule : bool, optional
        If True, signals that when a molecule is requested that results in
        a malformatted or unparseable response, ``get_molecule`` should be
        attempted automatically to retrieve the full catalog for that
        molecule. In this case, no frequency-based selection will be
        applied. The value is kept on the instance as the
        ``fallback_to_getmolecule`` attribute.
    """
    super().__init__()

    # Keep the preference on the instance: the argument used to be accepted
    # and then dropped, so setting it at construction time had no effect.
    self.fallback_to_getmolecule = fallback_to_getmolecule

def _mol_to_payload(self, molecule, *, parse_name_locally=False, flags=0):
    """
    Turn a user-supplied molecule specifier into the payload value.

    Parameters
    ----------
    molecule : str
        Molecule specifier as given by the caller.
    parse_name_locally : bool, optional
        When False, ``molecule`` is returned untouched. When True, the
        specifier is matched against the table from ``build_lookup()``
        and the first match is returned as ``"<6-digit tag> <name>"``.
    flags : int, optional
        Passed straight through to the lookup table's ``find`` method.

    Raises
    ------
    InvalidQueryError
        If local parsing is requested and nothing matches.
    """
    # Nothing to resolve: hand the specifier back as-is
    if not parse_name_locally:
        return molecule

    self.lookup_ids = build_lookup()
    matches = self.lookup_ids.find(molecule, flags)
    if len(matches) == 0:
        raise InvalidQueryError('No matching species found. Please '
                                'refine your search or read the Docs '
                                'for pointers on how to search.')

    # Format every match, then keep only the first one
    formatted = [f"{val:06d} {key}" for key, val in matches.items()]
    return formatted[0]

def query_lines(self, min_frequency, max_frequency, *,
                min_strength=-500, molecule='All',
                temperature_for_intensity=300, flags=0,
                parse_name_locally=False, get_query_payload=False,
                fallback_to_getmolecule=False,
                verbose=False,
                cache=True):
    # Synchronous front-end to ``query_lines_async``: sends the query and
    # parses the response into a table, unless ``get_query_payload`` is set,
    # in which case the async method's return value is handed back directly.
    # Molecules listed in MALFORMATTED_MOLECULE_LIST are intercepted before
    # any request is made and either redirected to ``get_molecule`` (when the
    # fallback is enabled) or rejected with a ValueError.

    # The fallback is active when requested for this call or when it has
    # been enabled on the instance (``self.fallback_to_getmolecule``).
    use_fallback = (fallback_to_getmolecule
                    or getattr(self, 'fallback_to_getmolecule', False))

    # accounts for three formats, e.g.: '058501' or 'H2C2S' or '058501 H2C2S'
    badlist = (self.MALFORMATTED_MOLECULE_LIST
               + [y for x in self.MALFORMATTED_MOLECULE_LIST for y in x.split()])

    # resolve the molecule exactly as the request payload would
    requested_molecule = None
    if molecule != 'All':
        requested_molecule = self._mol_to_payload(molecule,
                                                  parse_name_locally=parse_name_locally,
                                                  flags=flags)

    is_known_bad = bool(requested_molecule) and requested_molecule in badlist

    if is_known_bad and not get_query_payload:
        if not use_fallback:
            raise ValueError(f"Molecule {requested_molecule} is known not to comply with standard CDMS format. "
                             f"Try get_molecule({requested_molecule}) instead or set "
                             f"CDMS.fallback_to_getmolecule = True.")
        try:
            # the first six characters hold the numeric tag when the
            # specifier is of the form '058501 H2C2S' or '058501'
            return self.get_molecule(requested_molecule[:6])
        except ValueError as ex:
            # Only rewrite the ID-format error; let anything else propagate
            # with its original traceback.
            if "molecule_id should be an integer or a length-6 string of numbers" not in str(ex):
                raise
            # try to give the users good guidance on which parameters will work
            if parse_name_locally:
                raise ValueError(f"Molecule {molecule} could not be parsed or identified."
                                 " Check that the name was correctly specified.") from ex
            raise ValueError(f"Molecule {molecule} needs to be formatted as"
                             " a 6-digit string ID for the get_molecule fallback to work."
                             " Try setting parse_name_locally=True "
                             "to turn your molecule name into a CDMS number ID.") from ex

    response = self.query_lines_async(min_frequency=min_frequency,
                                      max_frequency=max_frequency,
                                      min_strength=min_strength,
                                      molecule=molecule,
                                      temperature_for_intensity=temperature_for_intensity,
                                      flags=flags,
                                      parse_name_locally=parse_name_locally,
                                      get_query_payload=get_query_payload,
                                      cache=cache)
    if get_query_payload:
        return response
    return self._parse_result(response, molname=molecule, verbose=verbose)

def query_lines_async(self, min_frequency, max_frequency, *,
min_strength=-500, molecule='All',
temperature_for_intensity=300, flags=0,
Expand Down Expand Up @@ -140,17 +219,9 @@ def query_lines_async(self, min_frequency, max_frequency, *,
payload['Moleculesgrp'] = 'all species'
else:
if molecule is not None:
if parse_name_locally:
self.lookup_ids = build_lookup()
luts = self.lookup_ids.find(molecule, flags)
if len(luts) == 0:
raise InvalidQueryError('No matching species found. Please '
'refine your search or read the Docs '
'for pointers on how to search.')
payload['Molecules'] = tuple(f"{val:06d} {key}"
for key, val in luts.items())[0]
else:
payload['Molecules'] = molecule
payload['Molecules'] = self._mol_to_payload(molecule,
parse_name_locally=parse_name_locally,
flags=flags)

if get_query_payload:
return payload
Expand Down Expand Up @@ -182,16 +253,11 @@ def query_lines_async(self, min_frequency, max_frequency, *,
response2 = self._request(method='GET', url=fullurl,
timeout=self.TIMEOUT, cache=cache)

# accounts for three formats, e.g.: '058501' or 'H2C2S' or '058501 H2C2S'
badlist = (self.MALFORMATTED_MOLECULE_LIST + # noqa
[y for x in self.MALFORMATTED_MOLECULE_LIST for y in x.split()])
if 'Moleculesgrp' not in payload.keys() and payload['Molecules'] in badlist:
raise ValueError(f"Molecule {payload['Molecules']} is known not to comply with standard CDMS format. "
f"Try get_molecule({payload['Molecules']}) instead.")

return response2

def _parse_result(self, response, *, verbose=False):
query_lines.__doc__ = process_asyncs.async_to_sync_docstr(query_lines_async.__doc__)

def _parse_result(self, response, *, verbose=False, molname=None):
"""
Parse a response into an `~astropy.table.Table`

Expand Down Expand Up @@ -238,6 +304,8 @@ def _parse_result(self, response, *, verbose=False):
soup = BeautifulSoup(response.text, 'html.parser')
text = soup.find('pre').text

# this is a different workaround to try to make _some_ of the bad molecules parseable
# (it doesn't solve all of them, which is why the above fallback exists)
need_to_filter_bad_molecules = False
for bad_molecule in self.MALFORMATTED_MOLECULE_LIST:
if text.find(bad_molecule.split()[1]) > -1:
Expand Down Expand Up @@ -316,7 +384,7 @@ def _parse_result(self, response, *, verbose=False):
except ValueError as ex:
# Give users a more helpful exception when parsing fails
new_message = ("Failed to parse CDMS response. This may be caused by a malformed search return. "
"You can check this by running `CDMS.get_molecule('<id>')` instead; if it works, the "
f"You can check this by running `CDMS.get_molecule('{molname}')` instead; if it works, the "
"problem is caused by the CDMS search interface and cannot be worked around.")
raise ValueError(new_message) from ex

Expand Down Expand Up @@ -423,8 +491,9 @@ def get_molecule(self, molecule_id, *, cache=True, return_response=False):

Parameters
----------
molecule_id : str
The 6-digit molecule identifier as a string
molecule_id : int or str
The molecule tag/identifier. Can be an integer (e.g., 18003 for H2O)
or a zero-padded 6-character string (e.g., '018003').
cache : bool
Defaults to True. If set overrides global caching behavior.
See :ref:`caching documentation <astroquery_cache>`.
Expand All @@ -433,8 +502,8 @@ def get_molecule(self, molecule_id, *, cache=True, return_response=False):
the response. If this is set, the response will be returned whether
or not it was successful. Default is False.
"""
if not isinstance(molecule_id, str) or len(molecule_id) != 6:
raise ValueError("molecule_id should be a length-6 string of numbers")
molecule_id = parse_molid(molecule_id)

url = f'{self.CLASSIC_URL}/entries/c{molecule_id}.cat'
response = self._request(method='GET', url=url,
timeout=self.TIMEOUT, cache=cache)
Expand All @@ -456,14 +525,25 @@ def get_molecule(self, molecule_id, *, cache=True, return_response=False):

def _parse_cat(self, text, *, verbose=False):
"""
Parse a catalog response into an `~astropy.table.Table`
Parse a CDMS-format catalog file into an `~astropy.table.Table`.

See details in _parse_response; this is a very similar function,
but the catalog responses have a slightly different format.
The catalog data files are composed of 80-character card images.
Format: [F13.4, 2F8.4, I2, F10.4, I3, I7, I4, 12I2]:
FREQ, ERR, LGINT, DR, ELO, GUP, TAG, QNFMT, QN

Parameters
----------
text : str
The catalog file text content.
verbose : bool, optional
Not used currently.

Returns
-------
Table : `~astropy.table.Table`
Parsed catalog data.
"""
# notes about the format
# [F13.4, 2F8.4, I2, F10.4, I3, I7, I4, 12I2]: FREQ, ERR, LGINT, DR, ELO, GUP, TAG, QNFMT, QN noqa
# 13 21 29 31 41 44 51 55 57 59 61 63 65 67 69 71 73 75 77 79 noqa
# Column start positions
starts = {'FREQ': 0,
'ERR': 14,
'LGINT': 22,
Expand Down Expand Up @@ -494,7 +574,9 @@ def _parse_cat(self, text, *, verbose=False):
col_starts=list(starts.values()),
format='fixed_width', fast_reader=False)

# Ensure TAG is integer type for computation
# int truncates - which is what we want
result['TAG'] = result['TAG'].astype(int)
result['MOLWT'] = [int(x/1e3) for x in result['TAG']]

result['FREQ'].unit = u.MHz
Expand Down Expand Up @@ -527,29 +609,6 @@ def _parse_cat(self, text, *, verbose=False):
CDMS = CDMSClass()


def parse_letternumber(st):
    """
    Convert a CDMS two-character quantum number into an integer.

    A masked input yields the sentinel value -999999.

    The CDMS catalog allots exactly two characters per quantum number, so
    values outside the range -9..99 are written with a leading letter:
    an upper-case letter stands for the hundreds-and-tens part of values
    above 99 (``A0`` is 100, ``Z9`` is 359), and a lower-case letter stands
    for the tens part of negative values below -9 (``a0`` is -10, ``b0``
    is -20). Half-integer quanta are rounded up by CDMS before encoding.
    """
    if np.ma.is_masked(st):
        return -999999

    lower = string.ascii_lowercase
    upper = string.ascii_uppercase

    pieces = []
    for char in st:
        if char in lower:
            # 'a' -> '-1', 'b' -> '-2', ...
            pieces.append('-' + str(lower.index(char) + 1))
        elif char in upper:
            # 'A' -> '10', 'B' -> '11', ...
            pieces.append(str(upper.index(char) + 10))
        else:
            pieces.append(char)

    return int(''.join(pieces))


class Lookuptable(dict):

def find(self, st, flags):
Expand Down
1 change: 1 addition & 0 deletions astroquery/linelists/cdms/setup_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ def get_package_data():
paths_test = [os.path.join('data', '028503 CO, v=0.data'),
os.path.join('data', '117501 HC7S.data'),
os.path.join('data', '099501 HC7N, v=0.data'),
os.path.join('data', 'c058501.cat'),
os.path.join('data', 'post_response.html'),
]

Expand Down
3 changes: 3 additions & 0 deletions astroquery/linelists/cdms/tests/data/c058501.cat
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
114.9627 0.0001-10.6817 3 9.7413 9 58501 303 1 1 0 1 1 1
344.8868 0.0002 -9.9842 3 10.4849 15 58501 303 2 1 1 2 1 2
689.7699 0.0004 -9.5394 3 11.6003 21 58501 303 3 1 2 3 1 3
Loading
Loading