Skip to content

Commit 203a0a5

Browse files
committed
Refactored JPLspec and CDMS to put both in the linelists/ directory.
The commit messages below come from individual commits that were squashed into one. Many other messages were redundant and were manually edited out. Remaining messages: factor pseudo-common code into linelists from cdms/jplspec; refactor jplspec to not use async machinery; refactor to use absolute imports.
1 parent 65aacfb commit 203a0a5

File tree

17 files changed

+1177
-170
lines changed

17 files changed

+1177
-170
lines changed

CHANGES.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,16 +52,27 @@ mast
5252

5353
- Raise informative error if ``MastMissions`` query radius is too large. [#3447]
5454

55+
5556
jplspec
5657
^^^^^^^
5758

59+
- Refactored to use linelists.core [#3456]
5860
- Moved to linelists/. astroquery.jplspec is now deprecated in favor of astroquery.linelists.jplspec [#3455]
5961

62+
6063
linelists.jplspec
6164
^^^^^^^^^^^^^^^^^
6265

6366
- New location for jplspec. astroquery.jplspec is now deprecated in favor of astroquery.linelists.jplspec [#3455]
6467

68+
69+
linelists
70+
^^^^^^^^^
71+
72+
- General tools for both CDMS/JPL moved to linelists.core [#3456]
73+
- Added jplspec, moved from its previous location (astroquery.jplspec) to astroquery.linelists.jplspec [#3455]
74+
75+
6576
xmatch
6677
^^^^^^
6778

astroquery/jplspec/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Licensed under a 3-clause BSD style license - see LICENSE.rst
22
"""
33
JPL Spectral Catalog (Deprecated Location)
4-
-------------------------------------------
4+
------------------------------------------
55
66
.. deprecated:: 0.4.12
77
The `astroquery.jplspec` module has been moved to `astroquery.linelists.jplspec`.

astroquery/linelists/__init__.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Licensed under a 3-clause BSD style license - see LICENSE.rst
2+
"""
3+
Linelists module
4+
----------------
5+
This module contains sub-modules for various molecular and atomic line list databases,
6+
as well as common utilities for parsing catalog files.
7+
"""
8+
9+
from astroquery.linelists.core import parse_letternumber
10+
11+
__all__ = ['parse_letternumber']

astroquery/linelists/cdms/core.py

Lines changed: 96 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,14 @@
88
from astropy import table
99
from astropy.io import ascii
1010
from astroquery.query import BaseQuery
11-
from astroquery.utils import async_to_sync
1211
# import configurable items declared in __init__.py
1312
from astroquery.linelists.cdms import conf
1413
from astroquery.exceptions import InvalidQueryError, EmptyResponseError
14+
from ..core import parse_letternumber
15+
from astroquery.utils import process_asyncs
1516
from astroquery import log
1617

1718
import re
18-
import string
1919

2020
__all__ = ['CDMS', 'CDMSClass']
2121

@@ -25,7 +25,6 @@ def data_path(filename):
2525
return os.path.join(data_dir, filename)
2626

2727

28-
@async_to_sync
2928
class CDMSClass(BaseQuery):
3029
# use the Configuration Items imported from __init__.py
3130
URL = conf.search
@@ -34,6 +33,74 @@ class CDMSClass(BaseQuery):
3433
TIMEOUT = conf.timeout
3534
MALFORMATTED_MOLECULE_LIST = ['017506 NH3-wHFS', '028528 H2NC', '058501 H2C2S', '064527 HC3HCN']
3635

36+
def __init__(self, fallback_to_getmolecule=False):
37+
super().__init__()
38+
39+
def _mol_to_payload(self, molecule, parse_name_locally, flags):
40+
if parse_name_locally:
41+
self.lookup_ids = build_lookup()
42+
luts = self.lookup_ids.find(molecule, flags)
43+
if len(luts) == 0:
44+
raise InvalidQueryError('No matching species found. Please '
45+
'refine your search or read the Docs '
46+
'for pointers on how to search.')
47+
return tuple(f"{val:06d} {key}"
48+
for key, val in luts.items())[0]
49+
else:
50+
return molecule
51+
52+
def query_lines(self, min_frequency, max_frequency, *,
53+
min_strength=-500, molecule='All',
54+
temperature_for_intensity=300, flags=0,
55+
parse_name_locally=False, get_query_payload=False,
56+
fallback_to_getmolecule=False,
57+
verbose=False,
58+
cache=True):
59+
60+
# Check if a malformatted molecule was requested and use fallback if enabled
61+
# accounts for three formats, e.g.: '058501' or 'H2C2S' or '058501 H2C2S'
62+
badlist = (self.MALFORMATTED_MOLECULE_LIST
63+
+ [y for x in self.MALFORMATTED_MOLECULE_LIST for y in x.split()])
64+
65+
# resolve the requested molecule to its payload form (None when querying all species)
66+
requested_molecule = self._mol_to_payload(molecule, parse_name_locally, flags) if molecule != 'All' else None
67+
68+
if requested_molecule and requested_molecule in badlist and not get_query_payload:
69+
if fallback_to_getmolecule:
70+
try:
71+
return self.get_molecule(requested_molecule[:6])
72+
except ValueError as ex:
73+
# try to give the users good guidance on which parameters will work
74+
if "molecule_id should be a length-6 string of numbers" in str(ex):
75+
if parse_name_locally:
76+
raise ValueError(f"Molecule {molecule} could not be parsed or identified."
77+
" Check that the name was correctly specified.")
78+
else:
79+
raise ValueError(f"Molecule {molecule} needs to be formatted as"
80+
" a 6-digit string ID for the get_molecule fallback to work."
81+
" Try setting parse_name_locally=True "
82+
"to turn your molecule name into a CDMS number ID.")
83+
else:
84+
raise ex
85+
else:
86+
raise ValueError(f"Molecule {requested_molecule} is known not to comply with standard CDMS format. "
87+
f"Try get_molecule({requested_molecule}) instead or set "
88+
f"CDMS.fallback_to_getmolecule = True.")
89+
else:
90+
response = self.query_lines_async(min_frequency=min_frequency,
91+
max_frequency=max_frequency,
92+
min_strength=min_strength,
93+
molecule=molecule,
94+
temperature_for_intensity=temperature_for_intensity,
95+
flags=flags,
96+
parse_name_locally=parse_name_locally,
97+
get_query_payload=get_query_payload,
98+
cache=cache)
99+
if get_query_payload:
100+
return response
101+
else:
102+
return self._parse_result(response, molname=molecule, verbose=verbose)
103+
37104
def query_lines_async(self, min_frequency, max_frequency, *,
38105
min_strength=-500, molecule='All',
39106
temperature_for_intensity=300, flags=0,
@@ -140,17 +207,7 @@ def query_lines_async(self, min_frequency, max_frequency, *,
140207
payload['Moleculesgrp'] = 'all species'
141208
else:
142209
if molecule is not None:
143-
if parse_name_locally:
144-
self.lookup_ids = build_lookup()
145-
luts = self.lookup_ids.find(molecule, flags)
146-
if len(luts) == 0:
147-
raise InvalidQueryError('No matching species found. Please '
148-
'refine your search or read the Docs '
149-
'for pointers on how to search.')
150-
payload['Molecules'] = tuple(f"{val:06d} {key}"
151-
for key, val in luts.items())[0]
152-
else:
153-
payload['Molecules'] = molecule
210+
payload['Molecules'] = self._mol_to_payload(molecule, parse_name_locally, flags)
154211

155212
if get_query_payload:
156213
return payload
@@ -182,16 +239,11 @@ def query_lines_async(self, min_frequency, max_frequency, *,
182239
response2 = self._request(method='GET', url=fullurl,
183240
timeout=self.TIMEOUT, cache=cache)
184241

185-
# accounts for three formats, e.g.: '058501' or 'H2C2S' or '058501 H2C2S'
186-
badlist = (self.MALFORMATTED_MOLECULE_LIST + # noqa
187-
[y for x in self.MALFORMATTED_MOLECULE_LIST for y in x.split()])
188-
if 'Moleculesgrp' not in payload.keys() and payload['Molecules'] in badlist:
189-
raise ValueError(f"Molecule {payload['Molecules']} is known not to comply with standard CDMS format. "
190-
f"Try get_molecule({payload['Molecules']}) instead.")
191-
192242
return response2
193243

194-
def _parse_result(self, response, *, verbose=False):
244+
query_lines.__doc__ = process_asyncs.async_to_sync_docstr(query_lines_async.__doc__)
245+
246+
def _parse_result(self, response, *, verbose=False, molname=None):
195247
"""
196248
Parse a response into an `~astropy.table.Table`
197249
@@ -238,6 +290,8 @@ def _parse_result(self, response, *, verbose=False):
238290
soup = BeautifulSoup(response.text, 'html.parser')
239291
text = soup.find('pre').text
240292

293+
# this is a different workaround to try to make _some_ of the bad molecules parseable
294+
# (it doesn't solve all of them, which is why the above fallback exists)
241295
need_to_filter_bad_molecules = False
242296
for bad_molecule in self.MALFORMATTED_MOLECULE_LIST:
243297
if text.find(bad_molecule.split()[1]) > -1:
@@ -316,7 +370,7 @@ def _parse_result(self, response, *, verbose=False):
316370
except ValueError as ex:
317371
# Give users a more helpful exception when parsing fails
318372
new_message = ("Failed to parse CDMS response. This may be caused by a malformed search return. "
319-
"You can check this by running `CDMS.get_molecule('<id>')` instead; if it works, the "
373+
f"You can check this by running `CDMS.get_molecule('{molname}')` instead; if it works, the "
320374
"problem is caused by the CDMS search interface and cannot be worked around.")
321375
raise ValueError(new_message) from ex
322376

@@ -456,14 +510,25 @@ def get_molecule(self, molecule_id, *, cache=True, return_response=False):
456510

457511
def _parse_cat(self, text, *, verbose=False):
458512
"""
459-
Parse a catalog response into an `~astropy.table.Table`
513+
Parse a CDMS-format catalog file into an `~astropy.table.Table`.
514+
515+
The catalog data files are composed of 80-character card images.
516+
Format: [F13.4, 2F8.4, I2, F10.4, I3, I7, I4, 12I2]:
517+
FREQ, ERR, LGINT, DR, ELO, GUP, TAG, QNFMT, QN
460518
461-
See details in _parse_response; this is a very similar function,
462-
but the catalog responses have a slightly different format.
519+
Parameters
520+
----------
521+
text : str
522+
The catalog file text content.
523+
verbose : bool, optional
524+
Not used currently.
525+
526+
Returns
527+
-------
528+
Table : `~astropy.table.Table`
529+
Parsed catalog data.
463530
"""
464-
# notes about the format
465-
# [F13.4, 2F8.4, I2, F10.4, I3, I7, I4, 12I2]: FREQ, ERR, LGINT, DR, ELO, GUP, TAG, QNFMT, QN noqa
466-
# 13 21 29 31 41 44 51 55 57 59 61 63 65 67 69 71 73 75 77 79 noqa
531+
# Column start positions
467532
starts = {'FREQ': 0,
468533
'ERR': 14,
469534
'LGINT': 22,
@@ -494,7 +559,9 @@ def _parse_cat(self, text, *, verbose=False):
494559
col_starts=list(starts.values()),
495560
format='fixed_width', fast_reader=False)
496561

562+
# Ensure TAG is integer type for computation
497563
# int truncates - which is what we want
564+
result['TAG'] = result['TAG'].astype(int)
498565
result['MOLWT'] = [int(x/1e3) for x in result['TAG']]
499566

500567
result['FREQ'].unit = u.MHz
@@ -527,29 +594,6 @@ def _parse_cat(self, text, *, verbose=False):
527594
CDMS = CDMSClass()
528595

529596

530-
def parse_letternumber(st):
531-
"""
532-
Parse CDMS's two-letter QNs into integers.
533-
534-
Masked values are converted to -999999.
535-
536-
From the CDMS docs:
537-
"Exactly two characters are available for each quantum number. Therefore, half
538-
integer quanta are rounded up ! In addition, capital letters are used to
539-
indicate quantum numbers larger than 99. E. g. A0 is 100, Z9 is 359. Lower case characters
540-
are used similarly to signal negative quantum numbers smaller than –9. e. g., a0 is –10, b0 is –20, etc."
541-
"""
542-
if np.ma.is_masked(st):
543-
return -999999
544-
545-
asc = string.ascii_lowercase
546-
ASC = string.ascii_uppercase
547-
newst = ''.join(['-' + str((asc.index(x)+1)) if x in asc else
548-
str((ASC.index(x)+10)) if x in ASC else
549-
x for x in st])
550-
return int(newst)
551-
552-
553597
class Lookuptable(dict):
554598

555599
def find(self, st, flags):

astroquery/linelists/cdms/setup_package.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ def get_package_data():
99
paths_test = [os.path.join('data', '028503 CO, v=0.data'),
1010
os.path.join('data', '117501 HC7S.data'),
1111
os.path.join('data', '099501 HC7N, v=0.data'),
12+
os.path.join('data', 'c058501.cat'),
1213
os.path.join('data', 'post_response.html'),
1314
]
1415

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
114.9627 0.0001-10.6817 3 9.7413 9 58501 303 1 1 0 1 1 1
2+
344.8868 0.0002 -9.9842 3 10.4849 15 58501 303 2 1 1 2 1 2
3+
689.7699 0.0004 -9.5394 3 11.6003 21 58501 303 3 1 2 3 1 3

0 commit comments

Comments
 (0)